commit c97bd9aa46d53200da7e12a92e7e532eef541ca9
parent 1f47d2a91045607faa7a39c8672a4ce9cdfea3fd
Author: Tomas Hlavaty <tom@logand.com>
Date:   Sun, 18 Aug 2013 00:31:06 +0200
base64 email filesystem.lisp and xml readers/writers added
Diffstat:
| A | base64.lisp |  |  | 80 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| M | cl-rw.asd |  |  | 6 | +++++- | 
| A | email.lisp |  |  | 109 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| A | filesystem.lisp |  |  | 68 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| A | xml.lisp |  |  | 152 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
5 files changed, 414 insertions(+), 1 deletion(-)
diff --git a/base64.lisp b/base64.lisp
@@ -0,0 +1,80 @@
+;;; Copyright (C) 2013 Tomas Hlavaty <tom@logand.com>
+;;;
+;;; Permission is hereby granted, free of charge, to any person
+;;; obtaining a copy of this software and associated documentation
+;;; files (the "Software"), to deal in the Software without
+;;; restriction, including without limitation the rights to use, copy,
+;;; modify, merge, publish, distribute, sublicense, and/or sell copies
+;;; of the Software, and to permit persons to whom the Software is
+;;; furnished to do so, subject to the following conditions:
+;;;
+;;; The above copyright notice and this permission notice shall be
+;;; included in all copies or substantial portions of the Software.
+;;;
+;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+;;; DEALINGS IN THE SOFTWARE.
+
+(defpackage :rw.base64
+  (:use :cl)
+  (:export :encode-reader
+           :decode-reader))
+
+(in-package :rw.base64)
+
+(defun encode-reader (reader &optional table wrap) ;; TODO wrap 76
+  (let (pending
+        (table (or table
+                   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")))
+    (lambda ()
+      (cond
+        (pending (pop pending))
+        ((not (rw:peek reader)) nil)
+        (t
+         (flet ((%next ()
+                  (let ((x (rw:next reader)))
+                    (when x
+                      (logand #xff (if (characterp x) (char-code x) x)))))
+                (%map (x n)
+                  (char table (ldb (byte 6 n) x))))
+           (let* ((a (%next))
+                  (b (%next))
+                  (c (%next))
+                  (x (+ (ash a 16) (ash (or b 0) 8) (or c 0))))
+             (push (if c (%map x 0) #\=) pending)
+             (push (if b (%map x 6) #\=) pending)
+             (push (%map x 12) pending)
+             (%map x 18))))))))
+
+(defun decode-reader (reader &optional table) ;; TODO skip newlines?
+  (let (pending
+        (table (or table
+                   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")))
+    (lambda ()
+      (cond
+        (pending (pop pending))
+        ((not (rw:peek reader)) nil)
+        (t
+         (flet ((%next ()
+                  (let ((x (rw:next reader)))
+                    (unless (eql #\= x)
+                      (position x table))))
+                (%map (x n)
+                  (code-char (ldb (byte 8 n) x))))
+           (let* ((a (%next))
+                  (b (%next))
+                  (c (%next))
+                  (d (%next))
+                  (x (+ (ash a 18) (ash b 12) (ash (or c 0) 6) (or d 0))))
+             (when d (push (%map x 0) pending))
+             (when c (push (%map x 8) pending))
+             (%map x 16))))))))
+
+;;(rw:till (rw:peek-reader (decode-reader (rw:peek-reader (encode-reader (rw:peek-reader (rw:reader "any carnal pleasure.")))))))
+;;(rw:till (rw:peek-reader (encode-reader (rw:peek-reader (rw:reader "any carnal pleasure.")))))
+;;(rw:till (rw:peek-reader (decode-reader (rw:peek-reader (rw:reader "YW55IGNhcm5hbCBwbGVhc3VyZS4=")))))
diff --git a/cl-rw.asd b/cl-rw.asd
@@ -35,4 +35,8 @@
   :licence "MIT"
   :depends-on ()
   :serial t
-  :components ((:file "rw")))
+  :components ((:file "rw")
+               (:file "filesystem")
+               (:file "base64")
+               (:file "xml")
+               (:file "email")))
diff --git a/email.lisp b/email.lisp
@@ -0,0 +1,109 @@
+;;; Copyright (C) 2013 Tomas Hlavaty <tom@logand.com>
+;;;
+;;; Permission is hereby granted, free of charge, to any person
+;;; obtaining a copy of this software and associated documentation
+;;; files (the "Software"), to deal in the Software without
+;;; restriction, including without limitation the rights to use, copy,
+;;; modify, merge, publish, distribute, sublicense, and/or sell copies
+;;; of the Software, and to permit persons to whom the Software is
+;;; furnished to do so, subject to the following conditions:
+;;;
+;;; The above copyright notice and this permission notice shall be
+;;; included in all copies or substantial portions of the Software.
+;;;
+;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+;;; DEALINGS IN THE SOFTWARE.
+
+(defpackage :rw.email
+  (:use :cl)
+  (:export :directory-reader
+           :file-reader))
+
+(in-package :rw.email)
+
+;;(with-open-file (s "~/Mail/goethe/27") (rw:till (rw:peek-reader (rw:char-reader s))))
+
+(defun header-reader (reader)
+  (flet ((peek () (rw:peek reader))
+         (next () (rw:next reader))
+         (skip () (rw:skip reader))
+         (till (items) (rw:till reader items)))
+    (let (eof)
+      (lambda ()
+        (or eof
+            (case (peek)
+              ((nil) (setq eof 'eof))
+              (#\newline (next) (setq eof t))
+              (t (cons
+                  (prog1 (till '(#\space #\tab #\newline #\:))
+                    (assert (eql #\: (next)))
+                    (skip))
+                  (with-output-to-string (s)
+                    (flet ((line ()
+                             (write-string (till '(#\newline)) s)
+                             (assert (eql #\newline (next)))))
+                      (line)
+                      (do ()
+                          ((not (member (peek) '(#\space #\tab))))
+                        (terpri s)
+                        (line))))))))))))
+
+(defun header-alist (reader)
+  (rw:till (rw:peek-reader (header-reader (rw:peek-reader (rw:char-reader reader))))))
+
+(defun content-type (reader)
+  (flet ((peek () (rw:peek reader))
+         (next () (rw:next reader))
+         (skip () (rw:skip reader))
+         (till (items) (rw:till reader items)))
+    (let ((mime (till '(#\space #\tab #\newline #\;))))
+      (make
+       (link mime)
+       (assert (eql #\; (next)))
+       (skip)
+       (do ()
+           ((not (peek)))
+         (link (let ((k (till '(#\space #\tab #\newline #\=))))
+                 (cond
+                   ((string= "type" k) :type)
+                   ((string= "boundary" k) :boundary)
+                   (t (error "unknown attribute ~s of content-type ~s" k mime)))))
+         (assert (eql #\= (next)))
+         (assert (eql #\" (next)))
+         (link (till '(#\space #\tab #\newline #\")))
+         (assert (eql #\" (next)))
+         (when (eql #\; (peek))
+           (next)
+           (skip)))))))
+
+;; https://en.wikipedia.org/wiki/MIME#Multipart_subtypes
+(defun parse-nnml-file (pathname)
+  (with-open-file (s pathname)
+    (let ((x (rw:peek-reader (rw:char-reader (cdr (assoc "Content-Type" (header-alist s) :test #'string=))))))
+      (destructuring-bind (mime &key type boundary) (content-type x)
+        (cond
+          #+nil
+          ((string= "multipart/mixed" mime)
+           (list mime type boundary))
+          ((string= "multipart/alternative" mime)
+           (list mime type boundary))
+          ((string= "multipart/related" mime)
+           (list mime type boundary))
+          #+nil
+          ((string= "multipart/form-data" mime)
+           (list mime type boundary))
+          #+nil
+          ((string= "multipart/signed" mime)
+           (list mime type boundary))
+          #+nil
+          ((string= "multipart/encrypted" mime)
+           (list mime type boundary))
+          (t (error "unknown content-type ~s" mime)))))))
+
+;;(parse-nnml-file "~/Mail/goethe/27")
diff --git a/filesystem.lisp b/filesystem.lisp
@@ -0,0 +1,68 @@
+;;; Copyright (C) 2013 Tomas Hlavaty <tom@logand.com>
+;;;
+;;; Permission is hereby granted, free of charge, to any person
+;;; obtaining a copy of this software and associated documentation
+;;; files (the "Software"), to deal in the Software without
+;;; restriction, including without limitation the rights to use, copy,
+;;; modify, merge, publish, distribute, sublicense, and/or sell copies
+;;; of the Software, and to permit persons to whom the Software is
+;;; furnished to do so, subject to the following conditions:
+;;;
+;;; The above copyright notice and this permission notice shall be
+;;; included in all copies or substantial portions of the Software.
+;;;
+;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+;;; DEALINGS IN THE SOFTWARE.
+
+(defpackage :rw.filesystem
+  (:use :cl)
+  (:export :directory-reader
+           :file-reader))
+
+(in-package :rw.filesystem)
+
+#+nil ;; TODO already defined on ccl?! but not on sbcl?
+(defun directoryp (pathname)
+  (equal (directory-namestring pathname) (namestring pathname)))
+
+(defun directory-reader (pathname &optional recurse)
+  (when (directoryp pathname)
+    (flet ((expand (x) (directory (merge-pathnames "*" #+nil "*.*" x))))
+      (let ((stack (list (expand pathname))))
+        (lambda ()
+          (when stack
+            (let ((x (pop (car stack))))
+              (unless (car stack)
+                (pop stack))
+              (prog1 x
+                (when (and x recurse (directoryp x))
+                  (let ((y (expand x)))
+                    (when y
+                      (push y stack))))))))))))
+
+;;(rw:till (rw:peek-reader (directory-reader "~/Mail/")))
+;;(rw:till (rw:peek-reader (directory-reader "~/News/")))
+;;(rw:till (rw:peek-reader (directory-reader "~/News/" t)))
+;;(rw:till (rw:peek-reader (directory-reader "/tmp/")))
+;;(rw:till (rw:peek-reader (directory-reader "/tmp/" t)))
+
+#+nil
+(defun directory-reader (reader)
+  (lambda ()
+    (do ((x (rw:next reader) (rw:next reader)))
+        ((or (not x) (directoryp x)) x))))
+
+#+nil
+(defun file-reader (reader)
+  (lambda ()
+    (do ((x (rw:next reader) (rw:next reader)))
+        ((or (not x) (not (directoryp x))) x))))
+
+;;(till (rw:peek-reader (directory-reader (dir-reader "/tmp/" t))))
+;;(till (rw:peek-reader (file-reader (dir-reader "/tmp/" t))))
diff --git a/xml.lisp b/xml.lisp
@@ -0,0 +1,152 @@
+;;; Copyright (C) 2013 Tomas Hlavaty <tom@logand.com>
+;;;
+;;; Permission is hereby granted, free of charge, to any person
+;;; obtaining a copy of this software and associated documentation
+;;; files (the "Software"), to deal in the Software without
+;;; restriction, including without limitation the rights to use, copy,
+;;; modify, merge, publish, distribute, sublicense, and/or sell copies
+;;; of the Software, and to permit persons to whom the Software is
+;;; furnished to do so, subject to the following conditions:
+;;;
+;;; The above copyright notice and this permission notice shall be
+;;; included in all copies or substantial portions of the Software.
+;;;
+;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+;;; DEALINGS IN THE SOFTWARE.
+
+(defpackage :rw.xml
+  (:use :cl)
+  (:export :xmarkup-reader
+           :parse-xml))
+
+(in-package :rw.xml)
+
+;; https://github.com/drewc/smug/blob/master/smug.org
+;; http://www.htmlhelp.com/reference/wilbur/misc/comment.html
+
+;; TODO xml is made of bytes, not chars
+
+(defun parse-xml-attributes (reader finish) ;; finish='((#\/))|'((#\?))
+  (flet ((peek () (rw:peek reader))
+         (next () (rw:next reader))
+         (skip () (rw:skip reader))
+         (till (markers) (rw:till reader markers)))
+    (do (z)
+        ((eql #\> (peek))
+         (assert (eql #\> (next)))
+         (let ((f (equal (car z) finish)))
+           (when f (pop z))
+           (values (nreverse z) f)))
+      (push (cons (prog1 (till '(#\space #\tab #\newline #\> #\=))
+                    (skip)
+                    (when (eql #\= (peek))
+                      (next)
+                      (skip)))
+                  (let ((q (peek)))
+                    (when (member q '(#\" #\'))
+                      (next)
+                      (prog1 (till (cons q '(#\space #\tab #\newline #\>)))
+                        (assert (eql q (next)))
+                        (skip)))))
+            z))))
+
+(defun xmarkup-reader (reader) ;; TODO see and move cl-parsers to cl-rw?
+  (flet ((peek () (rw:peek reader))
+         (next () (rw:next reader))
+         (skip () (rw:skip reader))
+         (till (markers) (rw:till reader markers)))
+    (lambda ()
+      (case (peek)
+        ((nil))
+        (#\<
+         (next)
+         (skip)
+         (let ((e (till '(#\space #\tab #\newline #\>))))
+           (skip)
+           (case (car e) ;; TODO doctype
+             (#\?
+              (multiple-value-bind (a f)
+                  (parse-xml-attributes reader '((#\?)))
+                (assert f)
+                (cons :pi (cons (cdr e) a))))
+             (#\!
+              (prog1 (cons :comment (till '(#\>))) ;; TODO properly
+                (assert (eql #\> (next)))))
+             (#\/
+              (assert (eql #\> (next)))
+              (cons :end (cdr e)))
+             (t
+              (multiple-value-bind (a f)
+                  (parse-xml-attributes reader '((#\/)))
+                (unless f
+                  (when (equal '(#\/) (last e))
+                    (setq f t
+                          e (nreverse (cdr (nreverse e)))))) ;; TODO better
+                (cons (if f :begin/ :begin) (cons e a)))))))
+        (t (cons :text (till '(#\<)))))))) ;; TODO entities
+
+(defun parse-xml (x)
+  (labels ((id (x)
+             (intern (string-upcase (concatenate 'string x)) :keyword))
+           (xattrs (x)
+             (loop
+                for (f . r) in x
+                appending (list (id f) (concatenate 'string r))))
+           (parse (r)
+             (do ((z (list nil))
+                  (r (xmarkup-reader (rw:skip r)))
+                  a)
+                 ((not (setq a (rw:next r)))
+                  (let ((y (pop z)))
+                    (assert (not z))
+                    (assert (not (cdr y)))
+                    (car y)))
+               (ecase (car a)
+                 (:pi)
+                 (:comment)
+                 (:begin/
+                  (let ((tag (id (cadr a)))
+                        (attrs (xattrs (cddr a))))
+                    (push (list (if attrs (cons tag attrs) tag)) (car z))))
+                 (:begin
+                  (let ((tag (cadr a))
+                        (attrs (xattrs (cddr a))))
+                    (push (list (if attrs (cons tag attrs) tag)) z))
+                  (push nil z))
+                 (:end
+                  (let ((tag (cdr a))
+                        (b (nreverse (pop z)))
+                        (e (pop z)))
+                    (assert e)
+                    (assert z)
+                    (let* ((h (car e))
+                           (tag2 (if (atom (car h)) h (car h)))
+                           (attrs (unless (atom (car h)) (cdr h))))
+                      (assert (equal tag tag2))
+                      (push (cons (if attrs (cons (id tag) attrs) (id tag)) b)
+                            (car z)))))
+                 (:text
+                  (when (and (cdr z)
+                             (find-if-not
+                              (lambda (c)
+                                (or (member c '(#\space #\tab #\newline))))
+                              (cdr a)))
+                    (push (string-trim '(#\space #\tab #\newline)
+                                       (concatenate 'string (cdr a)))
+                          (car z))))))))
+    (etypecase x
+      (function (parse x))
+      ((or list vector) (parse (rw:peek-reader (rw:reader x))))
+      (pathname (with-open-file (s x)
+                  (parse (rw:peek-reader (rw:char-reader s))))))))
+
+;;(parse-xml "<rss><ahoj/>hi<cau></cau><br x='314'/></rss>")
+;;(parse-xml "<rss a='1' b='2'>hi<br/></rss>")
+;;(parse-xml "<rss>hi<br/></hello>")
+;;(parse-xml #p"/home/tomas/git/cl-rw/a.xml")