300行Lisp根据语法和词法生成解析器

1. 概要

无,作者自闭中。。。

2. 代码

(defun make-token (class-name data)
  (list class-name data))

(defun make-string-scanner (scanner-spec)
  (lambda (str)
    (with-input-from-string
     (stream str)
     (scan-stream stream scanner-spec))))

(defun make-stream-scanner (scanner-spec)
  (lambda (stream)
    (scan-stream stream scanner-spec)))

(defun re-read (v)
  (if (vectorp v)
      (let ((buffer (svref v 1))
	    (fun (svref v 0)))
	(if buffer
	    (cond
	      ((functionp fun) (re-read (vector (funcall fun (car buffer)) (cdr buffer))))
	      ((vectorp fun) (vector (svref fun 0) (append (svref fun 1) buffer)))
	      (t v))
	    fun))
      v))

(defun scan-stream (stream scanner-spac)
  (let ((scanner-list (mapcar #'token-spec->scanner scanner-spac)))
    (labels
	((iter (stream scanners token-list &optional buffer)
	   (let ((c (read-char stream nil 'end-of-stream)))
	     (if (not (characterp c))
		 (reverse token-list)
		 (let ((temp (mapcar #'(lambda (s)
					 (etypecase s
					   (function (funcall s c))
					   (cons (vector s (cons c nil)))
					   (vector (vector (svref s 0) (cons c (svref s 1))))))
				     scanners)))
		   (if (some #'functionp temp)
		       (iter stream temp token-list (cons c buffer))
		       (let* ((scan-result (car (sort temp #'< :key (lambda (s) (if (vectorp s)
										    (if (svref s 0)
											(length (svref s 1))
											most-positive-fixnum)
										   0)))))
			      (r (if (vectorp scan-result) (svref scan-result 0) scan-result))
			      (unuse-buffer (if (vectorp scan-result) (reverse (svref scan-result 1)))))
			 (if (null r)
			     (error "scan error:~A not match any scanner~%" (reverse buffer))
			     (destructuring-bind (token-string name action) r
			       (iter (if unuse-buffer
					       (make-concatenated-stream
						 (make-string-input-stream (coerce unuse-buffer 'string))
						 stream)
					       stream)
				     scanner-list
				     (ecase action
				       (skip token-list)
				       (symbol (cons (make-token name (make-symbol token-string)) token-list))
				       (number (cons (make-token name (read-from-string token-string)) token-list))
				       (string (cons (make-token name token-string) token-list)))))))))))))
      (iter stream scanner-list nil))))

(defun token-spec->scanner (raa)
  (destructuring-bind (name regexp action) raa
    (labels
	((make-concat-receiver (regexp receiver backouter)
	   (lambda (char-seq)
	     (if (cdr regexp)
		 (regexp->scanner (cdr regexp)
				  (lambda (char-seq2) (funcall receiver (append char-seq char-seq2)))
				  (lambda (char-seq2) (funcall backouter (append char-seq char-seq2))))
		 (funcall receiver char-seq))))
	 (make-single-char-scanner (regexp receiver char-tester backouter)
	   (lambda (c)
	     (if (funcall char-tester c)
		 (if (cdr regexp)
		     (regexp->scanner (cdr regexp)
				      (lambda (s) (funcall receiver (cons c s)))
				      (lambda (s) (funcall backouter (cons c s))))
		     (funcall receiver (cons c nil)))
		 (vector nil (cons c nil)))))
	 (regexp->scanner (regexp receiver backouter)
	   (let ((exp (car regexp)))
	     (cond
	       ((stringp exp)
		(regexp->scanner
		 (cons (cons 'concat (coerce exp 'list)) (cdr regexp))
		 receiver backouter))
	       ((consp exp)
		(regexp->scanner exp (make-concat-receiver regexp receiver backouter) backouter))
	       ((characterp exp)
		(make-single-char-scanner regexp receiver (lambda (c) (eql c exp)) backouter))
	       ((eql exp 'letter)
		(make-single-char-scanner regexp receiver #'alpha-char-p backouter))
	       ((eql exp 'digit)
		(make-single-char-scanner regexp receiver #'digit-char-p backouter))
	       ((eql exp 'any)
		(make-single-char-scanner regexp receiver (constantly t) backouter))
	       ((eql exp 'not)
		(lambda (c)
		  (if (not (eql c (cadr regexp)))
		      (funcall receiver (cons c nil))
		      (vector nil (cons c nil)))))
	       ((eql exp 'whitespace)
		(regexp->scanner
		 '((or #\Space #\NewLine) (arbno (or #\Space #\NewLine)))
		 (make-concat-receiver regexp receiver backouter)
		 backouter))
	       ((eql exp 'or)
		(labels
		    ((make-or-scan (or-scanners &optional buffer)
		       (labels
			   ((or-scan (c)
			      (let ((temp (mapcar (lambda (s)
						    (etypecase s
						      (function (funcall s c))
						      (cons (vector s (cons c nil)))
						      (vector (vector (svref s 0) (cons c (svref s 1))))))
						  or-scanners)))
				(if (some #'functionp temp)
				    (make-or-scan temp (cons c nil))
				    (let ((r (car (sort temp #'< :key (lambda (s)
									(if (vectorp s)
									    (length (svref s 1))
									    0))))))
				      (etypecase r
					(cons (funcall receiver r))
					(vector (re-read (vector (funcall receiver (svref r 0)) (reverse (svref r 1)))))
					(null (vector nil (funcall backouter (reverse (cons c buffer)))))))))))
			 #'or-scan)))
		  (make-or-scan (mapcar (lambda (e) (regexp->scanner (cons e nil) #'identity #'identity)) (cdr regexp)))))
	       ((eql exp 'arbno)
		(lambda (c)
		  (let ((r (funcall (regexp->scanner (cdr regexp) #'identity #'identity) c)))
		    (labels
			((handle-result (r buffer)
			   (etypecase r
			     (cons (regexp->scanner regexp
						    (lambda (s) (funcall receiver (append r s)))
						    (lambda (s) (funcall backouter (append (reverse buffer) s)))))
			     (vector (re-read (vector (funcall receiver (svref r 0)) (svref r 1))))
			     (function (lambda (c) (handle-result (funcall r c) (cons c buffer))))
			     (null (re-read (vector (funcall receiver nil) (reverse buffer)))))))
		      (handle-result r (cons c nil))))))
	       ((eql exp 'concat)
		(regexp->scanner (cdr regexp) receiver backouter))
	       (t (error "Unknown expression:~A~%" exp))))))
      (regexp->scanner regexp (lambda (s) (if s (list (coerce s 'string) name action))) #'identity))))

(defun parse-token (grammar-spec token-list)
  (let ((prod-parser-table (make-hash-table)))
    (dolist (production grammar-spec)
      (destructuring-bind (lhs rhs-list prod-name) production
	(push (production->parser production prod-parser-table) (gethash lhs prod-parser-table))))
    (let ((parser-list '()))
      (maphash (lambda (k v) (setf parser-list (append parser-list v))) prod-parser-table)
      (labels
	  ((iter (token-list parsers receiver)
	     (if (null token-list)
		 (funcall receiver nil)
		 (let ((temps (mapcan (lambda (r)
					(and (or (functionp r) (consp r) (and (vectorp r) (consp (svref r 0)))) (list r)))
				      (mapcar (lambda (p)
						(etypecase p
						  (function (funcall p (car token-list)))
						  (cons (vector p (cons (car token-list) nil)))
						  (vector (vector (svref p 0) (nconc (svref p 1) (cons (car token-list) nil))))))
					      parsers))))
		   (if (some #'functionp temps)
		       (iter (cdr token-list) temps receiver)
		       (let ((r (car (sort temps #'< :key (lambda (p) (etypecase p
									(cons 0)
									(vector (length (svref p 1)))))))))
			 (etypecase r
			   (cons (iter (cdr token-list) parser-list (lambda (gs) (cons r gs))))
			   (vector (iter (append (svref r 1) (cdr token-list)) parser-list (lambda (gs) (cons (svref r 0) gs)))))))))))
	(iter token-list parser-list #'identity)))))

(defun production->parser (production parser-table)
  (destructuring-bind (lhs rhs-list prod-name) production
    (labels
	((rhs->parser (remain-rhs receiver backouter)
	   (let ((rhs (car remain-rhs)))
	     (cond
	       ((and (symbolp rhs) (not (equal rhs 'arbno)) (not (equal rhs 'separated-list)))
		(labels
		    ((parse-lhs (parsers buffer)
		       (lambda (token)
			 (if (null parsers)
			     (if (equal rhs (car token))
				 (if (cdr remain-rhs)
				     (rhs->parser (cdr remain-rhs)
						  (lambda (p) (cons (cadr token) p))
						  (lambda (ts) (funcall backouter (append (reverse (cons token buffer)) ts))))
				     (funcall receiver (cons (cadr token) nil)))
				 (vector nil (funcall backouter (reverse (cons token buffer)))))
			     (let* ((temps (mapcar (lambda (parser)
						     (if (functionp parser)
							 (funcall parser token)
							 parser))
						   parsers))
				    (r (find-if #'consp temps)))
			       (if (some #'functionp temps)
				   (parse-lhs temps (cons token buffer))
				   (let ((r (car (sort temps #'< :key (lambda (p) (etypecase p
										    (cons 0)
										    (vector (length (svref p 1)))))))))
				     (etypecase r
				       (cons (if (cdr remain-rhs)
						 (rhs->parser (cdr remain-rhs)
							      (lambda (p) (funcall receiver (cons r p)))
							      (lambda (ts) (funcall backouter (append (reverse (cons token buffer)) ts))))
						 (funcall receiver (cons r nil))))
				       (vector (if (cdr remain-rhs)
						   (rhs->parser (cdr remain-rhs)
								(lambda (p) (funcall (cons (svref r 0) p)))
								(lambda (ts) (funcall backouter (append (reverse (cons token buffer)) ts))))
						   (funcall receiver (cons (svref r 0) nil))))))))))))
		  (parse-lhs (gethash rhs parser-table) nil)))
	       ((stringp rhs)
		(lambda (token)
		  (if (equal rhs (cadr token))
		      (if (cdr remain-rhs)
			  (rhs->parser (cdr remain-rhs) receiver (lambda (ts) (funcall backouter (cons token ts))))
			  (funcall receiver nil))
		      (vector nil (cons token nil)))))
	       ((consp rhs)
		(lambda (token)
		  (funcall (rhs->parser rhs
					(lambda (r1)
					  (if (cdr remain-rhs)
					      (rhs->parser (cdr remain-rhs)
							   (lambda (r2)
							     (funcall receiver (cons r1 r2)))
							   (lambda (ts) (funcall backouter (cons token ts))))
					      (funcall receiver r1)))
					backouter)
			   token)))
	       ((eql 'arbno rhs)
		(lambda (token)
		  (let ((r (funcall (rhs->parser (cdr remain-rhs) #'identity #'identity) token)))
		    (labels
			((handle-result (r buffer)
			   (etypecase r
			     (cons (rhs->parser remain-rhs (lambda (r2)
							     (funcall receiver (if r2 (mapcar #'list r r2) r)))
						(lambda (ts) (funcall backouter (append (reverse buffer) ts)))))
			     (function (lambda (token)
			       (handle-result (funcall r token) (cons token buffer))))
			     (vector (re-read (vector (funcall receiver (svref r 0)) (svref r 1))))
			     (null (re-read (vector (funcall receiver nil) (reverse buffer)))))))
		      (handle-result r (cons token nil))))))
	       ((eql 'separated-list rhs)
		(lambda (token)
		  (let ((r (funcall (rhs-parser (butlast (cdr remain-rhs)) #'identity backouter) token)))
		    (labels
			((handle-result (r buffer)
			   (etypecase r
			     (cons
			      (lambda (token)
				(if (string-equal (cadr token) (car (last remain-rhs)))
				    (rhs->parser remain-rhs (lambda (r2) (funcall receiver (mapcar #'cons r r2)))
						 (lambda (ts) (funcall backouter (append (reverse (cons token buffer)) ts))))
				    (re-read (vector (funcall receiver nil) (funcall backouter (append (reverse (cons token buffer)) ts)))))))
			     (function
			      (lambda (token)
			       (handle-result (funcall r token) (cons token buffer))))
			     (vector
			      (re-read (vector (funcall receiver (svref r 0)) (svref r 1))))
			     (null
			      (re-read (vector (funcall receiver nil) (funcall backouter (reverse buffer))))))))
		      (handle-result r (cons token nil))))))
	       (t (error "Unexpected rhs:~A~%" rhs))))))
      (rhs->parser rhs-list (lambda (r) (cons prod-name r)) #'identity))))

(defun make-string-parser (the-lexical-spec the-grammar)
  (labels ((find-keywords (rhs-items receiver)
	     (if (null rhs-items)
		 (funcall receiver nil)
		 (etypecase (car rhs-items)
		   (string (find-keywords (cdr rhs-items) (lambda (ks) (funcall receiver (cons (car rhs-items) ks)))))
		   (symbol (find-keywords (cdr rhs-items) receiver))
		   (cons (let ((cks (find-keywords (car rhs-items) #'identity)))
			   (find-keywords (cdr rhs-items) (lambda (ks) (funcall receiver (append cks ks))))))))))
    (let* ((ext-lexical-spec `((keyword
				((or ,@(reduce (lambda (l1 l2) (union l1 l2 :test #'equal))
					      (mapcar
					       (lambda (production) (find-keywords (cadr production) #'identity))
					       the-grammar))))
				string)
			       ,@the-lexical-spec))
	   (scanner (make-string-scanner ext-lexical-spec)))
      (lambda (s)
	(let ((token-list (funcall scanner s)))
	  (format t "token-list:~A~%" token-list)
	  (parse-token the-grammar token-list))))))

3. 使用

(setf the-lexical-spec
      '((whitespace ((or #\Space #\NewLine) (arbno (or #\Space #\NewLine))) skip)
	(comment ("//" (arbno (not #\newline))) skip)
	(identifier (letter (arbno (or letter digit "_" "-" "?"))) symbol)
	(number (digit (arbno digit)) number)
	(number ("-" digit (arbno digit)) number)))

(setf the-grammar
      '((program (expression) a-program)
	(expression (number) const-exp)
	(expression
	 ("-" "(" expression "," expression ")")
	 diff-exp)
	(expression
	 ("zero?" "(" expression ")")
	 zero?-exp)
	(expression
	 ("if" expression "then" expression "else" expression)
	 if-exp)
	(expression (identifier) var-exp)
	(expression
	 ("let" (arbno identifier "=" expression) "in" expression)
	 let-exp)))

(defun test-scan ()
  (funcall (make-string-scanner the-lexical-spec)
	   "asdf  1234  -4321   // skdlajf"))

(defun scan1 (s)
  (funcall (make-string-scanner the-lexical-spec)
	   s))

(defun scan&parse1 (s)
  (funcall (make-string-parser the-lexical-spec the-grammar) s))

(defun test-parse ()
  (scan&parse1 "let x = y u1 = 321 in z "))
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值