HH-Parse / parser.lisp

;; Copyright (c) 2010 Phil Hargett

;; Permission is hereby granted, free of charge, to any person obtaining a copy
;; of this software and associated documentation files (the "Software"), to deal
;; in the Software without restriction, including without limitation the rights
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
;; copies of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:

;; The above copyright notice and this permission notice shall be included in
;; all copies or substantial portions of the Software.

;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
;; THE SOFTWARE.

(in-package :hh-parse)

;; ---------------------------------------------------------------------------------------------------------------------
;; LALR(1) parser construction

;; Source code helper
(defmethod source-text ((parser lalr1-parser))
  (source-text (lexer parser)))

(defmethod (setf source-text) (text (parser lalr1-parser) )
  ;; Drop existing lines
  (setf (source-text (lexer parser)) text))

(defun expected-next-symbols (parser)
  "Given a parser's current state, return a list of valid symbols (terminal and non-terminal) that would advance the parse"
  (let ((state (caar (stack parser)))
        (actions (entries (actions (grammar parser))))
        (expected ()))
    (loop for (expected-state expected-symbol) being the hash-key of actions
         if (= state expected-state ) 
         do (push expected-symbol expected))
    expected))

(defun assimilate-captures (parser reduced-term)
  (let ((remaining-captures ()))
    (loop for (slot value) in (captures parser)
       if (slot-exists-p reduced-term slot)
       do (setf (slot-value reduced-term slot) value)
       else do (push (list slot value) remaining-captures))
    (setf (captures parser) remaining-captures)))

(defun reduce-production (parser production node-values)
  (let ((reduced-term (apply (reduction production) parser (rule-name production) node-values)))
    (assimilate-captures parser reduced-term)
    reduced-term))

;; parser context helpers

(defun push-parser-context (parser state node)
  "Because the parser is a table-driven LR parser, the parser uses a stack to manage
   its progress through a parse.  The entries in this stack are contexts, each of which
   is a 2-element list.  The 1st element in a context is the state (a number) associated
   with the context: the state in the context on the top of the stack is the state
   used by the LR parsing algorithm for table lookups.  The 2nd element is a node (see
   current-parser-node for details)."
  (push (list state node) (stack parser)))

(defun current-parser-context (parser)
  (car (stack parser)))

(defun pop-parser-context (parser)
  (pop (stack parser)))

(defun pop-parser-node (parser)
  (when (stack parser)
    (destructuring-bind (state node) (pop-parser-context parser)
      (declare (ignorable state))
      node)))

(defun current-parser-state (parser)
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable node))
      state)))

(defun current-parser-node (parser)
  "Return the node in the context on the top of a parser's stack. The parser understands a node as a 2-element list,
  where the first element is a symbol identifying the node type (and corresponding to a symbol in the underlying grammar),
  and the second element is (usually) an AST node"
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable state))
      node)))

(defun get-parse-result (parser)
  (current-parser-node parser))

;; parsing

(defun parse-token (parser token)
  "Advance the state of the parser by parsing a single token; does not assume token came from lexer"
  (let ((grammar (grammar parser)))
    (destructuring-bind (token-symbol token-value) (if token token (list :eof nil))
      (declare (ignorable token-value))
      (loop with continue = t
         with result = nil
         while continue
         do (let ((stack-state (current-parser-state parser)))
              (let ((action (gethash (list stack-state token-symbol) (entries (actions grammar)))))
                (if action
                    (destructuring-bind (op arg) action

                      (cond ((equal :shift op)
                             (let ((next-state arg))
                               (push-parser-context parser next-state token)
                               (setf continue nil)
                               (setf result :continue)))

                            ((equal :reduce op)
                             (let* ((production arg)
                                    (reduced-term (reduce-production parser
                                                                     production 
                                                                     ;; we reverse the values, because they were on stack in reverse
                                                                     (reverse (loop for i from 1 to (length (slot-value production 'rhs))
                                                                                 collect (let ((stack-node (pop-parser-node parser)))
                                                                                           (destructuring-bind (node-type node-value) stack-node
                                                                                             (declare (ignorable node-type))
                                                                                             node-value)))))))
                               (let ((new-stack-state (current-parser-state parser)))
                                 (push-parser-context parser 
                                                      (gethash (list new-stack-state (slot-value production 'rule-name )) (entries (gotos grammar)))
                                                      (list (slot-value production 'rule-name) reduced-term)))))

                            ((equal :accept op)
                             (setf continue nil)
                             (setf result :succeeded))

                            (t (setf continue nil)
                               (setf result :failed))))
                    (progn
                      (setf continue nil)
                      (setf result :failed)))))
         finally (return (values result (current-parser-context parser)))))))

(defun parse-input (parser &optional input)
  (let ((lexer (lexer parser)))
    (when input (setf (source-text lexer) input))
    (loop for result = (parse-token parser (next-token lexer))
       while (equal :continue result)
       finally (return (values result (get-parse-result parser))))))

(defun make-parser (lexer grammar)
  (let ((parser (make-instance 'lalr1-parser :lexer lexer :grammar grammar)))
    (push-parser-context parser 0 (start grammar))
    parser))

;; ---------------------------------------------------------------------------------------------------------------------
;;

(defmacro defparser (name &key grammar lexer)
  (let ((parser-factory (intern (format nil "MAKE-~a-PARSER" name) (symbol-package name)))
        (grammar-name (intern (format nil "~a-GRAMMAR" name) (symbol-package name)))
        (lexer-name (intern (format nil "~a-LEXER" name) (symbol-package name))))
    `(progn 

       (defgrammar ,grammar-name
           ,@grammar)

       (deflexer ,lexer-name
           ,@lexer)

       (defun ,parser-factory ()
         (let ((grammar (,grammar-name))
               (lexer (make-instance ',lexer-name)))
           (make-parser lexer grammar))))))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.