;; Copyright (c) 2010 Phil Hargett
;; Permission is hereby granted, free of charge, to any person obtaining a copy
;; of this software and associated documentation files (the "Software"), to deal
;; in the Software without restriction, including without limitation the rights
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
;; copies of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:
;; The above copyright notice and this permission notice shall be included in
;; all copies or substantial portions of the Software.
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
;; THE SOFTWARE.
(in-package :hh-parse)
;; ---------------------------------------------------------------------------------------------------------------------
;; LALR(1) parser construction
;; Source code helper
(defmethod source-text ((parser lalr1-parser))
(source-text (lexer parser)))
(defmethod (setf source-text) (text (parser lalr1-parser) )
;; Drop existing lines
(setf (source-text (lexer parser)) text))
(defun expected-next-symbols (parser)
"Given a parser's current state, return a list of valid symbols (terminal and non-terminal) that would advance the parse"
(let ((state (caar (stack parser)))
(actions (entries (actions (grammar parser))))
(expected ()))
(loop for (expected-state expected-symbol) being the hash-key of actions
if (= state expected-state )
do (push expected-symbol expected))
expected))
(defun assimilate-captures (parser reduced-term)
(let ((remaining-captures ()))
(loop for (slot value) in (captures parser)
if (slot-exists-p reduced-term slot)
do (setf (slot-value reduced-term slot) value)
else do (push (list slot value) remaining-captures))
(setf (captures parser) remaining-captures)))
(defun reduce-production (parser production node-values)
(let ((reduced-term (apply (reduction production) parser (rule-name production) node-values)))
(assimilate-captures parser reduced-term)
reduced-term))
;; parser context helpers
(defun push-parser-context (parser state node)
"Because the parser is a table-driven LR parser, the parser uses a stack to manage
its progress through a parse. The entries in this stack are contexts, each of which
is a 2-element list. The 1st element in a context is the state (a number) associated
with the context: the state in the context on the top of the stack is the state
used by the LR parsing algorithm for table lookups. The 2nd element is a node (see
current-parser-node for details)."
(push (list state node) (stack parser)))
(defun current-parser-context (parser)
(car (stack parser)))
(defun pop-parser-context (parser)
(pop (stack parser)))
(defun pop-parser-node (parser)
(when (stack parser)
(destructuring-bind (state node) (pop-parser-context parser)
(declare (ignorable state))
node)))
(defun current-parser-state (parser)
(when (stack parser)
(destructuring-bind (state node) (current-parser-context parser)
(declare (ignorable node))
state)))
(defun current-parser-node (parser)
"Return the node in the context on the top of a parser's stack. The parser understands a node as a 2-element list,
where the first element is a symbol identifying the node type (and corresponding to a symbol in the underlying grammar),
and the second element is (usually) an AST node"
(when (stack parser)
(destructuring-bind (state node) (current-parser-context parser)
(declare (ignorable state))
node)))
(defun get-parse-result (parser)
(current-parser-node parser))
;; parsing
(defun parse-token (parser token)
"Advance the state of the parser by parsing a single token; does not assume token came from lexer"
(let ((grammar (grammar parser)))
(destructuring-bind (token-symbol token-value) (if token token (list :eof nil))
(declare (ignorable token-value))
(loop with continue = t
with result = nil
while continue
do (let ((stack-state (current-parser-state parser)))
(let ((action (gethash (list stack-state token-symbol) (entries (actions grammar)))))
(if action
(destructuring-bind (op arg) action
(cond ((equal :shift op)
(let ((next-state arg))
(push-parser-context parser next-state token)
(setf continue nil)
(setf result :continue)))
((equal :reduce op)
(let* ((production arg)
(reduced-term (reduce-production parser
production
;; we reverse the values, because they were on stack in reverse
(reverse (loop for i from 1 to (length (slot-value production 'rhs))
collect (let ((stack-node (pop-parser-node parser)))
(destructuring-bind (node-type node-value) stack-node
(declare (ignorable node-type))
node-value)))))))
(let ((new-stack-state (current-parser-state parser)))
(push-parser-context parser
(gethash (list new-stack-state (slot-value production 'rule-name )) (entries (gotos grammar)))
(list (slot-value production 'rule-name) reduced-term)))))
((equal :accept op)
(setf continue nil)
(setf result :succeeded))
(t (setf continue nil)
(setf result :failed))))
(progn
(setf continue nil)
(setf result :failed)))))
finally (return (values result (current-parser-context parser)))))))
(defun parse-input (parser &optional input)
(let ((lexer (lexer parser)))
(when input (setf (source-text lexer) input))
(loop for result = (parse-token parser (next-token lexer))
while (equal :continue result)
finally (return (values result (get-parse-result parser))))))
(defun make-parser (lexer grammar)
(let ((parser (make-instance 'lalr1-parser :lexer lexer :grammar grammar)))
(push-parser-context parser 0 (start grammar))
parser))
;; ---------------------------------------------------------------------------------------------------------------------
;;
(defmacro defparser (name &key grammar lexer)
(let ((parser-factory (intern (format nil "MAKE-~a-PARSER" name) (symbol-package name)))
(grammar-name (intern (format nil "~a-GRAMMAR" name) (symbol-package name)))
(lexer-name (intern (format nil "~a-LEXER" name) (symbol-package name))))
`(progn
(defgrammar ,grammar-name
,@grammar)
(deflexer ,lexer-name
,@lexer)
(defun ,parser-factory ()
(let ((grammar (,grammar-name))
(lexer (make-instance ',lexer-name)))
(make-parser lexer grammar))))))