guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, elisp, updated. release_1-9-1-83-g9a9f


From: Daniel Kraft
Subject: [Guile-commits] GNU Guile branch, elisp, updated. release_1-9-1-83-g9a9f123
Date: Thu, 27 Aug 2009 16:50:22 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=9a9f123144c179ef659252d3442117d6b774be7f

The branch, elisp has been updated
       via  9a9f123144c179ef659252d3442117d6b774be7f (commit)
      from  9e90010f075412e360890bd155de24c5d583de8a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9a9f123144c179ef659252d3442117d6b774be7f
Author: Daniel Kraft <address@hidden>
Date:   Thu Aug 27 18:49:29 2009 +0200

    Support circular structures in elisp reader.
    
    * module/language/elisp/lexer.scm: Recognize circular markers.
    * module/language/elisp/parser.scm: Handle them correctly.
    * test-suite/tests/elisp-reader.test: Check circular structure parsing.

-----------------------------------------------------------------------

Summary of changes:
 module/language/elisp/lexer.scm    |   30 ++++++++++-
 module/language/elisp/parser.scm   |   94 +++++++++++++++++++++++++++++++++---
 test-suite/tests/elisp-reader.test |   19 +++++++-
 3 files changed, 131 insertions(+), 12 deletions(-)

diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm
index 099c9b6..4aecca6 100644
--- a/module/language/elisp/lexer.scm
+++ b/module/language/elisp/lexer.scm
@@ -1,6 +1,6 @@
 ;;; Guile Emac Lisp
 
-;; Copyright (C) 2001 Free Software Foundation, Inc.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
 
 ;; This program is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
@@ -33,7 +33,6 @@
 ; into a real Scheme character range.  Additionally, elisp wants characters
 ; as integers, so we just do the right thing...
 
-; TODO: Circular syntax markers like #1= or #1#
 ; TODO: address@hidden comments
 
 
@@ -221,6 +220,26 @@
         (else
           (unread-char c port)
           (finish))))))
+
+
+; Parse a circular structure marker without the leading # (which was already
+; read and recognized), that is, a number as identifier and then either
+; = or #.
+
+(define (get-circular-marker port)
+  (call-with-values
+    (lambda ()
+      (let iterate ((result 0))
+        (let ((cur (read-char port)))
+          (if (char-numeric? cur)
+            (let ((val (- (char->integer cur) (char->integer #\0))))
+              (iterate (+ (* result 10) val)))
+            (values result cur)))))
+    (lambda (id type)
+      (case type
+        ((#\#) `(circular-ref . ,id))
+        ((#\=) `(circular-def . ,id))
+        (else (lexer-error port "invalid circular marker character" type))))))
   
 
 ; Main lexer routine, which is given a port and does look for the next token.
@@ -290,6 +309,11 @@
                                        result-chars))))))
                  (else (iterate (cons cur result-chars)))))))
 
+          ; Circular markers (either reference or definition).
+          ((#\#)
+           (let ((mark (get-circular-marker port)))
+             (return (car mark) (cdr mark))))
+
           ; Parentheses and other special-meaning single characters.
           ((#\() (return 'paren-open #f))
           ((#\)) (return 'paren-close #f))
@@ -374,7 +398,7 @@
              (set! paren-level (1+ paren-level)))
             ((paren-close square-close)
              (set! paren-level (1- paren-level)))
-            ((quote backquote unquote unquote-splicing)
+            ((quote backquote unquote unquote-splicing circular-def)
              (set! quotation #t)))
           (if (and (not quotation) (<= paren-level 0))
             (set! finished #t))
diff --git a/module/language/elisp/parser.scm b/module/language/elisp/parser.scm
index 423ee6e..04229d8 100644
--- a/module/language/elisp/parser.scm
+++ b/module/language/elisp/parser.scm
@@ -26,8 +26,8 @@
 ; The parser (reader) for elisp expressions.
 ; Is is hand-written (just as the lexer is) instead of using some parser
 ; generator because this allows easier transfer of source properties from the
-; lexer, makes the circular syntax parsing easier (as it would be with
-; (text parse-lalr) and is easy enough anyways.
+; lexer ((text parse-lalr) seems not to allow access to the original lexer
+; token-pair) and is easy enough anyways.
 
 
 ; Report a parse error.  The first argument is some current lexer token
@@ -37,6 +37,73 @@
   (apply error msg args))
 
 
+; For parsing circular structures, we keep track of definitions in a
+; hash-map that maps the id's to their values.
+; When defining a new id, though, we immediatly fill the slot with a promise
+; before parsing and setting the real value, because it must already be
+; available at that time in case of a circular reference.  The promise refers
+; to a local variable that will be set when the real value is available through
+; a closure.  After parsing the expression is completed, we work through it
+; again and force all promises we find.
+; The definitions themselves are stored in a fluid and their scope is one
+; call to read-elisp (but not only the currently parsed expression!).
+
+(define circular-definitions (make-fluid))
+
+(define (make-circular-definitions)
+  (make-hash-table))
+
+(define (circular-ref token)
+  (if (not (eq? (car token) 'circular-ref))
+    (error "invalid token for circular-ref" token))
+  (let* ((id (cdr token))
+         (value (hashq-ref (fluid-ref circular-definitions) id)))
+    (if value
+      value
+      (parse-error token "undefined circular reference" id))))
+
+; Returned is a closure that, when invoked, will set the final value.
+; This means both the variable the promise will return and the hash-table
+; slot so we don't generate promises any longer.
+(define (circular-define! token)
+  (if (not (eq? (car token) 'circular-def))
+    (error "invalid token for circular-define!" token))
+  (let ((value #f)
+        (table (fluid-ref circular-definitions))
+        (id (cdr token)))
+    (hashq-set! table id (delay value))
+    (lambda (real-value)
+      (set! value real-value)
+      (hashq-set! table id real-value))))
+
+; Work through a parsed data structure and force the promises there.
+; After a promise is forced, the resulting value must not be recursed on;
+; this may lead to infinite recursion with a circular structure, and
+; additionally this value was already processed when it was defined.
+; All deep data structures that can be parsed must be handled here!
+(define (force-promises! data)
+  (cond
+    ((pair? data)
+     (begin
+       (if (promise? (car data))
+         (set-car! data (force (car data)))
+         (force-promises! (car data)))
+       (if (promise? (cdr data))
+         (set-cdr! data (force (cdr data)))
+         (force-promises! (cdr data)))))
+    ((vector? data)
+     (let ((len (vector-length data)))
+       (let iterate ((i 0))
+         (if (< i len)
+           (let ((el (vector-ref data i)))
+             (if (promise? el)
+               (vector-set! data i (force el))
+               (force-promises! el))
+             (iterate (1+ i)))))))
+    ; Else nothing needs to be done.
+  ))
+
+
 ; We need peek-functionality for the next lexer token, this is done with some
 ; single token look-ahead storage.  This is handled by a closure which allows
 ; getting or peeking the next token.
@@ -67,6 +134,8 @@
 ; found.  The same code is used for vectors and lists, where lists allow the
 ; dotted tail syntax and vectors not; additionally, the closing parenthesis
 ; must of course match.
+; The implementation here is not tail-recursive, but I think it is clearer
+; and simpler this way.
 
 (define (get-list lex allow-dot close-square)
   (let* ((next (lex 'peek))
@@ -117,16 +186,27 @@
        (return (get-list lex #t #f)))
       ((square-open)
        (return (list->vector (get-list lex #f #t))))
+      ((circular-ref)
+       (circular-ref token))
+      ((circular-def)
+       ; The order of definitions is important!
+       (let* ((setter (circular-define! token))
+              (expr (get-expression lex)))
+         (setter expr)
+         (force-promises! expr)
+         expr))
       (else
         (parse-error token "expected expression, got" token)))))
 
 
 ; Define the reader function based on this; build a lexer, a lexer-buffer,
 ; and then parse a single expression to return.
+; We also define a circular-definitions data structure to use.
 
 (define (read-elisp port)
-  (let* ((lexer (get-lexer port))
-         (lexbuf (make-lexer-buffer lexer))
-         (result (get-expression lexbuf)))
-    (lexbuf 'finish)
-    result))
+  (with-fluids ((circular-definitions (make-circular-definitions)))
+    (let* ((lexer (get-lexer port))
+           (lexbuf (make-lexer-buffer lexer))
+           (result (get-expression lexbuf)))
+      (lexbuf 'finish)
+      result)))
diff --git a/test-suite/tests/elisp-reader.test 
b/test-suite/tests/elisp-reader.test
index c228283..fc7cd1b 100644
--- a/test-suite/tests/elisp-reader.test
+++ b/test-suite/tests/elisp-reader.test
@@ -116,7 +116,11 @@ test\"ab\"\\ abcd
               ,(- (char->integer #\X) (char->integer #\@))
               ,(+ (expt 2 22) (expt 2 23) (expt 2 24) 32))))
 
-  (let* ((lex1-string "'((1 2) [2 [3]] 5)")
+  (pass-if "circular markers"
+    (equal? (lex-string "#0342= #1#")
+            '((circular-def . 342) (circular-ref . 1))))
+
+  (let* ((lex1-string "#1='((1 2) [2 [3]] 5)")
          (lexer (call-with-input-string (string-append lex1-string " 1 2")
                                         get-lexer/1)))
     (pass-if "lexer/1"
@@ -167,4 +171,15 @@ test\"ab\"\\ abcd
 
   (pass-if "vectors"
     (equal? (parse-str "[1 2 [] (3 4) \"abc\" d]")
-            #(1 2 #() (3 4) "abc" d))))
+            #(1 2 #() (3 4) "abc" d)))
+
+  (pass-if "circular structures"
+    (and (equal? (parse-str "(#1=a #2=b #1# (#1=c #1# #2#) #1#)")
+                 '(a b a (c c b) c))
+         (let ((eqpair (parse-str "(#1=\"foobar\" . #1#)")))
+           (eq? (car eqpair) (cdr eqpair)))
+         (let ((circlst (parse-str "#1=(42 #1# #1=5 #1#)")))
+           (and (eq? circlst (cadr circlst))
+                (equal? (cddr circlst) '(5 5))))
+         (let ((circvec (parse-str "#1=[a #1# b]")))
+           (eq? circvec (vector-ref circvec 1))))))


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]