emacs-pretest-bug
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

rx.el changes


From: Dave Love
Subject: rx.el changes
Date: Thu, 15 Jan 2004 11:42:09 +0000
User-agent: Gnus/5.1005 (Gnus v5.10.5) Emacs/21.2 (gnu/linux)

[Sorry if this is a repeat.  I have Gnus problems and I don't know
whether it got sent before.]

The change to rx.el after 21.2 to add a group around `and' forms isn't
documented and I think it needs a corresponding change to `or'.

(I think there should be some commentary about the effect on
efficiency of redundant automatically-added shy groups; does that have
any effect?)

There are some features missing, cf. the claim to cover all regexp
features.  I added what I needed/spotted.

I'm not sure why `rx' doesn't evaluate at compile time, but I think
there should be a macro which does -- that's what you normally want.
I've used

(defmacro crx (re)
  "Expand to the value of `rx-to-string' applied to the form RE.
RE is passed unevaluated.  The call to `rx-to-string' is made at
macro-expansion time, i.e. at compile time when the calling code
is byte-compiled."
  (rx-to-string re))

Rx isn't documented in lispref.

2004-01-02  Dave Love  <address@hidden>

        * emacs-lisp/rx.el (rx-or): Put group around result.
        (rx-constituents): Add backref.
        (rx-syntax): Add string-delimiter, comment-delimiter.
        (rx-categories): Add combining-diacritic.
        (rx-check-not, rx-greedy, rx): Doc fix.
        (rx-backref, rx-check-backref): New.

*** rx.el~      Wed Sep  3 15:58:17 2003
--- rx.el       Fri Jan  2 20:12:09 2004
@@ -120,6 +120,7 @@
     (optional          . zero-or-one)
     (minimal-match     . (rx-greedy 1 1))
     (maximal-match     . (rx-greedy 1 1))
+    (backref           . (rx-backref 1 1 rx-check-backref))
     (line-start                . "^")
     (line-end          . "$")
     (string-start      . "\\`")
@@ -175,7 +176,9 @@
     (escape            . ?\\)
     (character-quote   . ?/)
     (comment-start     . ?<)
-    (comment-end       . ?>))
+    (comment-end       . ?>)
+    (string-delimiter  . ?|)
+    (comment-delimiter . ?!))
   "Alist mapping Rx syntax symbols to syntax characters.
 Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
 symbol in `(syntax SYMBOL)', and CHAR is the syntax character
@@ -204,6 +207,7 @@
     (japanese-katakana-two-byte . ?K)
     (korean-hangul-two-byte    . ?N)
     (cyrillic-two-byte         . ?Y)
+    (combining-diacritic       . ?^)
     (ascii                     . ?a)
     (arabic                    . ?b)
     (chinese                   . ?c)
@@ -286,9 +290,10 @@
     (dolist (arg (cdr form))
       (unless (stringp arg)
        (setq all-args-strings nil)))
-    (if all-args-strings
-       (regexp-opt (cdr form))
-      (mapconcat #'rx-to-string (cdr form) "\\|"))))
+    (concat "\\(?:" (if all-args-strings
+                       (regexp-opt (cdr form))
+                     (mapconcat #'rx-to-string (cdr form) "\\|"))
+           "\\)")))
 
 
 (defun rx-quote-for-set (string)
@@ -328,8 +333,8 @@
           (concat "[" (rx-quote-for-set (cadr form)) "]")))))
 
 
-(defun rx-check-not (form)
-  "Check arguments of FORM.  FORM is `(not ...)'."
+(defun rx-check-not (arg)
+  "Check arg ARG for Rx `not'."
   (unless (or (memq form
                    '(digit control hex-digit blank graphic printing
                            alphanumeric letter ascii nonascii lower
@@ -393,6 +398,16 @@
                     (cdr form) nil)
          "\\)"))
 
+(defun rx-backref (form)
+  "Parse and produce code from FORM, which is `(backref N)'."
+  (rx-check form)
+  (format "\\%d" (nth 1 form)))
+
+(defun rx-check-backref (arg)
+  "Check arg ARG for Rx `backref'."
+  (or (and (integerp arg) (>= arg 1) (<= arg 9))
+      (error "Rx `backref' requires numeric 1<=arg<=9: %s" arg)))
+
 (defun rx-kleene (form)
   "Parse and produce code from FORM.
 FORM is `(OP FORM1)', where OP is one of the `zero-or-one',
@@ -481,10 +496,10 @@
 
 
 (defun rx-greedy (form)
-  "Parse and produce code from FORM.  If FORM is '(minimal-match
-FORM1)', non-greedy versions of `*', `+', and `?' operators will be
-used in FORM1.  If FORM is '(maximal-match FORM1)', greedy operators
-will be used."
+  "Parse and produce code from FORM.
+If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
+`+', and `?' operators will be used in FORM1.  If FORM is
+'(maximal-match FORM1)', greedy operators will be used."
   (rx-check form)
   (let ((rx-greedy-flag (eq (car form) 'maximal-match)))
     (rx-to-string (cadr form))))
@@ -663,6 +678,8 @@
      `character-quote'         (\\s/)
      `comment-start'           (\\s<)
      `comment-end'             (\\s>)
+     `string-delimiter'                (\\s|)
+     `comment-delimiter'       (\\s!)
 
 `(not (syntax SYNTAX))'
      matches a character that has not syntax SYNTAX.
@@ -691,6 +708,7 @@
      `japanese-katakana-two-byte'      (\\cK)
      `korean-hangul-two-byte'          (\\cN)
      `cyrillic-two-byte'               (\\cY)
+     `combining-diacritic'              (\\c^)
      `ascii'                           (\\ca)
      `arabic'                          (\\cb)
      `chinese'                         (\\cc)
@@ -730,7 +748,7 @@
 
 `(minimal-match SEXP)'
      produce a non-greedy regexp for SEXP.  Normally, regexps matching
-     zero or more occurrances of something are \"greedy\" in that they
+     zero or more occurrences of something are \"greedy\" in that they
      match as much as they can, as long as the overall regexp can
      still match.  A non-greedy regexp matches as little as possible.
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]