emacs-pretest-bug
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: filling long html href


From: Stefan Monnier
Subject: Re: filling long html href
Date: Tue, 01 May 2007 12:09:40 -0400
User-agent: Gnus/5.11 (Gnus v5.11) Emacs/22.1.50 (gnu/linux)

Here's a variant:

   (defun regexp-not-in (words endchars &optional prefix)
     "Return a regexp that matches anything other than words in WORDS.
   WORDS is a list of strings.
   ENDCHARS is a list containing the chars that can appear after a word.
   PREFIX is only used internally by the recursive calls: it is the part
   of a word that the regexp built so far has already matched."
     ;; On recursive calls, only the words that extend PREFIX are relevant.
     (if prefix (setq words (all-completions prefix words)) (setq prefix ""))
     (let* ((pos (length prefix))
            ;; Non-nil if PREFIX itself is a complete word.  That word is
            ;; dropped from `words' so that every remaining word has a char
            ;; at index `pos'.
            (empty (when (member prefix words)
                     (setq words (remove prefix words)) t))
            ;; The chars that continue PREFIX towards some longer word.
            (chars (delete-dups (mapcar (lambda (word) (aref word pos)) words)))
            ;; The chars that do NOT prove we are outside of WORDS.  When
            ;; PREFIX is a word, a following end char completes it, so the
            ;; end chars are excluded too.  Use `append', not `nconc':
            ;; `chars' is iterated below and must contain only the
            ;; continuation chars, never the end chars.
            (excluded (if empty (append chars endchars) chars))
            ;; Regexp for "one char that is not in `excluded'".  It is built
            ;; directly so that the result is a valid regexp whatever the
            ;; set contains, including when the set is empty.
            (nonchars-re
             (if (null excluded)
                 ;; Nothing is excluded: match any char, newline included.
                 "\\(?:.\\|\n\\)"
               (let ((bracket "") (caret "") (dash "") (others ""))
                 (dolist (c excluded)
                   (cond ((eq c ?\]) (setq bracket "]"))
                         ((eq c ?^) (setq caret "^"))
                         ((eq c ?-) (setq dash "-"))
                         (t (setq others (concat others (string c))))))
                 ;; `]' has to come first and `-' last to be taken literally.
                 (concat "[^" bracket others caret dash "]")))))
       (cond
        ((and empty (null endchars))
         ;; If there are no termination chars, then we have to simply disallow
         ;; this match altogether.
         "\\`")
        ((null chars) nonchars-re)
        (t
         ;; Either the next char leaves every word, or it continues towards
         ;; some word and the rest is decided recursively.
         (concat "\\(?:" nonchars-re
                 "\\|"
                 (mapconcat (lambda (char)
                              (setq char (string char))
                              (concat char
                                      (regexp-not-in words endchars
                                                     (concat prefix char))))
                            chars
                            "\\|")
                 "\\)")))))


-- Stefan


PS: It's not case-insensitive like your code, so I wrote another function to
make a regexp case-insensitive:

(defun regexp-fold-case (regexp)
  "Return a case-insensitive version of REGEXP.
REGEXP is processed one char at a time.  Outside of a bracket
expression, each cased char is replaced by a two-char set such as
\"[Aa]\", and a char that directly follows an escaping backslash is
copied unchanged.  Inside a bracket expression, the other-case variant
of each cased char is added to the set, including for every char of an
interval.  The char classes [:upper:] and [:lower:] are replaced
by [:alpha:]."
  ;; The basic idea is simple, but there are many nasty cases inside
  ;; char-ranges.  If we're inside a char-range and we bump into an
  ;; interval, we have to expand it.
  ;;
  ;; NOTE(review): known gaps, left as they were: a `]' or `-' placed
  ;; right after the negating `^', and a `-' placed right before the
  ;; closing `]', are handled as terminator/interval, not as literals.
  (let ((escape nil)                   ;Non-nil if right after an escaping \
        (range nil)                    ;- nil if outside of a char-range
                                       ;- `in' if inside a char-range
                                       ;- The interval's start char if at the
                                       ;  interval's end.
                                       ;- The charclass name prefix we've seen
                                       ;- `boundary' if right after the
                                       ;  opening bracket of a range
                                       ;- `bracket' if after an internal
                                       ;  open-bracket (maybe a char-class).
        (prev nil)                     ;Previous char.
        up down prefix)
    (mapconcat
     (lambda (char)
       ;; An internal `[' is only emitted once we know whether it starts
       ;; a char-class.  If it did not, `prefix' carries that pending "["
       ;; and every branch below that emits text for this char has to
       ;; put it in front.
       (setq prefix
             (if (and (eq range 'bracket) (not (eq char ?:)))
                 ;; That wasn't a char-class.
                 (progn (setq range 'in) "[")
               ""))
       (prog1
           (cond
            ;; End of a char-range interval:  Expand it.
            ;; Note that the initial char was already output.
            ((integerp range)
             ;; So as to avoid bumping into yet more corner cases
             ;; (if the interval includes special chars like -
             ;; and ]), we just reuse the original interval and
             ;; just add more elements to the char range.
             (let ((strings (list (string ?- char)))
                   ;; Walk down from the interval's end to just above its
                   ;; start with a separate variable, so that `char' is
                   ;; still intact when it is saved in `prev' below.
                   (c char))
               (while (< range c)
                 (setq up (upcase c) down (downcase c))
                 ;; An uncased char is covered by the original interval.
                 (unless (eq up down)
                   (push (string (if (eq up c) down up)) strings))
                 (setq c (1- c)))
               (setq range 'in)
               (apply 'concat (nreverse strings))))
            ;; Inside char-class: accumulate its name up to the `]'.
            ((stringp range)
             ;; FIXME: If `prev' is not ?: then this really wasn't
             ;; a char class.
             (if (not (eq char ?\]))
                 (progn (setq range (concat range (string char))) nil)
               (prog1 (if (let ((completion-ignore-case t))
                            (test-completion range '("upper:" "lower:")))
                          "[:alpha:]" (concat "[:" range "]"))
                 (setq range 'in))))
            ;; An interval: remember its start, the `-' itself is output
            ;; together with the interval's end.  A pending "[" (which is
            ;; then the interval's start) must not be lost.
            ((and (eq range 'in) (eq char ?-))
             (setq range prev) prefix)
            ;; End of the char-range.
            ((and (eq range 'in) (eq char ?\]))
             (setq range nil escape nil) (concat prefix "]"))
            ;; Shift from boundary to `in' since we're done checking `in'
            ;; for cases where `boundary' needs to be handled differently.
            ;; This clause always yields nil, so `cond' moves on.
            ((progn (if (eq range 'boundary) (setq range 'in)) nil))
            ;; A char-class maybe?
            ((and (eq range 'in) (eq char ?\[))
             (setq range 'bracket)
             prefix)
            ;; Yes, a char-class.  The "[:" is output when the class ends.
            ((and (eq range 'bracket) (eq char ?:))
             (setq range ""))
            ;; Beginning of a char range.
            ((and (not range) (not escape) (eq char ?\[))
             (setq range 'boundary)
             "[")
            ;; Some other char in a range or in text: either uncased or
            ;; escaped, so it is copied as is.
            ((or (eq (setq up (upcase char))
                     (setq down (downcase char)))
                 escape)
             (if range
                 (concat prefix (string char))
               (setq escape (and (eq char ?\\) (not escape)))
               (string char)))
            (range
             ;; This may be the beginning of an interval: make
             ;; sure that `char' is placed last, so that we can
             ;; just tuck on "-<foo>" and get back our
             ;; interval unchanged.
             (concat prefix
                     (if (eq char up) (string down char) (string up char))))
            ;; A cased char in plain text.
            (t (string ?\[ up down ?\])))
         ;; Remember this char for next time.
         (setq prev char)))
     regexp
     "")))




reply via email to

[Prev in Thread] Current Thread [Next in Thread]