emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/src/regex.c,v


From: Stefan Monnier
Subject: [Emacs-diffs] Changes to emacs/src/regex.c,v
Date: Fri, 22 Sep 2006 17:30:14 +0000

CVSROOT:        /sources/emacs
Module name:    emacs
Changes by:     Stefan Monnier <monnier>        06/09/22 17:30:13

Index: regex.c
===================================================================
RCS file: /sources/emacs/emacs/src/regex.c,v
retrieving revision 1.212
retrieving revision 1.213
diff -u -b -r1.212 -r1.213
--- regex.c     16 Sep 2006 15:28:47 -0000      1.212
+++ regex.c     22 Sep 2006 17:30:13 -0000      1.213
@@ -3877,10 +3877,12 @@
          if (fastmap)
            {
              int c = RE_STRING_CHAR (p + 1, pend - p);
-
+             /* When fast-scanning, the fastmap can be indexed either with
+                a char (smaller than 256) or with the first byte of
+                a char's byte sequence.  So we have to conservatively add
+                both to the table.  */
              if (SINGLE_BYTE_CHAR_P (c))
                fastmap[c] = 1;
-             else
                fastmap[p[1]] = 1;
            }
          break;
@@ -3899,6 +3901,10 @@
             So any that are not listed in the charset
             are possible matches, even in multibyte buffers.  */
          if (!fastmap) break;
+         /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+            because it will automatically be set when needed by virtue of
+            being larger than the highest char of its charset (0xbf) but
+            smaller than (1<<BYTEWIDTH).  */
          for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
               j < (1 << BYTEWIDTH); j++)
            fastmap[j] = 1;
@@ -3909,7 +3915,13 @@
          for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
               j >= 0; j--)
            if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
+             {
              fastmap[j] = 1;
+#ifdef emacs
+               if (j >= 0x80 && j < 0xa0)
+                 fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+             }
 
          if ((not && multibyte)
              /* Any character set can possibly contain a character
@@ -4352,11 +4364,33 @@
                    }
                }
              else
+               do
+                 {
+                   re_char *d_start = d;
                while (range > lim && !fastmap[*d])
                  {
                    d++;
                    range--;
                  }
+#ifdef emacs
+                   if (multibyte && range > lim)
+                     {
+                       /* Check that we are at the beginning of a char.  */
+                       int at_boundary;
+                       AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+                       if (at_boundary)
+                         break;
+                       else
+                         { /* We have matched an internal byte of a char
+                              rather than the leading byte, so it's a false
+                              positive: we should keep scanning.  */
+                           d++; range--;
+                         }
+                     }
+                   else
+#endif
+                     break;
+                 } while (1);
 
              startpos += irange - range;
            }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]