grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Changes to grep/src/dfa.c


From: Stepan Kasal
Subject: Changes to grep/src/dfa.c
Date: Thu, 16 Dec 2004 03:37:34 -0500

Index: grep/src/dfa.c
diff -u grep/src/dfa.c:1.30 grep/src/dfa.c:1.31
--- grep/src/dfa.c:1.30 Thu Dec 16 07:18:15 2004
+++ grep/src/dfa.c      Thu Dec 16 08:19:29 2004
@@ -365,7 +365,6 @@
    reader is referred to the GNU Regex documentation for the
    meaning of the @address@hidden@ syntax bits. */
 
-static char const *lexstart;   /* Pointer to beginning of input string. */
 static char const *lexptr;     /* Pointer to next input character. */
 static int lexleft;            /* Number of characters remaining. */
 static token lasttok;          /* Previous token returned; initially END. */
@@ -383,7 +382,7 @@
 static int cur_mb_index;        /* Byte index of the current scanning multibyte
                                    character.
 
-                                  singlebyte character : cur_mb_index = 0
+                                  single byte character : cur_mb_index = 0
                                   multibyte character
                                       1st byte : cur_mb_index = 1
                                       2nd byte : cur_mb_index = 2
@@ -394,7 +393,7 @@
                                   byte of corresponding multibyte character
                                   in the input string.  A element's value
                                   is 0 if corresponding character is a
-                                  singlebyte chracter.
+                                  single byte chracter.
                                   e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
                                    mblen_buf :   0,       3,       2,       1
                                */
@@ -432,10 +431,10 @@
           cur_mb_len was already set by mbrlen().  */
        cur_mb_index = 1;
       else if (cur_mb_len < 1)
-       /* Invalid sequence.  We treat it as a singlebyte character.
+       /* Invalid sequence.  We treat it as a single byte character.
           cur_mb_index is aleady 0.  */
        cur_mb_len = 1;
-      /* Otherwise, cur_mb_len == 1, it is a singlebyte character.
+      /* Otherwise, cur_mb_len == 1, it is a single byte character.
         cur_mb_index is aleady 0.  */
     }
 }
@@ -528,7 +527,8 @@
 
   work_mbc->nchars = work_mbc->nranges = work_mbc->nch_classes = 0;
   work_mbc->nequivs = work_mbc->ncoll_elems = 0;
-  work_mbc->chars = work_mbc->ch_classes = NULL;
+  work_mbc->chars = NULL;
+  work_mbc->ch_classes = NULL;
   work_mbc->range_sts = work_mbc->range_ends = NULL;
   work_mbc->equivs = work_mbc->coll_elems = NULL;
 
@@ -1098,15 +1098,17 @@
                          setbit_case_fold (c, ccl);
                      } else {
                        /* POSIX locales are painful - leave the decision to 
libc */
-                       char expr[6] = { '[', 0 /* c */, '-', 0 /* c2 */, ']', 
'\0' };
                        regex_t re;
-                       expr[1] = c;
-                       expr[3] = c2;
+                       char expr[6]; /* = { '[', c, '-', c2, ']', '\0' }; */
+
+                       expr[0] = '['; expr[1] = c; expr[2] = '-';
+                       expr[3] = c2; expr[4] = ']'; expr[5] = '\0';
                        if (regcomp (&re, expr, case_fold ? REG_ICASE : 0) == 
REG_NOERROR) {
                          for (c = 0; c < NOTCHAR; ++c) {
-                           char buf[2] = { 0 /* c */, '\0' };
                            regmatch_t mat;
-                           buf[0] = c;
+                           char buf[2]; /* = { c, '\0' }; */
+
+                           buf[0] = c; buf[1] = '\0';
                            if (regexec (&re, buf, 1, &mat, 0) == REG_NOERROR
                                && mat.rm_so == 0 && mat.rm_eo == 1)
                               setbit_case_fold (c, ccl);
@@ -1179,8 +1181,8 @@
          : (((cur_mb_index == 1)? 1 : 0) /* 1st-byte of multibyte char */
             + ((cur_mb_index == cur_mb_len)? 2 : 0)); /* last-byte */
       else
-       /* It may be unnecesssary, but it is safer to treat other
-          symbols as singlebyte characters.  */
+       /* It may be unnecessary, but it is safer to treat other
+          symbols as single byte characters.  */
        dfa->multibyte_prop[dfa->tindex] = 3;
     }
 #endif
@@ -1406,7 +1408,7 @@
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
   dfa = d;
-  lexstart = lexptr = s;
+  lexptr = s;
   lexleft = len;
   lasttok = END;
   laststart = 1;
@@ -2000,7 +2002,7 @@
   int state_letter;            /* New state on a letter transition. */
   static int initialized;      /* Flag for static initialization. */
 #ifdef MBS_SUPPORT
-  int next_isnt_1st_byte = 0;  /* Flag If we can't add state0.  */
+  int next_isnt_1st_byte = 0;  /* Flag if we can't add state0.  */
 #endif
   int i, j, k;
 
@@ -2183,11 +2185,11 @@
             character, or the states of follows must accept the bytes
             which are not 1st byte of the multibyte character.
             Then, if a state of follows encounter a byte, it must not be
-            a 1st byte of a multibyte character nor singlebyte character.
+            a 1st byte of a multibyte character nor single byte character.
             We cansel to add state[0].follows to next state, because
             state[0] must accept 1st-byte
 
-            For example, we assume <sb a> is a certain singlebyte
+            For example, we assume <sb a> is a certain single byte
             character, <mb A> is a certain multibyte character, and the
             codepoint of <sb a> equals the 2nd byte of the codepoint of
             <mb A>.
@@ -2363,12 +2365,12 @@
 #ifdef MBS_SUPPORT
 /* Multibyte character handling sub-routines for dfaexec.  */
 
-/* Initial state may encounter the byte which is not a singlebyte character
+/* Initial state may encounter the byte which is not a single byte character
    nor 1st byte of a multibyte character.  But it is incorrect for initial
    state to accept such a byte.
    For example, in sjis encoding the regular expression like "\\" accepts
    the codepoint 0x5c, but should not accept the 2nd byte of the codepoint
-   0x815c. Then Initial state must skip the bytes which are not a singlebyte
+   0x815c. Then Initial state must skip the bytes which are not a single byte
    character nor 1st byte of a multibyte character.  */
 #define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p)         \
   if (s == 0)                                          \
@@ -2733,7 +2735,7 @@
 
   if (nelem == 0 || maxlen == 0)
     /* This state has no multibyte operator which can match.
-       We need to check only one singlebyte character.  */
+       We need to check only one single byte character.  */
     {
       status_transit_state rs;
       rs = transit_state_singlebyte(d, s, *pp, &s1);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]