bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

fix for dfa when mixed with libsigsegv on Windows


From: Aharon Robbins
Subject: fix for dfa when mixed with libsigsegv on Windows
Date: Tue, 06 Apr 2010 20:02:27 +0300

Hi. The Cygwin maintainer tells me that libsigsegv on Windows pulls in the
dreaded <windows.h> header file which defines WCHAR as wchar_t, causing a
conflict with the WCHAR in the enum in dfa.h.  I propose the following diff
which compiles OK under Linux and moves the details into dfa.c.

Thanks,

Arnold
---------------------------------------------------------------
Index: dfa.h
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/dfa.h,v
retrieving revision 1.5
diff -u -r1.5 dfa.h
--- dfa.h       23 Mar 2010 17:46:27 -0000      1.5
+++ dfa.h       4 Apr 2010 12:26:36 -0000
@@ -50,98 +50,8 @@
    are operators and others are terminal symbols.  Most (but not all) of these
    codes are returned by the lexical analyzer. */
 
-typedef enum
-{
-  END = -1,                    /* END is a terminal symbol that matches the
-                                  end of input; any value of END or less in
-                                  the parse tree is such a symbol.  Accepting
-                                  states of the DFA are those that would have
-                                  a transition on END. */
-
-  /* Ordinary character values are terminal symbols that match themselves. */
-
-  EMPTY = NOTCHAR,             /* EMPTY is a terminal symbol that matches
-                                  the empty string. */
-
-  BACKREF,                     /* BACKREF is generated by \<digit>; it
-                                  it not completely handled.  If the scanner
-                                  detects a transition on backref, it returns
-                                  a kind of "semi-success" indicating that
-                                  the match will have to be verified with
-                                  a backtracking matcher. */
-
-  BEGLINE,                     /* BEGLINE is a terminal symbol that matches
-                                  the empty string if it is at the beginning
-                                  of a line. */
-
-  ENDLINE,                     /* ENDLINE is a terminal symbol that matches
-                                  the empty string if it is at the end of
-                                  a line. */
-
-  BEGWORD,                     /* BEGWORD is a terminal symbol that matches
-                                  the empty string if it is at the beginning
-                                  of a word. */
-
-  ENDWORD,                     /* ENDWORD is a terminal symbol that matches
-                                  the empty string if it is at the end of
-                                  a word. */
-
-  LIMWORD,                     /* LIMWORD is a terminal symbol that matches
-                                  the empty string if it is at the beginning
-                                  or the end of a word. */
-
-  NOTLIMWORD,                  /* NOTLIMWORD is a terminal symbol that
-                                  matches the empty string if it is not at
-                                  the beginning or end of a word. */
-
-  QMARK,                       /* QMARK is an operator of one argument that
-                                  matches zero or one occurences of its
-                                  argument. */
-
-  STAR,                                /* STAR is an operator of one argument that
-                                  matches the Kleene closure (zero or more
-                                  occurrences) of its argument. */
-
-  PLUS,                                /* PLUS is an operator of one argument that
-                                  matches the positive closure (one or more
-                                  occurrences) of its argument. */
-
-  REPMN,                       /* REPMN is a lexical token corresponding
-                                  to the {m,n} construct.  REPMN never
-                                  appears in the compiled token vector. */
-
-  CAT,                         /* CAT is an operator of two arguments that
-                                  matches the concatenation of its
-                                  arguments.  CAT is never returned by the
-                                  lexical analyzer. */
-
-  OR,                          /* OR is an operator of two arguments that
-                                  matches either of its arguments. */
-
-  ORTOP,                       /* OR at the toplevel in the parse tree.
-                                  This is used for a boyer-moore heuristic. */
-
-  LPAREN,                      /* LPAREN never appears in the parse tree,
-                                  it is only a lexeme. */
-
-  RPAREN,                      /* RPAREN never appears in the parse tree. */
-
-#ifdef MBS_SUPPORT
-  ANYCHAR,                     /* ANYCHAR is a terminal symbol that matches
-                                  any multibyte (or single byte) characters.
-                                 It is used only if MB_CUR_MAX > 1.  */
-
-  MBCSET,                      /* MBCSET is similar to CSET, but for
-                                  multibyte characters.  */
-
-  WCHAR,                       /* Only returned by lex.  wctok contains
-                                  the wide character representation.  */
-#endif /* MBS_SUPPORT */
-
-  CSET                         /* CSET and (and any value greater) is a
-                                  terminal symbol that matches any of a
-                                  class of characters. */
-} token;
+enum token_enum;
+typedef enum token_enum token;
 
 /* Sets are stored in an array in the compiled dfa; the index of the
    array corresponding to a given set token is given by SET_INDEX(t). */
Index: dfa.c
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/dfa.c,v
retrieving revision 1.29
diff -u -r1.29 dfa.c
--- dfa.c       2 Apr 2010 09:33:41 -0000       1.29
+++ dfa.c       4 Apr 2010 12:29:47 -0000
@@ -102,6 +102,99 @@
 # undef clrbit
 #endif
 
+enum token_enum
+{
+  END = -1,                    /* END is a terminal symbol that matches the
+                                  end of input; any value of END or less in
+                                  the parse tree is such a symbol.  Accepting
+                                  states of the DFA are those that would have
+                                  a transition on END. */
+
+  /* Ordinary character values are terminal symbols that match themselves. */
+
+  EMPTY = NOTCHAR,             /* EMPTY is a terminal symbol that matches
+                                  the empty string. */
+
+  BACKREF,                     /* BACKREF is generated by \<digit>; it
+                                  it not completely handled.  If the scanner
+                                  detects a transition on backref, it returns
+                                  a kind of "semi-success" indicating that
+                                  the match will have to be verified with
+                                  a backtracking matcher. */
+
+  BEGLINE,                     /* BEGLINE is a terminal symbol that matches
+                                  the empty string if it is at the beginning
+                                  of a line. */
+
+  ENDLINE,                     /* ENDLINE is a terminal symbol that matches
+                                  the empty string if it is at the end of
+                                  a line. */
+
+  BEGWORD,                     /* BEGWORD is a terminal symbol that matches
+                                  the empty string if it is at the beginning
+                                  of a word. */
+
+  ENDWORD,                     /* ENDWORD is a terminal symbol that matches
+                                  the empty string if it is at the end of
+                                  a word. */
+
+  LIMWORD,                     /* LIMWORD is a terminal symbol that matches
+                                  the empty string if it is at the beginning
+                                  or the end of a word. */
+
+  NOTLIMWORD,                  /* NOTLIMWORD is a terminal symbol that
+                                  matches the empty string if it is not at
+                                  the beginning or end of a word. */
+
+  QMARK,                       /* QMARK is an operator of one argument that
+                                  matches zero or one occurences of its
+                                  argument. */
+
+  STAR,                                /* STAR is an operator of one argument that
+                                  matches the Kleene closure (zero or more
+                                  occurrences) of its argument. */
+
+  PLUS,                                /* PLUS is an operator of one argument that
+                                  matches the positive closure (one or more
+                                  occurrences) of its argument. */
+
+  REPMN,                       /* REPMN is a lexical token corresponding
+                                  to the {m,n} construct.  REPMN never
+                                  appears in the compiled token vector. */
+
+  CAT,                         /* CAT is an operator of two arguments that
+                                  matches the concatenation of its
+                                  arguments.  CAT is never returned by the
+                                  lexical analyzer. */
+
+  OR,                          /* OR is an operator of two arguments that
+                                  matches either of its arguments. */
+
+  ORTOP,                       /* OR at the toplevel in the parse tree.
+                                  This is used for a boyer-moore heuristic. */
+
+  LPAREN,                      /* LPAREN never appears in the parse tree,
+                                  it is only a lexeme. */
+
+  RPAREN,                      /* RPAREN never appears in the parse tree. */
+
+#ifdef MBS_SUPPORT
+  ANYCHAR,                     /* ANYCHAR is a terminal symbol that matches
+                                  any multibyte (or single byte) characters.
+                                 It is used only if MB_CUR_MAX > 1.  */
+
+  MBCSET,                      /* MBCSET is similar to CSET, but for
+                                  multibyte characters.  */
+
+  WCHAR,                       /* Only returned by lex.  wctok contains
+                                  the wide character representation.  */
+#endif /* MBS_SUPPORT */
+
+  CSET                         /* CSET and (and any value greater) is a
+                                  terminal symbol that matches any of a
+                                  class of characters. */
+};
+
 static void dfamust (struct dfa *dfa);
 static void regexp (int toplevel);
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]