help-flex
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: FW: Location revisited, column tracking...


From: Akim Demaille
Subject: Re: FW: Location revisited, column tracking...
Date: 25 Jul 2002 17:32:23 +0200
User-agent: Gnus/5.0808 (Gnus v5.8.8) XEmacs/21.4 (Honest Recruiter)

>>>>> "Valkanas" == Valkanas Nikos <address@hidden> writes:

Valkanas> I guess if you are not very used to it, or if you try to
Valkanas> retrofit location info to an already completed scanner, may
Valkanas> take more time than usually. 

I don't agree.  See Bison's own scanner below.  I have marked with `=>'
the lines that handle the locations.

     /* Bison Grammar Scanner                             -*- C -*-
        Copyright (C) 2002 Free Software Foundation, Inc.
     
        This file is part of Bison, the GNU Compiler Compiler.
     
        This program is free software; you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation; either version 2 of the License, or
        (at your option) any later version.
     
        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.
     
        You should have received a copy of the GNU General Public License
        along with this program; if not, write to the Free Software
        Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
        02111-1307  USA
     */
     
     %option debug nodefault noyywrap nounput never-interactive stack
     %option prefix="gram_" outfile="lex.yy.c"
     
     %{
     #include "system.h"
     #include "complain.h"
     #include "quote.h"
     #include "getargs.h"
     #include "gram.h"
     #include "reader.h"
     
     /* Location tracking.

        YY_USER_INIT is the Flex hook expanded once, before the first
        scan: it resets *YYLLOC and records INFILE as its file.

        YY_USER_ACTION is the Flex hook expanded before the action of
        every matched rule: each time we match a string, move the end
        cursor to its end (YYLENG columns further).

        YY_LINES and YY_STEP are not Flex hooks; the rules below invoke
        them explicitly.  The LOCATION_* helpers are defined elsewhere --
        NOTE(review): confirm their exact semantics there.  */
=>   #define YY_USER_INIT                               \
=>   do {                                               \
=>     LOCATION_RESET (*yylloc);                        \
=>     yylloc->file = infile;                   \
=>      /* This is only to avoid GCC warnings. */       \
=>     if (yycontrol) {;};                              \
=>   } while (0)
=>     
=>   #define YY_USER_ACTION  LOCATION_COLUMNS (*yylloc, yyleng);
=>   #define YY_LINES        LOCATION_LINES (*yylloc, yyleng);
=>   #define YY_STEP         LOCATION_STEP (*yylloc);
     
     /* STRING_OBSTACK -- Used to store all the characters that we need to
        keep (to construct ID, STRINGS etc.).  Use the following macros to
        use it.
     
        Use YY_OBS_GROW to append what has just been matched, and
        YY_OBS_FINISH to end the string (it puts the ending 0).
        YY_OBS_FINISH also stores this string in LAST_STRING, which can be
        used, and which is used by YY_OBS_FREE to free the last string.  */
     
     static struct obstack string_obstack;
     /* The string most recently completed by YY_OBS_FINISH.  It has
        external linkage -- NOTE(review): presumably read from other
        files; confirm before making it static.  */
     char *last_string;
     
     /* Append the current match (YYLENG bytes at YYTEXT) to the string
        under construction.  */
     #define YY_OBS_GROW   \
       obstack_grow (&string_obstack, yytext, yyleng)
     
     /* Terminate the string under construction with a NUL and make
        LAST_STRING point to it.  */
     #define YY_OBS_FINISH                                      \
       do {                                                     \
         obstack_1grow (&string_obstack, '\0');                 \
         last_string = obstack_finish (&string_obstack);        \
       } while (0)
     
     /* Give LAST_STRING back to the obstack.  */
     #define YY_OBS_FREE                                        \
       do {                                                     \
         obstack_free (&string_obstack, last_string);           \
       } while (0)
     
     /* Release LAST_STRING, the string most recently completed by
        YY_OBS_FINISH, by handing it back to STRING_OBSTACK.  This merely
        exposes YY_OBS_FREE as a function with external linkage.  */
     void
     scanner_last_string_free (void)
     {
       YY_OBS_FREE;
     }
     
     
     static int braces_level = 0;
     static int percent_percent_count = 0;
     
     static void handle_dollar PARAMS ((braced_code_t code_kind,
                                        char *cp, location_t location));
     static void handle_at PARAMS ((braced_code_t code_kind,
                                    char *cp, location_t location));
     
     %}
     %x SC_COMMENT
     %x SC_STRING SC_CHARACTER
     %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
     %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
     
     id         [.a-zA-Z_][.a-zA-Z_0-9]*
     int          [0-9]+
     eols     (\n|\r|\n\r|\r\n)+
     blanks   [ \t\f]+
     
     %%
=>   %{
=>     /* At each yylex invocation, mark the current position as the
=>        start of the next token.  */
=>     YY_STEP;
=>   %}
     
     
       /*----------------------------.
       | Scanning Bison directives.  |
       `----------------------------*/
     <INITIAL>
     {
       /* Directive keywords.  Some spellings share a token: %binary with
          %nonassoc, %term with %token, and %fixed-output-files with
          %yacc.  Where a pattern has [-_], either separator is
          accepted.  */
       "%binary"               return PERCENT_NONASSOC;
       "%debug"                return PERCENT_DEBUG;
       "%define"               return PERCENT_DEFINE;
       "%defines"              return PERCENT_DEFINES;
       "%destructor"           return PERCENT_DESTRUCTOR;
       "%dprec"                  return PERCENT_DPREC;
       "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
       "%expect"               return PERCENT_EXPECT;
       "%file-prefix"          return PERCENT_FILE_PREFIX;
       "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
       "%glr"[-_]"parser"          return PERCENT_GLR_PARSER;
       "%left"                 return PERCENT_LEFT;
       "%locations"            return PERCENT_LOCATIONS;
       "%merge"                  return PERCENT_MERGE;
       "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
       "%no"[-_]"lines"        return PERCENT_NO_LINES;
       "%nonassoc"             return PERCENT_NONASSOC;
       "%nterm"                return PERCENT_NTERM;
       "%output"               return PERCENT_OUTPUT;
       "%prec"                 return PERCENT_PREC;
       "%printer"              return PERCENT_PRINTER;
       "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
       "%right"                return PERCENT_RIGHT;
       "%skeleton"             return PERCENT_SKELETON;
       "%start"                return PERCENT_START;
       "%term"                 return PERCENT_TOKEN;
       "%token"                return PERCENT_TOKEN;
       "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
       "%type"                 return PERCENT_TYPE;
       "%union"                return PERCENT_UNION;
       "%verbose"              return PERCENT_VERBOSE;
       "%yacc"                 return PERCENT_YACC;
     
       /* Punctuation.  */
       "="                     return EQUAL;
       ":"                     return COLON;
       "|"                     return PIPE;
       ";"                     return SEMICOLON;
     
       /* End-of-line sequences and blanks return no token: they only
          update the location.  */
=>     {eols}      YY_LINES; YY_STEP;
=>     {blanks}    YY_STEP;
       /* Identifiers: the semantic value is the symbol named YYTEXT.  */
       {id}        {
         yylval->symbol = symbol_get (yytext, *yylloc);
         return ID;
       }
     
       /* Integers, read in base 10.  */
       {int}       yylval->integer = strtol (yytext, 0, 10); return INT;
     
       /* Characters.  We don't check there is only one.  */
       \'          YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
     
       /* Strings. */
       \"          YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
     
       /* Comments. */
       "/*"        yy_push_state (SC_COMMENT);
=>     "//".*      YY_STEP;
     
       /* Prologue. */
       "%{"        yy_push_state (SC_PROLOGUE);
     
       /* Code in between braces.  */
       "{"         YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
     
       /* A type.  The enclosing angle brackets are not stored.  */
       "<"[^>]+">" {
         obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
         YY_OBS_FINISH;
         yylval->string = last_string;
         return TYPE;
       }
     
     
       /* Section separator.  The second one also enters SC_EPILOGUE.  */
       "%%"   {
         if (++percent_percent_count == 2)
           yy_push_state (SC_EPILOGUE);
         return PERCENT_PERCENT;
       }
     
       /* Anything else is reported on stderr and skipped.  */
       .           {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": invalid character: `%c'\n", *yytext);
=>       YY_STEP;
       }
     }
     
     
       /*------------------------------------------------------------.
       | Whatever the start condition (but those which correspond to |
       | entity `swallowed' by Bison: SC_ESCAPED_STRING and          |
       | SC_ESCAPED_CHARACTER), no M4 character must escape as is.   |
       `------------------------------------------------------------*/
     
     <SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
     {
       /* Square brackets are stored as the quadrigraphs @<:@ and @:>@.
          In SC_COMMENT nothing at all is stored for them.

          The actions are braced so that they may span several lines: a
          brace-less Flex action ends with its line, so it must never be
          wrapped.  */
       \[ {
         if (YY_START != SC_COMMENT)
           obstack_sgrow (&string_obstack, "@<:@");
       }
       \] {
         if (YY_START != SC_COMMENT)
           obstack_sgrow (&string_obstack, "@:>@");
       }
     }
     
     
     
       /*-----------------------------------------------------------.
       | Scanning a C comment. The initial `/ *' is already eaten.  |
       `-----------------------------------------------------------*/
     
     <SC_COMMENT>
     {
       /* A comment entered from INITIAL is discarded: only the location
          is updated.  A comment entered from any other start condition
          is appended to STRING_OBSTACK.  */
       "*/" { /* End of the comment. */
         if (yy_top_state () == INITIAL)
           {
=>           YY_STEP;
           }
         else
           {
             YY_OBS_GROW;
           }
         yy_pop_state ();
       }
     
       [^\[\]*\n\r]+          if (yy_top_state () != INITIAL) YY_OBS_GROW;
       {eols}        if (yy_top_state () != INITIAL) YY_OBS_GROW; YY_LINES;
       /* Braced, so that the action may span several lines: a brace-less
          Flex action ends with its line.  */
       . { /* Stray `*'. */
         if (yy_top_state () != INITIAL)
           YY_OBS_GROW;
       }
     
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a comment\n");
         yy_pop_state ();
       }
     }
     
     
       /*----------------------------------------------------------------.
       | Scanning a C string, including its escapes.  The initial `"' is |
       | already eaten.                                                  |
       `----------------------------------------------------------------*/
     
     <SC_ESCAPED_STRING>
     {
       /* Closing quote: it is stored too, then the finished string is
          returned as the semantic value.  */
       \" {
         assert (yy_top_state () == INITIAL);
         YY_OBS_GROW;
         YY_OBS_FINISH;
         yylval->string = last_string;
         yy_pop_state ();
         return STRING;
       }
     
       /* Ordinary characters are stored as they are.  */
       [^\"\n\r\\]+      YY_OBS_GROW;
     
       /* A whole run of end-of-line sequences is stored as one newline.  */
       {eols}    obstack_1grow (&string_obstack, '\n'); YY_LINES;
     
       /* Unterminated string: complain, then return what was read.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a string\n");
         assert (yy_top_state () == INITIAL);
         YY_OBS_FINISH;
         yylval->string = last_string;
         yy_pop_state ();
         return STRING;
       }
     }
     
       /*---------------------------------------------------------------.
       | Scanning a C character, decoding its escapes.  The initial "'" |
       | is already eaten.                                              |
       `---------------------------------------------------------------*/
     
     <SC_ESCAPED_CHARACTER>
     {
       /* Closing quote.  The stored text, quotes included, names a
          symbol; that symbol is made a token whose user number is
          LAST_STRING[1], i.e. the byte stored right after the opening
          quote.  The text is freed before returning -- NOTE(review):
          presumably symbol_get keeps its own copy; confirm.  */
       \' {
         YY_OBS_GROW;
         assert (yy_top_state () == INITIAL);
         {
           YY_OBS_FINISH;
           yylval->symbol = symbol_get (last_string, *yylloc);
           symbol_class_set (yylval->symbol, token_sym, *yylloc);
           symbol_user_token_number_set (yylval->symbol,
                                         (unsigned char) last_string[1], 
*yylloc);
           YY_OBS_FREE;
           yy_pop_state ();
           return ID;
         }
       }
     
       /* Ordinary characters are stored as they are.  */
       [^\'\n\r\\]      YY_OBS_GROW;
     
       /* A whole run of end-of-line sequences is stored as one newline.  */
=>     {eols}    obstack_1grow (&string_obstack, '\n'); YY_LINES;
     
       /* Unterminated character.  NOTE(review): the rule above returns ID
          and sets yylval->symbol, whereas this one returns CHARACTER and
          sets yylval->string -- confirm the parser expects that.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a character\n");
         assert (yy_top_state () == INITIAL);
         YY_OBS_FINISH;
         yylval->string = last_string;
         yy_pop_state ();
         return CHARACTER;
       }
     }
     
     
       /*----------------------------.
       | Decode escaped characters.  |
       `----------------------------*/
     
     <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
     {
       /* Octal escape, exactly three digits.  A value above 255 is
          reported and nothing is stored for it.  */
       \\[0-7]{3}                {
         long c = strtol (yytext + 1, 0, 8);
         if (c > 255)
           {
             LOCATION_PRINT (stderr, *yylloc);
             fprintf (stderr, ": invalid escape: %s\n", quote (yytext));
=>           YY_STEP;
           }
         else
           obstack_1grow (&string_obstack, c);
       }
     
       /* Hexadecimal escape, exactly two digits.  */
       \\x[0-9a-fA-F]{2}        {
         obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
       }
     
       /* Simple escapes are replaced by the byte they denote.  */
       \\a        obstack_1grow (&string_obstack, '\a');
       \\b        obstack_1grow (&string_obstack, '\b');
       \\f        obstack_1grow (&string_obstack, '\f');
       \\n        obstack_1grow (&string_obstack, '\n');
       \\r        obstack_1grow (&string_obstack, '\r');
       \\t        obstack_1grow (&string_obstack, '\t');
       \\v        obstack_1grow (&string_obstack, '\v');
       /* Escaped backslash or double quote.  NOTE(review): the single
          quote is not in this class, so an escaped single quote reaches
          the unrecognized-escape rule below.  */
       \\[\\""]   obstack_1grow (&string_obstack, yytext[1]);
       /* Any other escape is reported, and stored undecoded.  */
       \\(.|\n)        {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unrecognized escape: %s\n", quote (yytext));
         YY_OBS_GROW;
       }
       /* Flex wants this rule, in case of a `\<<EOF>>'. */
       \\                   YY_OBS_GROW;
     }
     
     
       /*----------------------------------------------------------.
       | Scanning a C character without decoding its escapes.  The |
       | initial "'" is already eaten.                             |
       `----------------------------------------------------------*/
     
     <SC_CHARACTER>
     {
       /* Closing quote: store it and go back to the enclosing user code.
          No token is returned.  */
       \' {
         YY_OBS_GROW;
         assert (yy_top_state () != INITIAL);
         yy_pop_state ();
       }
     
       /* Everything else is stored verbatim, escapes included.  Square
          brackets are left to the M4-quoting rules.  */
       [^\[\]\'\n\r\\]+     YY_OBS_GROW;
       \\(.|\n)             YY_OBS_GROW;
       /* Flex wants this rule, in case of a `\<<EOF>>'. */
       \\                   YY_OBS_GROW;
     
=>     {eols}               YY_OBS_GROW; YY_LINES;
     
       /* Unterminated character: complain and leave this start
          condition.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a character\n");
         assert (yy_top_state () != INITIAL);
         yy_pop_state ();
       }
     }
     
     
       /*----------------------------------------------------------------.
       | Scanning a C string, without decoding its escapes.  The initial |
       | `"' is already eaten.                                           |
       `----------------------------------------------------------------*/
     
     <SC_STRING>
     {
       /* Closing quote: store it and go back to the enclosing user code.
          No token is returned.  */
       \" {
         assert (yy_top_state () != INITIAL);
         YY_OBS_GROW;
         yy_pop_state ();
       }
     
       /* Everything else is stored verbatim, escapes included.  Square
          brackets are left to the M4-quoting rules.  */
       [^\[\]\"\n\r\\]+      YY_OBS_GROW;
       \\(.|\n)              YY_OBS_GROW;
       /* Flex wants this rule, in case of a `\<<EOF>>'. */
       \\                   YY_OBS_GROW;
     
=>     {eols}                YY_OBS_GROW; YY_LINES;
     
       /* Unterminated string: complain and leave this start condition.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a string\n");
         assert (yy_top_state () != INITIAL);
         yy_pop_state ();
       }
     }
     
     
       /*---------------------------------------------------.
       | Strings, comments etc. can be found in user code.  |
       `---------------------------------------------------*/
     
     <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
     {
       /* In all three kinds of user code the opening delimiter is stored
          in STRING_OBSTACK, and a dedicated start condition is pushed to
          scan up to the matching end.  */
     
       /* Characters.  We don't check there is only one.  */
       \'          YY_OBS_GROW; yy_push_state (SC_CHARACTER);
     
       /* Strings. */
       \"          YY_OBS_GROW; yy_push_state (SC_STRING);
     
       /* Comments. */
       "/*"        YY_OBS_GROW; yy_push_state (SC_COMMENT);
       "//".*      YY_OBS_GROW;
     
       /* Not comments. */
       "/"         YY_OBS_GROW;
     }
     
     
       /*---------------------------------------------------------------.
       | Scanning some code in braces (%union and actions). The initial |
       | "{" is already eaten.                                          |
       `---------------------------------------------------------------*/
     
     <SC_BRACED_CODE>
     {
       /* A closing brace is always stored.  The one that brings the
          nesting level back to zero ends the code, which is then
          returned as the semantic value.  */
       "}" {
         YY_OBS_GROW;
         if (--braces_level == 0)
           {
             yy_pop_state ();
             YY_OBS_FINISH;
             yylval->string = last_string;
             return BRACED_CODE;
           }
       }
     
       "{"                  YY_OBS_GROW; braces_level++;
     
       /* References to semantic values and to locations are not stored
          as is: the handlers store their translation instead.  */
       "$"("<"[^>]+">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
                                                        yytext, *yylloc); }
       "@"(-?[0-9]+|"$")               { handle_at (current_braced_code,
                                                    yytext, *yylloc); }
     
       /* Plain text.  The class excludes every character that starts
          some other rule active here: dollar and at signs, square
          brackets, slash, both quotes, braces, and end of lines.  */
       [^$@\[\]/\'\"\{\}\n\r]+ YY_OBS_GROW;
=>     {eols}        YY_OBS_GROW; YY_LINES;
     
       /* A loose $, or /, or etc. */
       .             YY_OBS_GROW;
     
       /* Unterminated code: complain, then return what was read.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a braced code\n");
         yy_pop_state ();
         YY_OBS_FINISH;
         yylval->string = last_string;
         return BRACED_CODE;
       }
     
     }
     
     
       /*--------------------------------------------------------------.
       | Scanning some prologue: from "%{" (already scanned) to "%}".  |
       `--------------------------------------------------------------*/
     
     <SC_PROLOGUE>
     {
       /* End of the prologue.  The closing delimiter itself is not
          stored.  */
       "%}" {
         yy_pop_state ();
         YY_OBS_FINISH;
         yylval->string = last_string;
         return PROLOGUE;
       }
     
       /* Plain text, and a percent sign that does not start the closing
          delimiter.  */
       [^%\[\]/\'\"\n\r]+ YY_OBS_GROW;
       "%"                YY_OBS_GROW;
=>     {eols}             YY_OBS_GROW; YY_LINES;
     
       /* Unterminated prologue: complain, then return what was read.  */
       <<EOF>> {
         LOCATION_PRINT (stderr, *yylloc);
         fprintf (stderr, ": unexpected end of file in a prologue\n");
         yy_pop_state ();
         YY_OBS_FINISH;
         yylval->string = last_string;
         return PROLOGUE;
       }
     }
     
     
       /*---------------------------------------------------------------.
       | Scanning the epilogue (everything after the second "%%", which |
       | has already been eaten).                                       |
       `---------------------------------------------------------------*/
     
     <SC_EPILOGUE>
     {
       /* Everything but square brackets is stored as is.
          NOTE(review): end-of-line sequences are consumed here without
          YY_LINES, so this rule does not advance the line count.  */
       ([^\[\]]|{eols})+  YY_OBS_GROW;
     
       /* The end of file ends the epilogue; this is not an error.  */
       <<EOF>> {
         yy_pop_state ();
         YY_OBS_FINISH;
         yylval->string = last_string;
         return EPILOGUE;
       }
     }
     
     
     %%
     
     /*------------------------------------------------------------------.
     | TEXT is pointing to a wannabe semantic value (i.e., a `$').       |
     |                                                                   |
     | Possible inputs: $[<TYPENAME>]($|integer)                         |
     |                                                                   |
     | Output to the STRING_OBSTACK a reference to this semantic value.  |
     |                                                                   |
     | TEXT is modified in place: the `>' closing an explicit TYPENAME   |
     | is overwritten with a NUL.  Errors are reported at LOCATION.      |
     `------------------------------------------------------------------*/
     
     static inline void
     handle_action_dollar (char *text, location_t location)
     {
       const char *type_name = NULL;
       char *cp = text + 1;
     
       /* Get the type name if explicit. */
       if (*cp == '<')
         {
           type_name = ++cp;
           while (*cp != '>')
             ++cp;
           *cp = '\0';
           ++cp;
         }
     
       if (*cp == '$')
         {
           /* `$$': the type defaults to that of the rule's left-hand
              side.  */
           if (!type_name)
             type_name = symbol_list_n_type_name_get (current_rule,
                                                      location, 0);
           if (!type_name && typed)
             complain_at (location, _("$$ of `%s' has no declared type"),
                          current_rule->sym->tag);
           if (!type_name)
             type_name = "";
           obstack_fgrow1 (&string_obstack,
                           "]b4_lhs_value([%s])[", type_name);
         }
       /* The <ctype.h> functions are only defined for EOF and for values
          representable as unsigned char, and plain char may be signed.  */
       else if (isdigit ((unsigned char) *cp) || *cp == '-')
         {
           /* RULE_LENGTH is the number of values in the current rule so
              far, which says where to find `$0' with respect to the top of
              the stack.  It is not the same as the rule->length in the
              case of mid rule actions.  */
           int rule_length = symbol_list_length (current_rule->next);
           int n = strtol (cp, &cp, 10);
     
           if (n > rule_length)
             complain_at (location, _("invalid value: %s%d"), "$", n);
           else
             {
               /* Only strictly positive references have a declared type
                  to look up.  */
               if (!type_name && n > 0)
                 type_name = symbol_list_n_type_name_get (current_rule,
                                                          location, n);
               if (!type_name && typed)
                 complain_at (location, _("$%d of `%s' has no declared type"),
                           n, current_rule->sym->tag);
               if (!type_name)
                 type_name = "";
               obstack_fgrow3 (&string_obstack,
                               "]b4_rhs_value([%d], [%d], [%s])[",
                               rule_length, n, type_name);
             }
         }
       else
         {
           complain_at (location, _("%s is invalid"), quote (text));
         }
     }
     
     
     /*---------------------------------------------------------------.
     | Handle a `$' reference found in code attached to a symbol (a   |
     | destructor or a printer).  Only `$$' is valid there: it is     |
     | emitted to STRING_OBSTACK as `]b4_dollar_dollar['.  Anything   |
     | else is reported as invalid at LOCATION.                       |
     `---------------------------------------------------------------*/
     
     static inline void
     handle_symbol_code_dollar (char *text, location_t location)
     {
       /* TEXT[0] is the `$' itself; only a second `$' may follow it.  */
       if (text[1] != '$')
         {
           complain_at (location, _("%s is invalid"), quote (text));
           return;
         }
     
       obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
     }
     
     
     /*-----------------------------------------------------------------.
     | Dispatch onto handle_action_dollar or handle_symbol_code_dollar, |
     | depending upon BRACED_CODE_KIND.                                 |
     `-----------------------------------------------------------------*/
     
     static void
     handle_dollar (braced_code_t braced_code_kind,
                    char *text, location_t location)
     {
       if (braced_code_kind == action_braced_code)
         handle_action_dollar (text, location);
       else if (braced_code_kind == destructor_braced_code
                || braced_code_kind == printer_braced_code)
         /* Destructors and printers share one handler.  */
         handle_symbol_code_dollar (text, location);
       /* Any other kind is ignored.  */
     }
     
     
     /*------------------------------------------------------.
     | TEXT is a location token (i.e., a `@$').  Output to   |
     | STRING_OBSTACK a reference to this location.          |
     `------------------------------------------------------*/
     
     static inline void
     handle_action_at (char *text, location_t location)
     {
       char *cp = text + 1;
       /* Set unconditionally, even when the reference below turns out to
          be invalid.  */
       locations_flag = 1;
     
       if (*cp == '$')
         {
           obstack_sgrow (&string_obstack, "]b4_lhs_location[");
         }
       /* The <ctype.h> functions are only defined for EOF and for values
          representable as unsigned char, and plain char may be signed.  */
       else if (isdigit ((unsigned char) *cp) || *cp == '-')
         {
           /* RULE_LENGTH is the number of values in the current rule so
              far, which says where to find `$0' with respect to the top of
              the stack.  It is not the same as the rule->length in the
              case of mid rule actions.  */
           int rule_length = symbol_list_length (current_rule->next);
           int n = strtol (cp, &cp, 10);
     
           if (n > rule_length)
             complain_at (location, _("invalid value: %s%d"), "@", n);
           else
             obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
                             rule_length, n);
         }
       else
         {
           complain_at (location, _("%s is invalid"), quote (text));
         }
     }
     
     
     /*---------------------------------------------------------------.
     | Handle a `@' reference found in code attached to a symbol (a   |
     | destructor or a printer).  Only `@$' is valid there: it is     |
     | emitted to STRING_OBSTACK as `]b4_at_dollar['.  Anything else  |
     | is reported as invalid at LOCATION.                            |
     `---------------------------------------------------------------*/
     
     static inline void
     handle_symbol_code_at (char *text, location_t location)
     {
       /* TEXT[0] is the `@' itself; only a `$' may follow it.  */
       if (text[1] != '$')
         {
           complain_at (location, _("%s is invalid"), quote (text));
           return;
         }
     
       obstack_sgrow (&string_obstack, "]b4_at_dollar[");
     }
     
     
     /*-------------------------------------------------------------------.
     | Dispatch onto handle_action_at or handle_symbol_code_at, depending |
     | upon BRACED_CODE_KIND.                                             |
     `-------------------------------------------------------------------*/
     
     static void
     handle_at (braced_code_t braced_code_kind,
                char *text, location_t location)
     {
       if (braced_code_kind == action_braced_code)
         handle_action_at (text, location);
       else if (braced_code_kind == destructor_braced_code
                || braced_code_kind == printer_braced_code)
         /* Destructors and printers share one handler.  */
         handle_symbol_code_at (text, location);
       /* Any other kind is ignored.  */
     }
     
     
     /*-------------------------.
     | Initialize the scanner.  |
     `-------------------------*/
     
     void
     scanner_initialize (void)
     {
       /* STRING_OBSTACK is the only scanner state set up here.  */
       obstack_init (&string_obstack);
     }
     
     
     /*-----------------------------------------------.
     | Free all the memory allocated to the scanner.  |
     `-----------------------------------------------*/
     
     void
     scanner_free (void)
     {
       /* A null second argument makes obstack_free release every object
          of the obstack.  */
       obstack_free (&string_obstack, 0);
       /* Reclaim Flex's buffers.  */
       yy_delete_buffer (YY_CURRENT_BUFFER);
     }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]