[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
SML and LISP support for GNU id-utils
From: |
Cynbe ru Taren |
Subject: |
SML and LISP support for GNU id-utils |
Date: |
Sun, 23 Jan 2005 22:15:29 -0600 |
User-agent: |
Gnus/5.1007 (Gnus v5.10.7) XEmacs/21.4 (Corporate Culture, linux) |
The GNU id-utils package is great! It is valuable
for reading any codebase over ten kilolines, and
indispensable for multi-megaline codebases (where
etags simply halts and catches fire).
Which makes it unfortunate that the id-utils do
not support more languages. :(
In particular, lisp- and ml-family languages are
ill-served by the current scanners.
Appended please find a copy of scanners.c tweaked
to support these two language families.
If one also amends the default id-lang.map by
eliding the existing elisp line and adding the
lines (remember to preserve tabs!)
*.sig SML
*.sml SML
*.mli SML
*.ml SML
*.lsp LISP
*.el LISP
then reasonably good support for most lisp and
ml variants results. I tested on SML and elisp,
but it should also work fine on Ocaml, Moby, xlisp
commonlisp, scheme &tc.
It would be great if these changes could be
folded into the canonical source release.
If needed for some reason, the full tarball
(and a Debian binary .deb) may be found here:
http://muq.org/~cynbe/ml/smlnj-hacking-idutils.html
Thanks!
Life is Good,
-- Cynbe
address@hidden
(PS: My tweaks are clearly derivative works
and thus fall under the existing license.
So let's not sweat that, please.)
-------------------------------------snip-----------------------------
/* scanners.c -- file & directory name manipulations
Copyright (C) 1986, 1995, 1996, 1999 Free Software Foundation, Inc.
Written by Greg McGary <address@hidden>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include <config.h>
#include <stdio.h>
#include <ctype.h>
#if HAVE_FCNTL_H
# include <fcntl.h>
#endif
#include <getopt.h>
#include "xstdlib.h"
#include "xstddef.h"
#include "xunistd.h"
#include "xsysstat.h"
#include "xstring.h"
#include "xmalloc.h"
#include "xnls.h"
#include "error.h"
#include "scanners.h"
#include "tokflags.h"
#define DEBUG(args) /* printf args */
struct obstack lang_args_obstack;
struct lang_args *lang_args_default = 0;
struct lang_args *lang_args_list = 0;
struct obstack tokens_obstack;
int log_8_member_files = 0;
extern void usage __P((void));
extern char *program_name;
/****************************************************************************/
struct lang_args **parse_language_map_file __P((char const *file_name, struct
lang_args **next_ptr));
char *read_language_map_file __P((char const *file_name));
void tokenize_args_string __P((char *args_string, int *argcp, char ***argvp));
static struct token *get_token_lisp __P((FILE *in_FILE, void const *args, int
*flags));
static void *parse_args_lisp __P((char **argv, int argc));
static void help_me_lisp __P((void));
static struct token *get_token_sml __P((FILE *in_FILE, void const *args, int
*flags));
static void *parse_args_sml __P((char **argv, int argc));
static void help_me_sml __P((void));
static struct token *get_token_c __P((FILE *in_FILE, void const *args, int
*flags));
static void *parse_args_c __P((char **argv, int argc));
static void help_me_c __P((void));
static struct token *get_token_asm __P((FILE *in_FILE, void const *args, int
*flags));
static void *parse_args_asm __P((char **argv, int argc));
static void help_me_asm __P((void));
static struct token *get_token_text __P((FILE *in_FILE, void const *args, int
*flags));
static void *parse_args_text __P((char **argv, int argc));
static void help_me_text __P((void));
struct language languages_0[] =
{
{ "LISP", parse_args_lisp, get_token_lisp, help_me_lisp },
{ "SML", parse_args_sml, get_token_sml, help_me_sml },
{ "C", parse_args_c, get_token_c, help_me_c },
{ "C++", parse_args_c, get_token_c, help_me_c },
{ "asm", parse_args_asm, get_token_asm, help_me_asm },
{ "text", parse_args_text, get_token_text, help_me_text },
};
struct language const *languages_N = &languages_0[cardinalityof (languages_0)];
void
language_help_me (void)
{
struct language *lang;
for (lang = languages_0; lang < languages_N; lang++)
{
putchar ('\n');
(*lang->lg_help_me) ();
}
}
void
language_save_arg (char *arg)
{
static char horizontal_space[] = " \t";
char *lang_name = strtok (arg, ":");
struct language *lang = get_language (lang_name);
if (lang == 0)
{
error (0, 0, _("unrecognized language: `%s'"), lang_name);
usage ();
}
if (lang->lg_argc == 0)
lang->lg_argv[lang->lg_argc++] = program_name;
lang->lg_argv[lang->lg_argc++] = strtok (0, horizontal_space);
}
void
language_getopt (void)
{
struct language *lang;
for (lang = languages_0; lang < languages_N; lang++)
if (lang->lg_argc)
lang->lg_parse_args (lang->lg_argv, lang->lg_argc);
}
struct language *
get_language (char const *lang_name)
{
struct language *lang;
for (lang = languages_0; lang < languages_N; lang++)
if (strequ (lang_name, lang->lg_name))
{
DEBUG (("lang=%s", lang_name));
return lang;
}
DEBUG (("!lang=%s", lang_name));
return 0;
}
/****************************************************************************/
int lang_args_index = 0;
void
set_default_language (char const *lang_name)
{
}
void
parse_language_map (char const *file_name)
{
if (obstack_init (&lang_args_obstack) == 0)
error (1, 0, _("can't allocate language args obstack: memory exhausted"));
if (file_name == 0)
file_name = LANGUAGE_MAP_FILE;
parse_language_map_file (file_name, &lang_args_list);
}
struct lang_args **
parse_language_map_file (char const *file_name, struct lang_args **next_ptr)
{
static char white_space[] = " \t\r\n\v\f";
static char horizontal_space[] = " \t";
static char vertical_space[] = "\r\n\v\f";
char *lang_map_buffer;
char *lmp;
lmp = lang_map_buffer = read_language_map_file (file_name);
for (;;)
{
struct lang_args *new_args;
struct language const *lang;
int pattern_size;
char *lang_name;
int space;
/* Skip leading white space and full-line comments */
while (*lmp)
{
lmp += strspn (lmp, white_space);
if (*lmp != '#')
break;
lmp += strcspn (lmp, vertical_space);
}
if (*lmp == '\0')
break;
pattern_size = strcspn (lmp, white_space);
if (pattern_size == 3 && strnequ (lmp, "***", 3))
{
lmp += pattern_size;
lmp += strspn (lmp, horizontal_space);
if (isspace (*lmp))
next_ptr = parse_language_map_file (LANGUAGE_MAP_FILE, next_ptr);
else
{
char *end = lmp + strcspn (lmp, white_space);
*end = '\0';
next_ptr = parse_language_map_file (lmp, next_ptr);
lmp = end + 1;
}
continue;
}
new_args = OBSTACK_ALLOC (&lang_args_obstack, struct lang_args, 1);
if (new_args == 0)
error (1, 0, _("can't allocate language args: memory exhausted"));
new_args->la_pattern = obstack_copy0 (&lang_args_obstack, lmp,
pattern_size);
new_args->la_args_string = 0;
new_args->la_next = 0;
lmp += pattern_size;
lmp += strspn (lmp, horizontal_space);
if (isspace (*lmp))
{
error (0, 0, _("language name expected following `%s' in file `%s'"),
new_args->la_pattern, file_name);
obstack_free (&lang_args_obstack, new_args);
continue;
}
lang_name = lmp;
lmp += strcspn (lmp, white_space);
space = *lmp;
*lmp++ = '\0';
lmp += strspn (lmp, horizontal_space);
lang = new_args->la_language = get_language (lang_name);
if (*lmp == '#')
lmp += strcspn (lmp, vertical_space);
else if (!isspace (*lmp) && (space == ' ' || space == '\t'))
{
int args_size = strcspn (lmp, vertical_space);
new_args->la_args_string = obstack_copy0 (&lang_args_obstack, lmp,
args_size);
lmp += args_size;
}
new_args->la_args_digested = (lang
? lang->lg_parse_args
(&new_args->la_args_string, 0)
: 0);
if (pattern_size == 2 && strnequ (new_args->la_pattern, "**", 2))
{
if (lang_args_default)
{
obstack_free (&lang_args_obstack, new_args);
continue;
}
lang_args_default = new_args;
DEBUG ((", <default>"));
}
else
{
new_args->la_index = lang_args_index++;
*next_ptr = new_args;
next_ptr = &new_args->la_next;
}
DEBUG ((", pat=%s\n", new_args->la_pattern));
}
free (lang_map_buffer);
return next_ptr;
}
char *
read_language_map_file (char const *file_name)
{
int map_fd;
char *lang_map_buffer;
struct stat st;
int bytes;
map_fd = open (file_name, O_RDONLY);
if (map_fd < 0)
error (1, errno, _("can't open language map file `%s'"), file_name);
if (fstat (map_fd, &st) < 0)
error (1, errno, _("can't get size of map file `%s'"), file_name);
lang_map_buffer = MALLOC (char, st.st_size + 2);
if (lang_map_buffer == 0)
error (1, 0, _("can't allocate language args: memory exhausted"));
lang_map_buffer[st.st_size] = '\n';
lang_map_buffer[st.st_size+1] = '\0';
bytes = read (map_fd, lang_map_buffer, st.st_size);
if (bytes < 0)
error (1, errno, _("can't read language map file `%s'"), file_name);
/* FIXME: handle interrupted & partial reads */
if (bytes != st.st_size)
error (1, errno, _("can't read entire language map file `%s'"), file_name);
close (map_fd);
return lang_map_buffer;
}
/****************************************************************************/
void
tokenize_args_string (char *args_string, int *argcp, char ***argvp)
{
static char horizontal_space[] = " \t";
char **argv_0 = MALLOC (char *, strlen (args_string) / 2);
char **argv = argv_0;
char *arg;
*argv++ = program_name;
arg = strtok (args_string, horizontal_space);
while (arg)
{
*argv++ = arg;
arg = strtok (0, horizontal_space);
}
*argcp = argv - argv_0;
*argvp = REALLOC (argv_0, char *, *argcp);
}
static void
set_ushort_ctype (unsigned short *ctype, char const *chars, int type)
{
unsigned short *rct = &ctype[1];
unsigned char const *uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] |= type;
}
static void
clear_ushort_ctype (unsigned short *ctype, char const *chars, int type)
{
unsigned short *rct = &ctype[1];
unsigned char const *uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] &= ~type;
}
static void
set_uchar_ctype (unsigned char *ctype, char const *chars, int type)
{
unsigned char *rct = &ctype[1];
unsigned char const *uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] |= type;
}
static void
clear_uchar_ctype (unsigned char *ctype, char const *chars, int type)
{
unsigned char *rct = &ctype[1];
unsigned char const *uc = (unsigned char const *) chars;
while (*uc)
rct[*uc++] &= ~type;
}
/*************** LISP ****************************************************/
#define I1 0x0001 /* 1st char of an identifier */
#define DG 0x0002 /* decimal digit [0-9] */
#define NM 0x0004 /* extra chars in a hex or long number
[a-fA-FxXlL] */
#define C1 0x0008 /* Lisp comment introduction char: ; */
#define HA 0x0010 /* Lisp special-syntax prefix char: # */
#define BA 0x0020 /* Lisp special-syntax id-quote char: | */
#define Q2 0x0040 /* double quote: " */
#define ES 0x0080 /* escape char: \ */
#define NL 0x0100 /* newline: \n */
#define EF 0x0200 /* EOF */
#define SK 0x0400 /* Make these chars valid for names within
strings */
#define WS 0x1000 /* White space characters */
/* character class membership macros: */
#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
#define ISID1ST(c) ((rct)[c] & (I1|ES)) /* 1st char of an identifier */
#define ISIDREST(c) ((rct)[c] & (I1|DG|ES)) /* rest of an identifier */
#define ISSYM1ST(c) ((rct)[c] & (S1)) /* 1st char of a symbol */
#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
/* The `BORING' classes should be skipped over until something
interesting comes along... */
#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|HA|BA|Q2|C1|ES))) /*
fluff */
#define ISCBORING(c) (!((rct)[c] & (EF|C1|C2))) /* comment fluff */
#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
/* Lisp identifiers can include basically any */
/* printing chars except: ( ) [ ] { } ' ; \ | # ` , : */
static unsigned short ctype_lisp[257] =
{
EF,
/* 0 1 2 3 4 5 6 7 */
/* ----- ----- ----- ----- ----- ----- ----- ----- */
/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^@
^A ^B ^C ^D ^E ^F ^G */
/*010*/ 0, 0, NL, 0, 0, NL, 0, 0, /* ^H
^I ^J ^K ^L ^M ^N ^O */
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^P
^Q ^R ^S ^T ^U ^V ^W */
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^X
^Y ^Z ^[ ^\ ^] ^^ ^_ */
/*040*/ 0, I1, Q2, HA, I1, I1, I1, 0, /*
! " # $ % & ' */
/*050*/ 0, 0, I1, I1, 0, I1, I1, I1, /* (
) * + , - . / */
/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, /* 0
1 2 3 4 5 6 7 */
/*070*/ DG, DG, 0, C1, I1, I1, I1, I1, /* 8
9 : ; < = > ? */
/*100*/ I1, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /* @
A B C D E F G */
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /* H
I J K L M N O */
/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /* P
Q R S T U V W */
/*130*/ I1|NM, I1, I1, 0, ES, 0, I1, I1, /* X
Y Z [ \ ] ^ _ */
/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /* `
a b c d e f g */
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /* h
i j k l m n o */
/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /* p
q r s t u v w */
/*170*/ I1|NM, I1, I1, 0, BA, 0, I1, 0, /* x
y z { | } ~ */
/* FIXME: latin-1 */
};
struct args_lisp
{
unsigned short *ctype;
};
static struct args_lisp args_lisp = { ctype_lisp };
static struct option const long_options_lisp[] =
{
{ "keep", required_argument, 0, 'k' },
{ "ignore", required_argument, 0, 'i' },
{ 0 }
};
static void
help_me_lisp (void)
{
printf (_("\
LISP language:\n\
-k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\
-i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\
"));
}
static void *
parse_args_lisp (char **argv, int argc)
{
char *tmp_string = 0;
struct args_lisp *args;
if (argv == 0 || *argv == 0)
return &args_lisp;
if (argc)
args = &args_lisp;
else
{
tmp_string = strdup (*argv);
tokenize_args_string (tmp_string, &argc, &argv);
args = MALLOC (struct args_lisp, 1);
args->ctype = ctype_lisp;
}
optind = 0;
for (;;)
{
int optc = getopt_long (argc, argv, "k:i:",
long_options_lisp, (int *) 0);
if (optc < 0)
break;
if ((optc == 'k' || optc == 'i') && args->ctype == ctype_lisp)
args->ctype = CLONE (ctype_lisp, unsigned short, cardinalityof
(ctype_lisp));
switch (optc)
{
case 'k':
set_ushort_ctype (args->ctype, optarg, SK);
break;
case 'i':
clear_ushort_ctype (args->ctype, optarg, SK);
break;
default:
usage ();
}
}
if (tmp_string)
{
free (argv);
free (tmp_string);
}
return args;
}
static struct token *
get_token_lisp (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_lisp const *) args)
unsigned short const *rct = &ARGS->ctype[1];
unsigned char *id = scanner_buffer;
int c;
obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
top:
c = getc (in_FILE);
next:
while (ISBORING (c))
c = getc (in_FILE);
switch (c)
{
case '"':
id = scanner_buffer;
*id++ = c = getc (in_FILE);
for (;;)
{
while (ISQ2BORING (c))
*id++ = c = getc (in_FILE);
if (c == '\\')
{
*id++ = c = getc (in_FILE);
continue;
}
else if (c != '"')
goto next;
break;
}
*--id = '\0';
id = scanner_buffer;
while (ISSTRKEEP (*id))
id++;
if (*id || id == scanner_buffer)
{
c = getc (in_FILE);
goto next;
}
*flags = TOK_STRING;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
case ';':
/* Handle ; ... comment-to-end-of-line construction: */
while (((c = getc (in_FILE)) != '\n') && !ISEOF(c));
goto next;
case '#':
/* Treat # as a comment-to-next-whitespace construction: */
c = getc (in_FILE); /* Skip next char */
while ((c = getc (in_FILE)) && !ISSPACE(c) && !ISEOF(c));
goto next;
case '\n':
case '\015':
goto top;
default:
if (ISEOF (c))
{
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
id = scanner_buffer;
*id++ = c;
if (ISID1ST (c))
{
*flags = TOK_NAME;
while (ISIDREST (c = getc (in_FILE)))
{
if (c == '\\')
c = getc (in_FILE); /* Lisp allows abc\(def as an id */
*id++ = c;
}
ungetc (c, in_FILE);
}
else if (ISDIGIT (c))
{
*flags = TOK_NUMBER;
while (ISNUMBER (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else if (c == '|')
{
/* Lisp allows tokens quoted by |...| */
/* to be identifiers containing basically */
/* any characters. I stop at a newline */
/* just to keep this from running away: */
*flags = TOK_NAME;
--id; /* Don't include initial '|' in id. */
while ((c = getc (in_FILE)) != '|' && c != '\n' && !ISEOF (c))
{
if (c == '\\')
c = getc (in_FILE); /* Allow |abc\|def| and such */
*id++ = c;
}
if (c == '|')
--id; /* Don't include final '|' in id. */
}
else
{
if (isprint (c))
fprintf (stderr, _("junk: `%c'"), c);
else
fprintf (stderr, _("junk: `\\%03o'"), c);
}
*flags |= TOK_LITERAL;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
}
#undef ARGS
}
#undef BA
#undef HA
#undef I1
#undef DG
#undef NM
#undef C1
#undef C2
#undef Q2
#undef ES
#undef NL
#undef EF
#undef SK
#undef WS
#undef ISDIGIT
#undef ISNUMBER
#undef ISEOF
#undef ISID1ST
#undef ISIDREST
#undef ISSTRKEEP
#undef ISSPACE
#undef ISBORING
#undef ISCBORING
#undef ISCCBORING
#undef ISQ2BORING
/*************** SML and OCAML
****************************************************/
#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_'] */
#define DG 0x0002 /* decimal digit [0-9] */
#define NM 0x0004 /* extra chars in a hex or long number
[a-fA-FxXlL] */
#define C1 0x0008 /* SML comment introduction char: ( */
#define C2 0x0010 /* SML comment termination char: * */
#define S1 0x0020 /* symbol: ! % & $ # + - / : < = > ? @ \ ~ ` ^
| * */
#define Q2 0x0040 /* double quote: " */
#define ES 0x0080 /* escape char: \ */
#define NL 0x0100 /* newline: \n */
#define EF 0x0200 /* EOF */
#define SK 0x0400 /* Make these chars valid for names within
strings */
#define WS 0x1000 /* White space characters */
/* character class membership macros: */
#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
#define ISSYM1ST(c) ((rct)[c] & (S1)) /* 1st char of a symbol */
#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
/* The `BORING' classes should be skipped over until something
interesting comes along... */
#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|S1|Q2|C1))) /* fluff */
#define ISCBORING(c) (!((rct)[c] & (EF|C1|C2))) /* comment fluff */
#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
static unsigned short ctype_sml[257] =
{
EF,
/* 0 1 2 3 4 5 6 7 */
/* ----- ----- ----- ----- ----- ----- ----- ----- */
/*000*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^@
^A ^B ^C ^D ^E ^F ^G */
/*010*/ 0, 0, NL, 0, 0, 0, 0, 0, /* ^H
^I ^J ^K ^L ^M ^N ^O */
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^P
^Q ^R ^S ^T ^U ^V ^W */
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0, /* ^X
^Y ^Z ^[ ^\ ^] ^^ ^_ */
/*040*/ 0, S1, Q2, S1, S1, S1, S1, I1, /*
! " # $ % & ' */
/*050*/ C1, 0, C2|S1, S1, 0, S1, 0, S1, /* (
) * + , - . / */
/*060*/ DG, DG, DG, DG, DG, DG, DG, DG, /* 0
1 2 3 4 5 6 7 */
/*070*/ DG, DG, S1, 0, S1, S1, S1, S1, /* 8
9 : ; < = > ? */
/*100*/ S1, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /* @
A B C D E F G */
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /* H
I J K L M N O */
/*120*/ I1, I1, I1, I1, I1, I1, I1, I1, /* P
Q R S T U V W */
/*130*/ I1|NM, I1, I1, 0, ES|S1, 0, S1, I1, /* X
Y Z [ \ ] ^ _ */
/*140*/ S1, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1, /* `
a b c d e f g */
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1, /* h
i j k l m n o */
/*160*/ I1, I1, I1, I1, I1, I1, I1, I1, /* p
q r s t u v w */
/*170*/ I1|NM, I1, I1, 0, S1, 0, S1, 0, /* x
y z { | } ~ */
/* FIXME: latin-1 */
};
struct args_sml
{
unsigned short *ctype;
};
static struct args_sml args_sml = { ctype_sml };
static struct option const long_options_sml[] =
{
{ "keep", required_argument, 0, 'k' },
{ "ignore", required_argument, 0, 'i' },
{ 0 }
};
static void
help_me_sml (void)
{
printf (_("\
SML language:\n\
-k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\
-i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\
"));
}
static void *
parse_args_sml (char **argv, int argc)
{
char *tmp_string = 0;
struct args_sml *args;
if (argv == 0 || *argv == 0)
return &args_sml;
if (argc)
args = &args_sml;
else
{
tmp_string = strdup (*argv);
tokenize_args_string (tmp_string, &argc, &argv);
args = MALLOC (struct args_sml, 1);
args->ctype = ctype_sml;
}
optind = 0;
for (;;)
{
int optc = getopt_long (argc, argv, "k:i:",
long_options_sml, (int *) 0);
if (optc < 0)
break;
if ((optc == 'k' || optc == 'i') && args->ctype == ctype_sml)
args->ctype = CLONE (ctype_sml, unsigned short, cardinalityof
(ctype_sml));
switch (optc)
{
case 'k':
set_ushort_ctype (args->ctype, optarg, SK);
break;
case 'i':
clear_ushort_ctype (args->ctype, optarg, SK);
break;
default:
usage ();
}
}
if (tmp_string)
{
free (argv);
free (tmp_string);
}
return args;
}
static struct token *
get_token_sml (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_sml const *) args)
unsigned short const *rct = &ARGS->ctype[1];
unsigned char *id = scanner_buffer;
int c;
int nesting;
obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
top:
c = getc (in_FILE);
next:
while (ISBORING (c))
c = getc (in_FILE);
switch (c)
{
case '"':
id = scanner_buffer;
*id++ = c = getc (in_FILE);
for (;;)
{
while (ISQ2BORING (c))
*id++ = c = getc (in_FILE);
if (c == '\\')
{
*id++ = c = getc (in_FILE);
continue;
}
else if (c != '"')
goto next;
break;
}
*--id = '\0';
id = scanner_buffer;
while (ISSTRKEEP (*id))
id++;
if (*id || id == scanner_buffer)
{
c = getc (in_FILE);
goto next;
}
*flags = TOK_STRING;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
case '(':
/* Handle (* ... *) comments. In */
/* SML, unlike in C, comments nest: */
c = getc (in_FILE);
if (c != '*')
goto next;
nesting = 1;
c = getc (in_FILE);
for (;;)
{
while (ISCBORING (c))
c = getc (in_FILE);
if (c != '(' ) {
c = getc (in_FILE);
if (c == ')')
{
c = getc (in_FILE);
if (!--nesting) goto next;
}
else if (ISEOF (c))
{
obstack_free (&tokens_obstack, obstack_finish
(&tokens_obstack));
return 0;
}
} else {
c = getc (in_FILE);
if (c == '*')
{
++nesting;
c = getc (in_FILE);
}
else if (ISEOF (c))
{
obstack_free (&tokens_obstack, obstack_finish
(&tokens_obstack));
return 0;
}
}
}
case '\n':
case '\015':
goto top;
default:
if (ISEOF (c))
{
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
id = scanner_buffer;
*id++ = c;
if (ISID1ST (c))
{
*flags = TOK_NAME;
while (ISIDREST (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else if (ISSYM1ST (c))
{
/* SML treats sequences made of the chars: */
/* ! % & $ # + - / : < = > ? @ \ ~ ` ^ | * */
/* as identifiers, although it calls them */
/* "symbols" (and uses them as infix ops): */
*flags = TOK_NAME;
while (ISSYM1ST (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else if (ISDIGIT (c))
{
*flags = TOK_NUMBER;
while (ISNUMBER (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else
{
if (isprint (c))
fprintf (stderr, _("junk: `%c'"), c);
else
fprintf (stderr, _("junk: `\\%03o'"), c);
}
*flags |= TOK_LITERAL;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
}
#undef ARGS
}
#undef I1
#undef DG
#undef NM
#undef C1
#undef C2
#undef S1
#undef Q2
#undef ES
#undef NL
#undef EF
#undef SK
#undef WS
#undef ISDIGIT
#undef ISNUMBER
#undef ISEOF
#undef ISID1ST
#undef ISIDREST
#undef ISSTRKEEP
#undef ISSPACE
#undef ISBORING
#undef ISCBORING
#undef ISCCBORING
#undef ISQ2BORING
/*************** C & C++ ****************************************************/
#define I1 0x0001 /* 1st char of an identifier [a-zA-Z_] */
#define DG 0x0002 /* decimal digit [0-9] */
#define NM 0x0004 /* extra chars in a hex or long number
[a-fA-FxXlL] */
#define C1 0x0008 /* C comment introduction char: / */
#define C2 0x0010 /* C comment termination char: * */
#define Q1 0x0020 /* single quote: ' */
#define Q2 0x0040 /* double quote: " */
#define ES 0x0080 /* escape char: \ */
#define NL 0x0100 /* newline: \n */
#define EF 0x0200 /* EOF */
#define SK 0x0400 /* Make these chars valid for names within
strings */
#define VH 0x0800 /* VHIL comment introduction char: # */
#define WS 0x1000 /* White space characters */
/* character class membership macros: */
#define ISDIGIT(c) ((rct)[c] & (DG)) /* digit */
#define ISNUMBER(c) ((rct)[c] & (DG|NM)) /* legal in a number */
#define ISEOF(c) ((rct)[c] & (EF)) /* EOF */
#define ISID1ST(c) ((rct)[c] & (I1)) /* 1st char of an identifier */
#define ISIDREST(c) ((rct)[c] & (I1|DG)) /* rest of an identifier */
#define ISSTRKEEP(c) ((rct)[c] & (I1|DG|SK)) /* keep contents of string */
#define ISSPACE(c) ((rct)[c] & (WS)) /* white space character */
/* The `BORING' classes should be skipped over until something
interesting comes along... */
#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|DG|Q1|Q2|C1|VH))) /*
fluff */
#define ISCBORING(c) (!((rct)[c] & (EF|C2))) /* comment fluff */
#define ISCCBORING(c) (!((rct)[c] & (EF|NL))) /* C++ // comment fluff */
#define ISQ1BORING(c) (!((rct)[c] & (EF|NL|Q1|ES))) /* char const fluff */
#define ISQ2BORING(c) (!((rct)[c] & (EF|NL|Q2|ES))) /* quoted str fluff */
static unsigned short ctype_c[257] =
{
EF,
/* 0 1 2 3 4 5 6 7 */
/* ----- ----- ----- ----- ----- ----- ----- ----- */
/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*040*/ 0, 0, Q2, 0, 0, 0, 0, Q1,
/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
/*060*/ DG, DG, DG, DG, DG, DG, DG, DG,
/*070*/ DG, DG, 0, 0, 0, 0, 0, 0,
/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*130*/ I1|NM, I1, I1, 0, ES, 0, 0, I1,
/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
/* FIXME: latin-1 */
};
struct args_c
{
int strip_underscore;
unsigned short *ctype;
};
static struct args_c args_c = { 0, ctype_c };
static struct option const long_options_c[] =
{
{ "keep", required_argument, 0, 'k' },
{ "ignore", required_argument, 0, 'i' },
{ "strip-underscore", no_argument, 0, 'u' },
{ 0 }
};
static void
help_me_c (void)
{
printf (_("\
C language:\n\
-k,--keep=CHARS Allow CHARS in single-token strings, keep the result\n\
-i,--ignore=CHARS Allow CHARS in single-token strings, toss the result\n\
-u,--strip-underscore Strip a leading underscore from single-token strings\n\
"));
}
static void *
parse_args_c (char **argv, int argc)
{
char *tmp_string = 0;
struct args_c *args;
if (argv == 0 || *argv == 0)
return &args_c;
if (argc)
args = &args_c;
else
{
tmp_string = strdup (*argv);
tokenize_args_string (tmp_string, &argc, &argv);
args = MALLOC (struct args_c, 1);
args->strip_underscore = 0;
args->ctype = ctype_c;
}
optind = 0;
for (;;)
{
int optc = getopt_long (argc, argv, "k:i:u",
long_options_c, (int *) 0);
if (optc < 0)
break;
if ((optc == 'k' || optc == 'i') && args->ctype == ctype_c)
args->ctype = CLONE (ctype_c, unsigned short, cardinalityof (ctype_c));
switch (optc)
{
case 'k':
set_ushort_ctype (args->ctype, optarg, SK);
break;
case 'i':
clear_ushort_ctype (args->ctype, optarg, SK);
break;
case 'u':
args->strip_underscore = 1;
break;
default:
usage ();
}
}
if (tmp_string)
{
free (argv);
free (tmp_string);
}
return args;
}
unsigned char *scanner_buffer;
#define SCAN_CPP_DIRECTIVE \
do \
{ \
c = getc (in_FILE); \
while (ISBORING (c)) \
c = getc (in_FILE); \
if (!ISID1ST (c)) \
goto next; \
id = scanner_buffer; \
*id++ = c; \
while (ISIDREST (c = getc (in_FILE))) \
*id++ = c; \
*id = '\0'; \
if (strequ (scanner_buffer, "include")) \
{ \
while (c == ' ' || c == '\t') \
c = getc (in_FILE); \
if (c == '\n') \
{ \
new_line = 1; \
goto top; \
} \
id = scanner_buffer; \
if (c == '"') \
{ \
c = getc (in_FILE); \
while (c != '\n' && c != EOF && c != '"') \
{ \
*id++ = c; \
c = getc (in_FILE); \
} \
*flags = TOK_STRING; \
} \
else if (c == '<') \
{ \
c = getc (in_FILE); \
while (c != '\n' && c != EOF && c != '>') \
{ \
*id++ = c; \
c = getc (in_FILE); \
} \
*flags = TOK_STRING; \
} \
else if (ISID1ST (c)) \
{ \
*id++ = c; \
while (ISIDREST (c = getc (in_FILE))) \
*id++ = c; \
*flags = TOK_NAME; \
} \
else \
{ \
while (c != '\n' && c != EOF) \
c = getc (in_FILE); \
new_line = 1; \
goto top; \
} \
while (c != '\n' && c != EOF) \
c = getc (in_FILE); \
new_line = 1; \
obstack_grow0 (&tokens_obstack, scanner_buffer, \
id - scanner_buffer); \
return (struct token *) obstack_finish (&tokens_obstack); \
} \
if (strnequ (scanner_buffer, "if", 2) \
|| strequ (scanner_buffer, "define") \
|| strequ (scanner_buffer, "elif") /* ansi C */ \
|| strequ (scanner_buffer, "undef")) \
goto next; \
while ((c != '\n') && (c != EOF)) \
c = getc (in_FILE); \
new_line = 1; \
goto top; \
} while (0)
/* Grab the next identifier from the C source file. This state
machine is built for speed, not elegance. */
static struct token *
get_token_c (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_c const *) args)
static int new_line = 1;
unsigned short const *rct = &ARGS->ctype[1];
unsigned char *id = scanner_buffer;
int c;
obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
top:
c = getc (in_FILE);
if (new_line)
{
new_line = 0;
if (c != '#')
goto next;
SCAN_CPP_DIRECTIVE;
}
next:
while (ISBORING (c))
c = getc (in_FILE);
switch (c)
{
case '"':
id = scanner_buffer;
*id++ = c = getc (in_FILE);
for (;;)
{
while (ISQ2BORING (c))
*id++ = c = getc (in_FILE);
if (c == '\\')
{
*id++ = c = getc (in_FILE);
continue;
}
else if (c != '"')
goto next;
break;
}
*--id = '\0';
id = scanner_buffer;
while (ISSTRKEEP (*id))
id++;
if (*id || id == scanner_buffer)
{
c = getc (in_FILE);
goto next;
}
*flags = TOK_STRING;
if (ARGS->strip_underscore && scanner_buffer[0] == '_' &&
scanner_buffer[1])
obstack_grow0 (&tokens_obstack, scanner_buffer + 1, id - scanner_buffer
- 1);
else
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
case '\'':
c = getc (in_FILE);
for (;;)
{
while (ISQ1BORING (c))
c = getc (in_FILE);
if (c == '\\')
{
c = getc (in_FILE);
continue;
}
else if (c == '\'')
c = getc (in_FILE);
goto next;
}
case '/':
c = getc (in_FILE);
if (c == '/')
{ /* Cope with C++ comment */
while (ISCCBORING (c))
c = getc (in_FILE);
new_line = 1;
goto top;
}
else if (c != '*')
goto next;
c = getc (in_FILE);
for (;;)
{
while (ISCBORING (c))
c = getc (in_FILE);
c = getc (in_FILE);
if (c == '/')
{
c = getc (in_FILE);
goto next;
}
else if (ISEOF (c))
{
new_line = 1;
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
}
case '\n':
case '\015':
new_line = 1;
goto top;
default:
if (ISEOF (c))
{
new_line = 1;
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
id = scanner_buffer;
*id++ = c;
if (ISID1ST (c))
{
*flags = TOK_NAME;
while (ISIDREST (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else if (ISDIGIT (c))
{
*flags = TOK_NUMBER;
while (ISNUMBER (c = getc (in_FILE)))
*id++ = c;
ungetc (c, in_FILE);
}
else
{
if (isprint (c))
fprintf (stderr, _("junk: `%c'"), c);
else
fprintf (stderr, _("junk: `\\%03o'"), c);
}
*flags |= TOK_LITERAL;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
}
#undef ARGS
}
#undef I1
#undef DG
#undef NM
#undef C1
#undef C2
#undef Q1
#undef Q2
#undef ES
#undef NL
#undef EF
#undef SK
#undef VH
#undef WS
#undef ISDIGIT
#undef ISNUMBER
#undef ISEOF
#undef ISID1ST
#undef ISIDREST
#undef ISSTRKEEP
#undef ISSPACE
#undef ISBORING
#undef ISCBORING
#undef ISCCBORING
#undef ISQ1BORING
#undef ISQ2BORING
/*************** Assembly ***************************************************/
#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
#define NM 0x02 /* digit [0-9a-fA-FxX] */
#define NL 0x04 /* newline: \n */
#define CM 0x08 /* assembler comment char: usually # or | */
#define IG 0x10 /* ignore `identifiers' with these chars in
them */
#define C1 0x20 /* C comment introduction char: / */
#define C2 0x40 /* C comment termination char: * */
#define EF 0x80 /* EOF */
/* Assembly Language character classes */
#define ISID1ST(c) ((rct)[c] & (I1))
#define ISIDREST(c) ((rct)[c] & (I1|NM))
#define ISNUMBER(c) ((rct)[c] & (NM))
#define ISEOF(c) ((rct)[c] & (EF))
#define ISCOMMENT(c) ((rct)[c] & (CM))
#define ISBORING(c) (!((rct)[c] & (EF|NL|I1|NM|CM|C1)))
#define ISCBORING(c) (!((rct)[c] & (EF|NL)))
#define ISCCBORING(c) (!((rct)[c] & (EF|C2)))
#define ISIGNORE(c) ((rct)[c] & (IG))
static unsigned char ctype_asm[257] =
{
EF,
/* 0 1 2 3 4 5 6 7 */
/* ----- ----- ----- ----- ----- ----- ----- ----- */
/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*010*/ 0, 0, NL, 0, 0, 0, 0, 0,
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*050*/ 0, 0, C2, 0, 0, 0, 0, C1,
/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
};
struct args_asm
{
int handle_cpp;
int strip_underscore;
unsigned char *ctype;
};
static struct args_asm args_asm = { 1, 0, ctype_asm };
static struct option const long_options_asm[] =
{
{ "comment", required_argument, 0, 'c' },
{ "keep", required_argument, 0, 'k' },
{ "ignore", required_argument, 0, 'i' },
{ "strip-underscore", no_argument, 0, 'u' },
{ "no-cpp", no_argument, 0, 'p' },
{ 0 }
};
static void
help_me_asm (void)
{
printf (_("\
Assembly language:\n\
-c,--comment=CHARS Any of CHARS starts a comment until end-of-line\n\
-k,--keep=CHARS Allow CHARS in tokens, and keep the result\n\
-i,--ignore=CHARS Allow CHARS in tokens, and toss the result\n\
-u,--strip-underscore Strip a leading underscore from tokens\n\
-n,--no-cpp Don't handle C pre-processor directives\n\
"));
}
static void *
parse_args_asm (char **argv, int argc)
{
char *tmp_string = 0;
struct args_asm *args;
if (argv == 0 || *argv == 0)
return &args_asm;
if (argc)
args = &args_asm;
else
{
tmp_string = strdup (*argv);
tokenize_args_string (tmp_string, &argc, &argv);
args = MALLOC (struct args_asm, 1);
args->strip_underscore = 0;
args->ctype = ctype_asm;
args->handle_cpp = 1;
}
optind = 0;
for (;;)
{
int optc = getopt_long (argc, argv, "c:k:i:un",
long_options_asm, (int *) 0);
if (optc < 0)
break;
if ((optc == 'k' || optc == 'i' || optc == 'c')
&& args->ctype == ctype_asm)
args->ctype = CLONE (ctype_asm, unsigned char, cardinalityof
(ctype_asm));
switch (optc)
{
case 'c':
set_uchar_ctype (args->ctype, optarg, CM);
break;
case 'k':
set_uchar_ctype (args->ctype, optarg, I1);
break;
case 'i':
set_uchar_ctype (args->ctype, optarg, I1 | IG);
break;
case 'u':
args->strip_underscore = 1;
break;
case 'n':
args->handle_cpp = 0;
break;
default:
usage ();
}
}
if (tmp_string)
{
free (argv);
free (tmp_string);
}
return args;
}
/* Grab the next identifier the assembly language source file. This
state machine is built for speed, not elegance. */
static struct token *
get_token_asm (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_asm const *) args)
static int new_line = 1;
unsigned char const *rct = &ARGS->ctype[1];
unsigned char *id = scanner_buffer;
int c;
obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
top:
c = getc (in_FILE);
if (ARGS->handle_cpp > 0 && new_line)
{
new_line = 0;
if (c != '#')
goto next;
SCAN_CPP_DIRECTIVE;
}
next:
while (ISBORING (c))
c = getc (in_FILE);
if (ISCOMMENT (c))
{
while (ISCBORING (c))
c = getc (in_FILE);
new_line = 1;
}
if (ISEOF (c))
{
new_line = 1;
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
if (c == '\n')
{
new_line = 1;
goto top;
}
if (c == '/')
{
if ((c = getc (in_FILE)) != '*')
goto next;
c = getc (in_FILE);
for (;;)
{
while (ISCCBORING (c))
c = getc (in_FILE);
c = getc (in_FILE);
if (c == '/')
{
c = getc (in_FILE);
break;
}
else if (ISEOF (c))
{
new_line = 1;
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
}
goto next;
}
id = scanner_buffer;
if (ARGS->strip_underscore && c == '_' && !ISID1ST (c = getc (in_FILE)))
{
obstack_grow0 (&tokens_obstack, "_", 1);
return (struct token *) obstack_finish (&tokens_obstack);
}
*id++ = c;
if (ISID1ST (c))
{
*flags = TOK_NAME;
while (ISIDREST (c = getc (in_FILE)))
*id++ = c;
}
else if (ISNUMBER (c))
{
*flags = TOK_NUMBER;
while (ISNUMBER (c = getc (in_FILE)))
*id++ = c;
}
else
{
if (isprint (c))
fprintf (stderr, _("junk: `%c'"), c);
else
fprintf (stderr, _("junk: `\\%03o'"), c);
goto next;
}
*id = '\0';
for (id = scanner_buffer; *id; id++)
if (ISIGNORE (*id))
goto next;
ungetc (c, in_FILE);
*flags |= TOK_LITERAL;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
#undef ARGS
}
#undef I1
#undef NM
#undef NL
#undef CM
#undef IG
#undef C1
#undef C2
#undef EF
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISEOF
#undef ISCOMMENT
#undef ISBORING
#undef ISCBORING
#undef ISCCBORING
#undef ISIGNORE
/*************** Text *******************************************************/
#define I1 0x01 /* 1st char of an identifier [a-zA-Z_] */
#define NM 0x02 /* digit [0-9a-fA-FxX] */
#define SQ 0x04 /* squeeze these out (.,',-) */
#define EF 0x80 /* EOF */
/* Text character classes */
#define ISID1ST(c) ((rct)[c] & (I1))
#define ISIDREST(c) ((rct)[c] & (I1|NM|SQ))
#define ISNUMBER(c) ((rct)[c] & (NM))
#define ISEOF(c) ((rct)[c] & (EF))
#define ISBORING(c) (!((rct)[c] & (I1|NM|EF)))
#define ISIDSQUEEZE(c) ((rct)[c] & (SQ))
static unsigned char ctype_text[257] =
{
EF,
/* 0 1 2 3 4 5 6 7 */
/* ----- ----- ----- ----- ----- ----- ----- ----- */
/*000*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*010*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*020*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*030*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*040*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*050*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*060*/ NM, NM, NM, NM, NM, NM, NM, NM,
/*070*/ NM, NM, 0, 0, 0, 0, 0, 0,
/*100*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*110*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*120*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*130*/ I1|NM, I1, I1, 0, 0, 0, 0, I1,
/*140*/ 0, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1|NM, I1,
/*150*/ I1, I1, I1, I1, I1|NM, I1, I1, I1,
/*160*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*170*/ I1|NM, I1, I1, 0, 0, 0, 0, 0,
/*200*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*210*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*220*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*230*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*240*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*250*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*260*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*270*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*300*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*310*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*320*/ I1, I1, I1, I1, I1, I1, I1, 0,
/*330*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*340*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*350*/ I1, I1, I1, I1, I1, I1, I1, I1,
/*360*/ I1, I1, I1, I1, I1, I1, I1, 0,
/*370*/ I1, I1, I1, I1, I1, I1, I1, I1,
};
struct args_text
{
unsigned char *ctype;
};
static struct args_text args_text = { ctype_text };
static struct option const long_options_text[] =
{
{ "include", required_argument, 0, 'i' },
{ "exclude", required_argument, 0, 'x' },
{ 0 }
};
static void
help_me_text (void)
{
printf (_("\
Text language:\n\
-i,--include=CHAR-CLASS Treat characters of CHAR-CLASS as token
constituents\n\
-x,--exclude=CHAR-CLASS Treat characters of CHAR-CLASS as token delimiters\n\
"));
}
static void *
parse_args_text (char **argv, int argc)
{
char *tmp_string = 0;
struct args_text *args;
if (argv == 0 || *argv == 0)
return &args_text;
if (argc)
args = &args_text;
else
{
tmp_string = strdup (*argv);
tokenize_args_string (tmp_string, &argc, &argv);
args = MALLOC (struct args_text, 1);
args->ctype = ctype_text;
}
optind = 0;
for (;;)
{
int optc = getopt_long (argc, argv, "i:x:",
long_options_text, (int *) 0);
if (optc < 0)
break;
if ((optc == 'k' || optc == 'i') && args->ctype == ctype_text)
args->ctype = CLONE (ctype_text, unsigned char, cardinalityof
(ctype_text));
switch (optc)
{
case 'i':
set_uchar_ctype (args->ctype, optarg, I1);
break;
case 'x':
clear_uchar_ctype (args->ctype, optarg, I1);
break;
default:
usage ();
}
}
if (tmp_string)
{
free (argv);
free (tmp_string);
}
return args;
}
/* Grab the next identifier the text source file. This state machine
is built for speed, not elegance. */
static struct token *
get_token_text (FILE *in_FILE, void const *args, int *flags)
{
#define ARGS ((struct args_text const *) args)
unsigned char const *rct = &ARGS->ctype[1];
int c;
unsigned char *id = scanner_buffer;
obstack_blank (&tokens_obstack, OFFSETOF_TOKEN_NAME);
top:
c = getc (in_FILE);
while (ISBORING (c))
c = getc (in_FILE);
if (ISEOF (c))
{
obstack_free (&tokens_obstack, obstack_finish (&tokens_obstack));
return 0;
}
id = scanner_buffer;
*id++ = c;
if (ISID1ST (c))
{
*flags = TOK_NAME;
while (ISIDREST (c = getc (in_FILE)))
if (!ISIDSQUEEZE (c))
*id++ = c;
}
else if (ISNUMBER (c))
{
*flags = TOK_NUMBER;
while (ISNUMBER (c = getc (in_FILE)))
*id++ = c;
}
else
{
if (isprint (c))
fprintf (stderr, _("junk: `%c'"), c);
else
fprintf (stderr, _("junk: `\\%03o'"), c);
goto top;
}
ungetc (c, in_FILE);
*flags |= TOK_LITERAL;
obstack_grow0 (&tokens_obstack, scanner_buffer, id - scanner_buffer);
return (struct token *) obstack_finish (&tokens_obstack);
#undef ARGS
}
#undef I1
#undef NM
#undef SQ
#undef EF
#undef ISID1ST
#undef ISIDREST
#undef ISNUMBER
#undef ISEOF
#undef ISBORING
#undef ISIDSQUEEZE
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- SML and LISP support for GNU id-utils,
Cynbe ru Taren <=