>From dd2bd96e34e8c90fc2962a7a51c6a886e7747f16 Mon Sep 17 00:00:00 2001 From: Fabrice Nicol Date: Wed, 24 Mar 2021 07:36:27 +0100 Subject: [PATCH] Applied Changes for Mercury (Fabrice Nicol). Signed-off-by: Fabrice Nicol --- doc/man/etags.1 | 21 ++- etc/NEWS | 9 ++ lib-src/etags.c | 343 +++++++++++++++++++++++++++++++++++++++++++++-- lisp/speedbar.el | 2 + 4 files changed, 363 insertions(+), 12 deletions(-) diff --git a/doc/man/etags.1 b/doc/man/etags.1 index c5c15fb182..2de23cd85d 100644 --- a/doc/man/etags.1 +++ b/doc/man/etags.1 @@ -50,9 +50,9 @@ format understood by .BR vi ( 1 )\c \&. Both forms of the program understand the syntax of C, Objective C, C++, Java, Fortran, Ada, Cobol, Erlang, -Forth, Go, HTML, LaTeX, Emacs Lisp/Common Lisp, Lua, Makefile, Pascal, Perl, -Ruby, PHP, PostScript, Python, Prolog, Scheme and -most assembler\-like syntaxes. +Forth, Go, HTML, LaTeX, Emacs Lisp/Common Lisp, Lua, Makefile, Mercury, Pascal, +Perl, Ruby, PHP, PostScript, Python, Prolog, Scheme and most assembler\-like +syntaxes. Both forms read the files specified on the command line, and write a tag table (defaults: \fBTAGS\fP for \fBetags\fP, \fBtags\fP for \fBctags\fP) in the current working directory. @@ -270,6 +270,21 @@ prints detailed information about how tags are created for LANG. .B \-V, \-\-version Print the current version of the program (same as the version of the emacs \fBetags\fP is shipped with). +.TP +.B \-, \-\-version +Print the current version of the program (same as the version of the +emacs \fBetags\fP is shipped with). +.TP +.B \-M, \-\-with\-mercury\-all +For the Mercury programming language, include both declarations and +definitions. Declarations start a line with :\- while definitions +are first rules for a given item, as for Prolog. Implies +\-\-language=mercury. +.TP +.B \-m, \-\-with\-mercury\-definitions +For the Mercury programming language, only tag declarations. +Declarations start a line with :\-. Implies \-\-language=mercury. + .SH "SEE ALSO" "\|\fBemacs\fP\|" entry in \fBinfo\fP; \fIGNU Emacs Manual\fP, Richard diff --git a/etc/NEWS b/etc/NEWS index 49a4bb8106..3b3ad87dea 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -93,6 +93,15 @@ useful on systems such as FreeBSD which ships only with "etc/termcap". * Changes in Emacs 28.1 +--- +** Etags support for the Mercury programming language (https://mercurylang.org). +** New etags command line options '-M/-m' or --with-mercury-definitions/all'. +Tags all Mercury declarations. For compatibility with Prolog etags support, +predicates and functions appearing first in clauses will be tagged if etags is +run with the option '-M' or '--with-mercury-all'. If run with '-m' or +'--with-mercury-definitions', only declarations will be tagged. Both options +imply --language=mercury. + +++ ** New command 'font-lock-update', bound to 'C-x x f'. This command updates the syntax highlighting in this buffer. diff --git a/lib-src/etags.c b/lib-src/etags.c index b5c18e0e01..88b622b4d3 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -359,6 +359,7 @@ #define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op))) static void Lisp_functions (FILE *); static void Lua_functions (FILE *); static void Makefile_targets (FILE *); +static void Mercury_functions (FILE *); static void Pascal_functions (FILE *); static void Perl_functions (FILE *); static void PHP_functions (FILE *); @@ -502,6 +503,8 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ { "ignore-case-regex", required_argument, NULL, 'c' }, { "parse-stdin", required_argument, NULL, STDIN }, { "version", no_argument, NULL, 'V' }, + { "with-mercury-all", no_argument, NULL, 'M' }, + { "with-mercury-definitions", no_argument, NULL, 'm' }, #if CTAGS /* Ctags options */ { "backward-search", no_argument, NULL, 'B' }, @@ -621,7 +624,6 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ "In Java code, all the tags constructs of C and C++ code are\n\ tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)"; - static const char *Cobol_suffixes [] = { "COB", "cob", NULL }; static char Cobol_help [] = @@ -683,10 +685,21 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ "In makefiles, targets are tags; additionally, variables are tags\n\ unless you specify '--no-globals'."; +/* Mercury and Objective C share the same .m file extensions. */ +static const char *Mercury_suffixes [] = + {"m", /* Use option -l mercury to switch from Objective C to Mercury. */ + NULL}; +static const char Mercury_help [] = + "In Mercury code, tags are all declarations beginning a line with :-\n\ +and optionally Prolog-like definitions (first rule for a predicate or \ +function).\n\ +To enable this behavior, run etags using --with-mercury-definitions."; +static bool with_mercury_definitions = false; + static const char *Objc_suffixes [] = - { "lm", /* Objective lex file */ - "m", /* Objective C file */ - NULL }; + {"lm", + "m", /* By default, Objective C will be assumed. */ + NULL}; static const char Objc_help [] = "In Objective C code, tags include Objective C definitions for classes,\n\ class categories, methods and protocols. Tags for variables and\n\ @@ -773,7 +786,6 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ 'TEXTAGS' to a colon-separated list like, for example,\n\ TEXTAGS=\"mycommand:myothercommand\"."; - static const char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL }; static const char Texinfo_help [] = @@ -824,6 +836,7 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes }, { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters}, { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames}, + { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes }, { "objc", Objc_help, plain_C_entries, Objc_suffixes }, { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes }, { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters}, @@ -1061,6 +1074,17 @@ print_help (argument *argbuffer) which you like."); } + puts ("-m, --mercury-declarations\n\ + For the Mercury programming language, only tag declarations.\n\ + Declarations start a line with :- \n\ + Implies --language=mercury."); + + puts ("-M, --mercury-all\n\ + For the Mercury programming language, include both declarations and\n\ + definitions. Declarations start a line with :- while definitions\n\ + are first rules for a given item, as for Prolog.\n\ + Implies --language=mercury."); + puts ("-V, --version\n\ Print the version of the program.\n\ -h, --help\n\ @@ -1111,7 +1135,7 @@ main (int argc, char **argv) /* When the optstring begins with a '-' getopt_long does not rearrange the non-options arguments to be at the end, but leaves them alone. */ - optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH", + optstring = concat ("-ac:Cf:Il:Mmo:Qr:RSVhHW", (CTAGS) ? "BxdtTuvw" : "Di:", ""); @@ -1202,6 +1226,17 @@ main (int argc, char **argv) case 'Q': class_qualify = 1; break; + case 'M': + with_mercury_definitions = true; FALLTHROUGH; + case 'm': + { + language lang = + { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes }; + + argbuffer[current_arg].lang = ⟨ + argbuffer[current_arg].arg_type = at_language; + } + break; /* Etags options */ case 'D': constantypedefs = false; break; @@ -1281,6 +1316,22 @@ main (int argc, char **argv) pfatal (tagfile); } + /* /\* Settle the Mercury/Objective C file extension issue. *\/ */ + + /* if (parsing_mercury) */ + /* { */ + /* Objc_suffixes = */ + /* { "lm", /\* Objective lex file *\/ */ + /* NULL }; /\* Remove .m from Obj_c identification. *\/ */ + /* Mercury_suffixes = {"m", NULL}; */ + /* } */ + /* else */ + /* { */ + /* Objc_suffixes = /\* Standard Objective C specification *\/ */ + /* {"lm", "m", NULL}; */ + /* Mercury_suffixes = {NULL}; */ + /* } */ + /* * Loop through files finding functions. */ @@ -2297,7 +2348,7 @@ invalidate_nodes (fdesc *badfdp, node **npp) } } - + static ptrdiff_t total_size_of_entries (node *); static int number_len (intmax_t) ATTRIBUTE_CONST; @@ -3222,7 +3273,7 @@ consider_token (char *str, /* IN: token pointer */ return false; } - + /* * C_entries often keeps pointers to tokens or lines which are older than * the line currently read. By keeping two line buffers, and switching @@ -5890,7 +5941,8 @@ Prolog_functions (FILE *inf) { if (cp[0] == '\0') /* Empty line */ continue; - else if (c_isspace (cp[0])) /* Not a predicate */ + else if (c_isspace (cp[0]) || cp[0] == '%') + /* Not a predicate or comment */ continue; else if (cp[0] == '/' && cp[1] == '*') /* comment. */ prolog_skip_comment (&lb, inf); @@ -6019,6 +6071,279 @@ prolog_atom (char *s, size_t pos) return 0; } + +/* + * Support for Mercury + * + * Assumes that the declarationa starts at column 0. + * Original code by Sunichirou Sugou (1989) for Prolog. + * Rewritten by Anders Lindgren (1996) for Prolog. + * Adapted by Fabrice Nicol (2021) for Mercury. + * Note: Prolog-support behavior is preserved if + * --with-mercury-definitions is used, corresponding to + * with_mercury_definitions=true. + */ + +static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t); +static void mercury_skip_comment (linebuffer *, FILE *); +static bool is_mercury_type = false; +static bool is_mercury_declaration = false; + +static void +Mercury_functions (FILE *inf) +{ + char *cp, *last = NULL; + ptrdiff_t lastlen = 0, allocated = 0; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (cp[0] == '\0') /* Empty line */ + continue; + else if (c_isspace (cp[0]) || cp[0] == '%') + /* a Prolog-type comment or anything other than a declaration */ + continue; + else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */ + mercury_skip_comment (&lb, inf); + else + { + is_mercury_declaration = (cp[0] == ':' && cp[1] == '-'); + + if (is_mercury_declaration + || with_mercury_definitions) + { + ptrdiff_t len = mercury_pr (cp, last, lastlen); + if (0 < len) + { + /* Store the declaration to avoid generating duplicate + tags later. */ + if (allocated <= len) + { + xrnew (last, len + 1, 1); + allocated = len + 1; + } + memcpyz (last, cp, len); + lastlen = len; + } + } + } + } + free (last); +} + +static void +mercury_skip_comment (linebuffer *plb, FILE *inf) +{ + char *cp; + + do + { + for (cp = plb->buffer; *cp != '\0'; ++cp) + if (cp[0] == '*' && cp[1] == '/') + return; + readline (plb, inf); + } + while (perhaps_more_input (inf)); +} + +/* + * A declaration is added if it matches: + * :-( + * If with_mercury_definitions == true, we also add: + * ( + * or :- + * As for Prolog support, different arities and types are not taken into + * consideration. + * Item is added to the tags database if it doesn't match the + * name of the previous declaration. + * + * Consume a Mercury declaration. + * Return the number of bytes consumed, or 0 if there was an error. + * + * A Mercury declaration must be one of: + * :- type + * :- solver type + * :- pred + * :- func + * :- inst + * :- mode + * :- typeclass + * :- instance + * :- pragma + * :- promise + * :- initialise + * :- finalise + * :- mutable + * :- module + * :- interface + * :- implementation + * :- import_module + * :- use_module + * :- include_module + * :- end_module + * followed on the same line by an alphanumeric sequence, starting with a lower + * case letter or by a single-quoted arbitrary string. + * Single quotes can escape themselves. Backslash quotes everything. + * + * Return the size of the name of the declaration or 0 if no header was found. + */ + +static const char *Mercury_decl_tags[] = {"type", "solver type", "pred", + "func", "inst", "mode", "typeclass", "instance", "pragma", "promise", + "initialise", "finalise", "mutable", "module", "interface", "implementation", + "import_module", "use_module", "include_module", "end_module"}; + +static size_t +mercury_decl (char *s, size_t pos) +{ + size_t origpos; + origpos = pos; + + while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos; + + if (s == NULL) + return 0; + + uint8_t decl_type_length = pos - origpos; + char buf[decl_type_length + 1]; + memset(buf, 0, decl_type_length + 1); + + /* Mercury declaration tags. Consume them, then check the declaration item + following :- is legitimate, then go on as in the prolog case. */ + + memcpy(buf, &s[origpos], decl_type_length); + + bool found_decl_tag = false; + + for (int j = 0; j < sizeof(Mercury_decl_tags)/sizeof(char*); ++j) + { + if (strcmp(buf, Mercury_decl_tags[j]) == 0) + { + found_decl_tag = true; + if (strcmp(Mercury_decl_tags[j], "type") == 0) is_mercury_type = true; + break; /* found declaration tag of rank j */ + } + else + /* 'solver type' has a blank in the middle, so this is the hard case */ + if (strcmp(buf, "solver") == 0) + { + ++pos; + while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) + ++pos; + + decl_type_length = pos - origpos; + char buf2[decl_type_length + 1]; + memset(buf2, 0, decl_type_length + 1); + memcpy(buf2, &s[origpos], decl_type_length); + + if (strcmp(buf2, "solver type") == 0) + { + found_decl_tag = false; + break; /* found declaration tag of rank j */ + } + } + } + + /* if with_mercury_definitions == false + * this is a Mercury syntax error, ignoring... */ + + if (with_mercury_definitions) + { + if (found_decl_tag) + pos = skip_spaces (s + pos) - s; /* skip len blanks again */ + else + /* Prolog-like behavior + * we have parsed the predicate once, yet inappropriately + * so restarting again the parsing step */ + pos = 0; + } + else + { + if (found_decl_tag) + pos = skip_spaces (s + pos) - s; /* skip len blanks again */ + else + return 0; + } + + /* From now on it is the same as for Prolog except for module dots */ + + if (c_islower (s[pos]) || s[pos] == '_' ) + { + /* The name is unquoted. + Do not confuse module dots with end-of-declaration dots. */ + + while (c_isalnum (s[pos]) + || s[pos] == '_' + || (s[pos] == '.' /* a module dot */ + && s + pos + 1 != NULL + && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_'))) + ++pos; + + return pos - origpos; + } + else if (s[pos] == '\'') + { + ++pos; + for (;;) + { + if (s[pos] == '\'') + { + ++pos; + if (s[pos] != '\'') + break; + ++pos; /* A double quote */ + } + else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */ + return 0; + else if (s[pos] == '\\') + { + if (s[pos+1] == '\0') + return 0; + pos += 2; + } + else + ++pos; + } + return pos - origpos; + } + else + return 0; +} + +static ptrdiff_t +mercury_pr (char *s, char *last, ptrdiff_t lastlen) +{ + size_t len0 = 0; + is_mercury_type = false; + + if (is_mercury_declaration) + len0 = skip_spaces (s + 2) - s; /* skip len0 blanks only for declarations */ + + size_t len = mercury_decl (s , len0); + + if (len == 0) + return 0; + + len += len0; + + if (( (s[len] == '.' /* this is a statement dot, not a module dot */ + || (s[len] == '(' && (len += 1)) + || (s[len] == ':' /* stopping in case of a rule */ + && s[len + 1] == '-' + && (len += 2))) + && (lastlen != len || memcmp (s, last, len) != 0) + ) + /* types are often declared on several lines so keeping just + the first line */ + || is_mercury_type + ) + { + make_tag (s, 0, true, s, len, lineno, linecharno); + return len; + } + + return 0; +} + /* * Support for Erlang diff --git a/lisp/speedbar.el b/lisp/speedbar.el index 12e57b1108..63f3cd6ca1 100644 --- a/lisp/speedbar.el +++ b/lisp/speedbar.el @@ -3534,6 +3534,8 @@ speedbar-fetch-etags-parse-list speedbar-parse-c-or-c++tag) ("^\\.emacs$\\|.\\(el\\|l\\|lsp\\)\\'" . "def[^i]+\\s-+\\(\\(\\w\\|[-_]\\)+\\)\\s-*\C-?") + ("^\\.m$\\'" . + "\\(^:-\\)?\\s-*\\(\\(pred\\|func\\|type\\|instance\\|typeclass\\)+\\s+\\([a-z]+[a-zA-Z0-9_]*\\)+\\)\\s-*(?^?") ; ("\\.\\([fF]\\|for\\|FOR\\|77\\|90\\)\\'" . ; speedbar-parse-fortran77-tag) ("\\.tex\\'" . speedbar-parse-tex-string) -- 2.26.3