[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5870-g87149cfc
From: Arnold Robbins
Subject: [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5870-g87149cfc
Date: Sun, 15 Dec 2024 08:01:13 -0500 (EST)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, feature/minrx has been updated
via 87149cfcf2b6acf748cf820d60ef948045f15a5d (commit)
via 3886d976985bfd963491853cdb3a6764694e7711 (commit)
via 81eb42976124a83fd3ee88bdf5dabe8fa75bd852 (commit)
from 081e2ce95e574b6b09481b5afed777d33799832f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=87149cfcf2b6acf748cf820d60ef948045f15a5d
commit 87149cfcf2b6acf748cf820d60ef948045f15a5d
Merge: 081e2ce9 3886d976
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Sun Dec 15 15:01:02 2024 +0200
Merge branch 'master' into feature/minrx
diff --cc re.c
index 07652106,c74b468e..3880f6f6
--- a/re.c
+++ b/re.c
@@@ -262,98 -261,68 +262,98 @@@ make_regexp(const char *s, size_t len,
*dest = '\0';
len = dest - buf;
- ezalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
+ ezalloc(rp, Regexp *, sizeof(*rp));
- rp->pat.allocated = 0; /* regex will allocate the buffer */
- emalloc(rp->pat.fastmap, char *, 256);
- /*
- * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
- * was going to turn out to be, I wouldn't have bothered with it.
- *
- * In the case where we have a multibyte character set, we have no
- * choice but to use RE_ICASE, since the casetable is for single-byte
- * character sets only.
- *
- * On the other hand, if we do have a single-byte character set,
- * using the casetable should give a performance improvement, since
- * it's computed only once, not each time a regex is compiled. We
- * also think it's probably better for portability. See the
- * discussion by the definition of casetable[] in eval.c.
- */
+ if (use_gnu_matchers) {
+ rp->pat.allocated = 0; /* regex will allocate the buffer */
+ emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
- ignorecase = !! ignorecase; /* force to 1 or 0 */
- if (ignorecase) {
- if (gawk_mb_cur_max > 1) {
- syn |= RE_ICASE;
- rp->pat.translate = NULL;
+ /*
+ * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
+ * was going to turn out to be, I wouldn't have bothered with it.
+ *
+ * In the case where we have a multibyte character set, we have no
+ * choice but to use RE_ICASE, since the casetable is for single-byte
+ * character sets only.
+ *
+ * On the other hand, if we do have a single-byte character set,
+ * using the casetable should give a performance improvement, since
+ * it's computed only once, not each time a regex is compiled. We
+ * also think it's probably better for portability. See the
+ * discussion by the definition of casetable[] in eval.c.
+ */
+
+ ignorecase = !! ignorecase; /* force to 1 or 0 */
+ if (ignorecase) {
+ if (gawk_mb_cur_max > 1) {
+ syn |= RE_ICASE;
+ rp->pat.translate = NULL;
+ } else {
+ syn &= ~RE_ICASE;
+ rp->pat.translate = (RE_TRANSLATE_TYPE)
casetable;
+ }
} else {
+ rp->pat.translate = NULL;
syn &= ~RE_ICASE;
- rp->pat.translate = (RE_TRANSLATE_TYPE) casetable;
}
+
+ /* initialize dfas to hold syntax */
+ if (first) {
+ first = false;
+ dfaregs[0] = dfaalloc();
+ dfaregs[1] = dfaalloc();
+ dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);
+ dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE,
DFA_ANCHOR);
+ }
+
+ re_set_syntax(syn);
+
+ if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {
+ refree(rp);
+ if (! canfatal) {
+ /* rerr already gettextized inside regex
routines */
+ error("%s: /%s/", rerr, s);
+ return NULL;
+ }
+ fatal("invalid regexp: %s: /%s/", rerr, s);
+ }
+
+ /* gack. this must be done *after* re_compile_pattern */
+ rp->pat.newline_anchor = false; /* don't get \n in middle of
string */
+ if (dfa && ! no_dfa) {
+ rp->dfareg = dfaalloc();
+ dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);
+ dfacomp(buf, len, rp->dfareg, true);
+ } else
+ rp->dfareg = NULL;
} else {
- rp->pat.translate = NULL;
- syn &= ~RE_ICASE;
- }
+ int flags = MINRX_REG_EXTENDED | MINRX_REG_BRACK_ESCAPE |
+ MINRX_REG_BRACE_COMPAT | MINRX_REG_NATIVE1B;
+ int ret;
- /* initialize dfas to hold syntax */
- if (first) {
- first = false;
- dfaregs[0] = dfaalloc();
- dfaregs[1] = dfaalloc();
- dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);
- dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE, DFA_ANCHOR);
- }
+ if (ignorecase)
+ flags |= MINRX_REG_ICASE;
- re_set_syntax(syn);
+ if (syn == RE_SYNTAX_GNU_AWK)
+ flags |= (MINRX_REG_EXTENSIONS_GNU |
MINRX_REG_EXTENSIONS_BSD);
- if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {
- refree(rp);
- if (! canfatal) {
+ if ((ret = minrx_regncomp(& rp->mre_pat, len, buf, flags)) !=
0) {
/* rerr already gettextized inside regex routines */
- error("%s: /%s/", rerr, s);
- return NULL;
+ rerr = get_minrx_regerror(ret, rp);
+
+ refree(rp);
+ if (! canfatal) {
+ error("%s: /%s/", rerr, s);
+ return NULL;
+ }
+ fatal("invalid regexp: %s: /%s/", rerr, s);
}
- fatal("invalid regexp: %s: /%s/", rerr, s);
- }
- /* gack. this must be done *after* re_compile_pattern */
- rp->pat.newline_anchor = false; /* don't get \n in middle of string */
- if (dfa && ! no_dfa) {
- rp->dfareg = dfaalloc();
- dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);
- dfacomp(buf, len, rp->dfareg, true);
- } else
- rp->dfareg = NULL;
+ // Allocate re_nsub + 1, since 0 is the whole thing and 1-N
+ // are for actual parenthesized subexpressions.
+ emalloc(rp->mre_regs, minrx_regmatch_t *,
+ (rp->mre_pat.re_nsub + 1) * sizeof(minrx_regmatch_t),
"make_regexp");
+ }
/* Additional flags that help with RS as regexp. */
for (i = 0; i < len; i++) {
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 7 +++++++
array.c | 16 ++++++++--------
awk.h | 12 ++++++------
awkgram.c | 38 +++++++++++++++++++-------------------
awkgram.y | 38 +++++++++++++++++++-------------------
builtin.c | 20 ++++++++++----------
cint_array.c | 16 ++++++++--------
command.c | 14 +++++++-------
command.y | 14 +++++++-------
debug.c | 50 +++++++++++++++++++++++++-------------------------
eval.c | 20 ++++++++++----------
ext.c | 2 +-
field.c | 24 ++++++++++++------------
gawkapi.c | 17 ++++++++---------
int_array.c | 8 ++++----
interpret.h | 7 +++----
io.c | 30 ++++++++++++++----------------
main.c | 14 ++++++--------
mpfr.c | 4 ++--
node.c | 22 +++++++++++-----------
printf.c | 44 ++++++++++++++++++++++----------------------
profile.c | 34 +++++++++++++++++-----------------
re.c | 6 +++---
str_array.c | 6 +++---
symbol.c | 12 ++++++------
vms/ChangeLog | 4 ++++
vms/vms_misc.c | 2 +-
vms/vms_popen.c | 6 +++---
28 files changed, 246 insertions(+), 241 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5870-g87149cfc,
Arnold Robbins <=