gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2406-g5a619e1


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2406-g5a619e1
Date: Thu, 19 Jan 2017 19:08:36 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  5a619e1986724cf8e27b637509925a8da36837e8 (commit)
      from  baadccc7297fa9a0cd1bcc276385872fa0ca8b6e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=5a619e1986724cf8e27b637509925a8da36837e8

commit 5a619e1986724cf8e27b637509925a8da36837e8
Author: Arnold D. Robbins <address@hidden>
Date:   Thu Jan 19 21:08:19 2017 +0200

    Speed up programs that toggle IGNORECASE a lot.

diff --git a/ChangeLog b/ChangeLog
index 62b9e85..44aa730 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2017-01-04         Arnold Robbins        <address@hidden>
+
+       Trade space for time for programs that toggle IGNORECASE a lot.
+       Brings 25% to 39% speedup.  NODE does not actually grow in size.
+
+       * awk.h (NODE::preg): Now an array of size two.
+       [CASE]: Flag no longer needed, so removed.
+       (IGNORECASE): Change type from int to bool.
+       * awkgram.y (make_regnode): Build two copies of the compiled regexp,
+       one without ignorecase, and one with.
+       * io.c (RS_re): Array replacing RS_re_yes_case and RS_re_no_case.
+       (set_RS): Use RS_re[IGNORECASE] as appropriate. Free and recompute
+       as needed.
+       * main.c (IGNORECASE): Change type from int to bool.
+       * re.c (re_update): Simplify the code. No need to check CASE flag
+       any longer. Recompute only if text of regexp changed.
+       * symbol.c (free_bc_internal): Adjust to free both elements of
+       m->re_reg.
+
 2017-01-18         Andrew J. Schorr     <address@hidden>
 
        * interpret.h (r_interpret): Increase robustness of the optimization
diff --git a/NEWS b/NEWS
index 40bf9cd..71867cc 100644
--- a/NEWS
+++ b/NEWS
@@ -98,6 +98,8 @@ Changes from 4.1.x to 4.2.0
     recommend that you do so.  Fortunately, the changes are fairly minor
     and straightforward.
 
+24. Programs that toggle IGNORECASE a lot should now be noticeably faster.
+
 Changes from 4.1.3 to 4.1.4
 ---------------------------
 
diff --git a/awk.h b/awk.h
index 278f54c..d5c88fd 100644
--- a/awk.h
+++ b/awk.h
@@ -343,7 +343,7 @@ typedef struct exp_node {
                        } l;
                        union {
                                struct exp_node *rptr;
-                               Regexp *preg;
+                               Regexp *preg[2];
                                struct exp_node **av;
                                BUCKET **bv;
                                void *aq;
@@ -361,9 +361,8 @@ typedef struct exp_node {
                        struct exp_node *rn;
                        unsigned long cnt;
                        unsigned long reflags;
-#                              define  CASE            1
-#                              define  CONSTANT        2
-#                              define  FS_DFLT         4
+#                              define  CONSTANT        1
+#                              define  FS_DFLT         2
                } nodep;
 
                struct {
@@ -1083,7 +1082,7 @@ extern long NF;
 extern long NR;
 extern long FNR;
 extern int BINMODE;
-extern int IGNORECASE;
+extern bool IGNORECASE;
 extern bool RS_is_null;
 extern char *OFS;
 extern int OFSlen;
diff --git a/awkgram.c b/awkgram.c
index b80caa1..c6f47db 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -7422,8 +7422,14 @@ make_regnode(int type, NODE *exp)
        n->re_cnt = 1;
 
        if (type == Node_regex) {
-               n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
-               if (n->re_reg == NULL) {
+               n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
+               if (n->re_reg[0] == NULL) {
+                       freenode(n);
+                       return NULL;
+               }
+               n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false);
+               if (n->re_reg[1] == NULL) {
+                       refree(n->re_reg[0]);
                        freenode(n);
                        return NULL;
                }
diff --git a/awkgram.y b/awkgram.y
index 7f957bc..8027881 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -5002,8 +5002,14 @@ make_regnode(int type, NODE *exp)
        n->re_cnt = 1;
 
        if (type == Node_regex) {
-               n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
-               if (n->re_reg == NULL) {
+               n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
+               if (n->re_reg[0] == NULL) {
+                       freenode(n);
+                       return NULL;
+               }
+               n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false);
+               if (n->re_reg[1] == NULL) {
+                       refree(n->re_reg[0]);
                        freenode(n);
                        return NULL;
                }
diff --git a/io.c b/io.c
index 688723f..d65f2aa 100644
--- a/io.c
+++ b/io.c
@@ -318,8 +318,7 @@ static long read_default_timeout;
 
 static struct redirect *red_head = NULL;
 static NODE *RS = NULL;
-static Regexp *RS_re_yes_case; /* regexp for RS when ignoring case */
-static Regexp *RS_re_no_case;  /* regexp for RS when not ignoring case */
+static Regexp *RS_re[2];       /* index 0 - don't ignore case, index 1, do */
 static Regexp *RS_regexp;
 
 static const char nonfatal[] = "NONFATAL";
@@ -3870,7 +3869,7 @@ set_RS()
                 * set_IGNORECASE() relies on this routine to call
                 * set_FS().
                 */
-               RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case);
+               RS_regexp = RS_re[IGNORECASE];
                goto set_FS;
        }
        unref(save_rs);
@@ -3882,9 +3881,9 @@ set_RS()
         * Please do not remerge the if condition; hinders memory deallocation
         * in case of fatal error in make_regexp.
         */
-       refree(RS_re_yes_case); /* NULL argument is ok */
-       refree(RS_re_no_case);
-       RS_re_yes_case = RS_re_no_case = RS_regexp = NULL;
+       refree(RS_re[0]);       /* NULL argument is ok */
+       refree(RS_re[1]);
+       RS_re[0] = RS_re[1] = RS_regexp = NULL;
 
        if (RS->stlen == 0) {
                RS_is_null = true;
@@ -3892,9 +3891,9 @@ set_RS()
        } else if (RS->stlen > 1 && ! do_traditional) {
                static bool warned = false;
 
-               RS_re_yes_case = make_regexp(RS->stptr, RS->stlen, false, true, true);
-               RS_re_no_case = make_regexp(RS->stptr, RS->stlen, true, true, true);
-               RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case);
+               RS_re[0] = make_regexp(RS->stptr, RS->stlen, false, true, true);
+               RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true);
+               RS_regexp = RS_re[IGNORECASE];
 
                matchrec = rsrescan;
 
diff --git a/main.c b/main.c
index 5c814b9..56482f5 100644
--- a/main.c
+++ b/main.c
@@ -85,7 +85,7 @@ long NF;
 long NR;
 long FNR;
 int BINMODE;
-int IGNORECASE;
+bool IGNORECASE;
 char *OFS;
 char *ORS;
 char *OFMT;
diff --git a/re.c b/re.c
index 5be3d17..73e75cb 100644
--- a/re.c
+++ b/re.c
@@ -349,50 +349,48 @@ re_update(NODE *t)
        NODE *t1;
 
        if (t->type == Node_val && (t->flags & REGEX) != 0)
-               return t->typed_re->re_reg;
-
-       if ((t->re_flags & CASE) == IGNORECASE) {
-               /* regex was compiled with settings matching IGNORECASE */
-               if ((t->re_flags & CONSTANT) != 0) {
-                       /* it's a constant, so just return it as is */
-                       assert(t->type == Node_regex);
-                       return t->re_reg;
-               }
-               t1 = t->re_exp;
-               if (t->re_text != NULL) {
-                       /* if contents haven't changed, just return it */
-                       if (cmp_nodes(t->re_text, t1, true) == 0)
-                               return t->re_reg;
-                       /* things changed, fall through to recompile */
-                       unref(t->re_text);
-               }
-               /* get fresh copy of the text of the regexp */
-               t->re_text = dupnode(t1);
+               return t->typed_re->re_reg[IGNORECASE];
+
+       if ((t->re_flags & CONSTANT) != 0) {
+               /* it's a constant, so just return it as is */
+               assert(t->type == Node_regex);
+               return t->re_reg[IGNORECASE];
        }
-       /* was compiled with different IGNORECASE or text changed */
+       t1 = t->re_exp;
+       if (t->re_text != NULL) {
+               /* if contents haven't changed, just return it */
+               if (cmp_nodes(t->re_text, t1, true) == 0)
+                       return t->re_reg[IGNORECASE];
+               /* things changed, fall through to recompile */
+               unref(t->re_text);
+       }
+       /* get fresh copy of the text of the regexp */
+       t->re_text = dupnode(t1);
+
+       /* text changed */
 
        /* free old */
-       if (t->re_reg != NULL)
-               refree(t->re_reg);
+       if (t->re_reg[0] != NULL)
+               refree(t->re_reg[0]);
+       if (t->re_reg[1] != NULL)
+               refree(t->re_reg[1]);
        if (t->re_cnt > 0)
                t->re_cnt++;
        if (t->re_cnt > 10)
                t->re_cnt = 0;
-       if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) {
+       if (t->re_text == NULL) {
                /* reset regexp text if needed */
                t1 = t->re_exp;
                unref(t->re_text);
                t->re_text = dupnode(t1);
        }
        /* compile it */
-       t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
-                               IGNORECASE, t->re_cnt, true);
-
-       /* clear case flag */
-       t->re_flags &= ~CASE;
-       /* set current value of case flag */
-       t->re_flags |= IGNORECASE;
-       return t->re_reg;
+       t->re_reg[0] = make_regexp(t->re_text->stptr, t->re_text->stlen,
+                               false, t->re_cnt, true);
+       t->re_reg[1] = make_regexp(t->re_text->stptr, t->re_text->stlen,
+                               true, t->re_cnt, true);
+
+       return t->re_reg[IGNORECASE];
 }
 
 /* resetup --- choose what kind of regexps we match */
diff --git a/symbol.c b/symbol.c
index e150430..65ed4d9 100644
--- a/symbol.c
+++ b/symbol.c
@@ -881,8 +881,10 @@ free_bc_internal(INSTRUCTION *cp)
        case Op_match:
        case Op_nomatch:
                m = cp->memory;
-               if (m->re_reg != NULL)
-                       refree(m->re_reg);
+               if (m->re_reg[0] != NULL)
+                       refree(m->re_reg[0]);
+               if (m->re_reg[1] != NULL)
+                       refree(m->re_reg[1]);
                if (m->re_exp != NULL)
                        unref(m->re_exp);
                if (m->re_text != NULL)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |   19 +++++++++++++++++++
 NEWS      |    2 ++
 awk.h     |    9 ++++-----
 awkgram.c |   10 ++++++++--
 awkgram.y |   10 ++++++++--
 io.c      |   17 ++++++++---------
 main.c    |    2 +-
 re.c      |   60 +++++++++++++++++++++++++++++-------------------------------
 symbol.c  |    6 ++++--
 9 files changed, 83 insertions(+), 52 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]