[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: ^ in FS
From: Aharon Robbins
Subject: Re: ^ in FS
Date: Tue, 03 Feb 2009 22:14:23 +0200
Greetings. I have looked into the issue of ^ in FS in more detail,
and also consulted with Brian Kernighan.
I have changed gawk-stable to work the way the Bell Labs awk does. I
think that it is conceptually more correct to match ^ only at the
beginning of the entire record. The full patch is included below.
The first patch I sent you offline is incomplete.
This will show up in CVS on Savannah shortly.
Thanks,
Arnold
--------------------------------------------------------------------
Thu Jan 29 21:14:30 2009 Arnold D. Robbins <address@hidden>
* field.c (parse_field, re_parse_field, def_parse_field,
posix_def_parse_field, null_parse_field, sc_parse_field,
fw_parse_field): Add new last arg `in_middle'. Ignored by all
except re_parse_field.
(re_parse_field): Enhance logic to only allow ^ in a regex to match
if indeed at the beginning of a record.
(get_field): Adjust calls to parse_field.
(do_split): Adjust declaration of and call through parseit.
Index: awk.h
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/awk.h,v
retrieving revision 1.20
diff -u -r1.20 awk.h
--- awk.h 27 Jan 2009 20:01:13 -0000 1.20
+++ awk.h 27 Jan 2009 20:31:32 -0000
@@ -265,7 +265,7 @@
#endif /* GNU_REGEX */
/* regexp matching flags: */
#define RE_NEED_START 1 /* need to know start/end of match */
-#define RE_NO_BOL 2 /* for RS, not allowed to match ^ in regexp */
+#define RE_NO_BOL 2 /* not allowed to match ^ in regexp */
/* Stuff for losing systems. */
#if !defined(HAVE_STRTOD)
Index: field.c
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/field.c,v
retrieving revision 1.10
diff -u -r1.10 field.c
--- field.c 23 Apr 2008 19:35:57 -0000 1.10
+++ field.c 29 Jan 2009 19:23:18 -0000
@@ -39,20 +39,20 @@
typedef void (* Setfunc) P((long, char *, long, NODE *));
static long (*parse_field) P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static void rebuild_record P((void));
static long re_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static long def_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static long posix_def_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static long null_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static long sc_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static long fw_parse_field P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
static void set_element P((long num, char * str, long len, NODE *arr));
static void grow_fields_arr P((long num));
static void set_field P((long num, char *str, long len, NODE *dummy));
@@ -364,12 +364,14 @@
NODE *fs ATTRIBUTE_UNUSED,
Regexp *rp,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle)
{
register char *scan = *buf;
register long nf = parse_high_water;
register char *field;
register char *end = scan + len;
+ int regex_flags = RE_NEED_START;
#ifdef MBS_SUPPORT
size_t mbclen = 0;
mbstate_t mbs;
@@ -377,6 +379,9 @@
memset(&mbs, 0, sizeof(mbstate_t));
#endif
+ if (in_middle)
+ regex_flags |= RE_NO_BOL;
+
if (up_to == UNLIMITED)
nf = 0;
if (len == 0)
@@ -387,8 +392,9 @@
scan++;
field = scan;
while (scan < end
- && research(rp, scan, 0, (end - scan), RE_NEED_START) != -1
+ && research(rp, scan, 0, (end - scan), regex_flags) != -1
&& nf < up_to) {
+ regex_flags |= RE_NO_BOL;
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
#ifdef MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
@@ -439,7 +445,8 @@
NODE *fs,
Regexp *rp ATTRIBUTE_UNUSED,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle ATTRIBUTE_UNUSED)
{
register char *scan = *buf;
register long nf = parse_high_water;
@@ -506,7 +513,8 @@
NODE *fs,
Regexp *rp ATTRIBUTE_UNUSED,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle ATTRIBUTE_UNUSED)
{
register char *scan = *buf;
register long nf = parse_high_water;
@@ -570,7 +578,8 @@
NODE *fs ATTRIBUTE_UNUSED,
Regexp *rp ATTRIBUTE_UNUSED,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle ATTRIBUTE_UNUSED)
{
register char *scan = *buf;
register long nf = parse_high_water;
@@ -618,7 +627,8 @@
NODE *fs,
Regexp *rp ATTRIBUTE_UNUSED,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle ATTRIBUTE_UNUSED)
{
register char *scan = *buf;
register char fschar;
@@ -695,7 +705,8 @@
NODE *fs ATTRIBUTE_UNUSED,
Regexp *rp ATTRIBUTE_UNUSED,
Setfunc set, /* routine to set the value of the parsed field */
- NODE *n)
+ NODE *n,
+ int in_middle ATTRIBUTE_UNUSED)
{
register char *scan = *buf;
register long nf = parse_high_water;
@@ -763,6 +774,7 @@
NODE **
get_field(register long requested, Func_ptr *assign)
{
+ int in_middle = FALSE;
/*
* if requesting whole line but some other field has been altered,
* then the whole line must be rebuilt
@@ -775,7 +787,7 @@
fields_arr[0]->stlen -
(parse_extent - fields_arr[0]->stptr),
save_FS, FS_regexp, set_field,
- (NODE *) NULL);
+ (NODE *) NULL, in_middle);
parse_high_water = NF;
}
rebuild_record();
@@ -800,9 +812,11 @@
*/
if (parse_high_water == 0) /* starting at the beginning */
parse_extent = fields_arr[0]->stptr;
+ else
+ in_middle = TRUE;
parse_high_water = (*parse_field)(requested, &parse_extent,
fields_arr[0]->stlen - (parse_extent -
fields_arr[0]->stptr),
- save_FS, FS_regexp, set_field, (NODE *) NULL);
+ save_FS, FS_regexp, set_field, (NODE *) NULL, in_middle);
/*
* if we reached the end of the record, set NF to the number of
@@ -851,7 +865,7 @@
NODE *src, *arr, *sep, *fs, *src2, *fs2, *tmp;
char *s;
long (*parseit) P((long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *));
+ Regexp *, Setfunc, NODE *, int));
Regexp *rp = NULL;
src = force_string(tree_eval(tree->lnode));
@@ -924,7 +938,7 @@
s = src2->stptr;
tmp = tmp_number((AWKNUM) (*parseit)(UNLIMITED, &s, (int) src2->stlen,
- fs2, rp, set_element, arr));
+ fs2, rp, set_element, arr, FALSE));
unref(src2);
unref(fs2);
return tmp;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: ^ in FS,
Aharon Robbins <=