[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
exclude: support posix regexps
From: |
Sergey Poznyakoff |
Subject: |
exclude: support posix regexps |
Date: |
Sat, 15 Feb 2014 20:34:08 +0200 |
Hello,
I'm currently implementing in GNU tar exclusion lists based on various
VCS ignore files (.gitignore, .hgignore, etc.). To do so, I needed to
extend the existing exclude module to support posix extended regexps.
Attached is the patch that implements it. I tried not to disturb the
existing API, so that the module can be used by existing code without
modifications.
Is it OK to push?
Regards,
Sergey
>From 8341b9150246b1a6dfcf71946fa701294f0e3ada Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <address@hidden>
Date: Sat, 15 Feb 2014 19:21:04 +0200
Subject: [PATCH] exclude: add support for posix regexps
This commit adds support for POSIX extended regular expressions
and fixes a long-standing memory leak (pattern buffer was never
freed). It also implements a new interface function to read
exclude patterns from a FILE, which passes an additional parameter
to its callback function, thereby allowing to preserve its state
between invocations.
* lib/exclude.c (struct patopts): Pack regex and pattern into union.
(pattern_buffer): New struct.
(exclude): New member patbuf.
(exclude_add_pattern_buffer): New function.
(free_exclude_segment): Free regexps.
(free_exclude): Free allocated pattern buffers.
(exclude_patopts): New function.
(file_pattern_matches): Use exclude_patopts.
(add_exclude): support regexps.
(add_exclude_fp): New function.
(add_exclude_file): Rewrite using add_exclude_fp.
* lib/exclude.h (EXCLUDE_REGEX, EXCLUDE_ALLOC): New flags.
(add_exclude_fp)
(exclude_add_pattern_buffer): New prototypes.
* modules/exclude: Depends on regex and filename.
---
lib/exclude.c | 191 +++++++++++++++++++++++++++++++++++++++++++++-----------
lib/exclude.h | 12 +++-
modules/exclude | 2 +
3 files changed, 169 insertions(+), 36 deletions(-)
diff --git a/lib/exclude.c b/lib/exclude.c
index 0865cee..be386bc 100644
--- a/lib/exclude.c
+++ b/lib/exclude.c
@@ -32,6 +32,7 @@
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
+#include <regex.h>
#include "exclude.h"
#include "hash.h"
@@ -39,6 +40,7 @@
#include "fnmatch.h"
#include "xalloc.h"
#include "verify.h"
+#include "filename.h"
#if USE_UNLOCKED_IO
# include "unlocked-io.h"
@@ -73,8 +75,12 @@ verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE |
EXCLUDE_WILDCARDS)
struct patopts
{
- char const *pattern;
int options;
+ union
+ {
+ char const *pattern;
+ regex_t re;
+ } v;
};
/* An array of pattern-options pairs. */
@@ -104,13 +110,33 @@ struct exclude_segment
} v;
};
+struct pattern_buffer
+ {
+ struct pattern_buffer *next;
+ char *base;
+ };
+
/* The exclude structure keeps a singly-linked list of exclude segments,
maintained in reverse order. */
struct exclude
{
struct exclude_segment *head;
+ struct pattern_buffer *patbuf;
};
+/* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
+ add_exclude_file and add_exclude_fp below) can use this function
+ if it modifies the pattern, to ensure the allocated memory will be
+ properly reclaimed upon calling free_exclude. */
+void
+exclude_add_pattern_buffer (struct exclude *ex, char *buf)
+{
+ struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
+ pbuf->base = buf;
+ pbuf->next = ex->patbuf;
+ ex->patbuf = pbuf;
+}
+
/* Return true if STR has or may have wildcards, when matched with OPTIONS.
Return false if STR definitely does not have wildcards. */
bool
@@ -243,9 +269,16 @@ new_exclude_segment (struct exclude *ex, enum exclude_type
type, int options)
static void
free_exclude_segment (struct exclude_segment *seg)
{
+ size_t i;
+
switch (seg->type)
{
case exclude_pattern:
+ for (i = 0; i < seg->v.pat.exclude_count; i++)
+ {
+ if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
+ regfree (&seg->v.pat.exclude[i].v.re);
+ }
free (seg->v.pat.exclude);
break;
@@ -261,12 +294,23 @@ void
free_exclude (struct exclude *ex)
{
struct exclude_segment *seg;
+ struct pattern_buffer *pbuf;
+
for (seg = ex->head; seg; )
{
struct exclude_segment *next = seg->next;
free_exclude_segment (seg);
seg = next;
}
+
+ for (pbuf = ex->patbuf; pbuf; )
+ {
+ struct pattern_buffer *next = pbuf->next;
+ free (pbuf->base);
+ free (pbuf);
+ pbuf = next;
+ }
+
free (ex);
}
@@ -327,14 +371,24 @@ exclude_fnmatch (char const *pattern, char const *f, int
options)
: fnmatch_no_wildcards);
bool matched = ((*matcher) (pattern, f, options) == 0);
char const *p;
-
+
if (! (options & EXCLUDE_ANCHORED))
for (p = f; *p && ! matched; p++)
if (*p == '/' && p[1] != '/')
- matched = ((*matcher) (pattern, p + 1, options) == 0);
+ matched = ((*matcher) (pattern, p + 1, options) == 0);
return matched;
}
+
+bool
+exclude_patopts (struct patopts const *opts, char const *f)
+{
+ int options = opts->options;
+
+ return (options & EXCLUDE_REGEX)
+ ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
+ : exclude_fnmatch (opts->v.pattern, f, options);
+}
/* Return true if the exclude_pattern segment SEG matches F. */
@@ -347,9 +401,7 @@ file_pattern_matches (struct exclude_segment const *seg,
char const *f)
for (i = 0; i < exclude_count; i++)
{
- char const *pattern = exclude[i].pattern;
- int options = exclude[i].options;
- if (exclude_fnmatch (pattern, f, options))
+ if (exclude_patopts (exclude + i, f))
return true;
}
return false;
@@ -454,26 +506,70 @@ void
add_exclude (struct exclude *ex, char const *pattern, int options)
{
struct exclude_segment *seg;
+ struct exclude_pattern *pat;
+ struct patopts *patopts;
- if ((options & EXCLUDE_WILDCARDS)
- && fnmatch_pattern_has_wildcards (pattern, options))
+ if ((options & EXCLUDE_REGEX)
+ || ((options & EXCLUDE_WILDCARDS)
+ && fnmatch_pattern_has_wildcards (pattern, options)))
{
- struct exclude_pattern *pat;
- struct patopts *patopts;
-
if (! (ex->head && ex->head->type == exclude_pattern
- && ((ex->head->options & EXCLUDE_INCLUDE)
- == (options & EXCLUDE_INCLUDE))))
- new_exclude_segment (ex, exclude_pattern, options);
- seg = ex->head;
+ && ((ex->head->options & EXCLUDE_INCLUDE)
+ == (options & EXCLUDE_INCLUDE))))
+ new_exclude_segment (ex, exclude_pattern, options);
+ seg = ex->head;
+
pat = &seg->v.pat;
if (pat->exclude_count == pat->exclude_alloc)
pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
sizeof *pat->exclude);
patopts = &pat->exclude[pat->exclude_count++];
- patopts->pattern = pattern;
+
patopts->options = options;
+ if (options & EXCLUDE_REGEX)
+ {
+ int rc;
+ int cflags = REG_NOSUB|REG_EXTENDED|
+ ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
+
+ if (options & FNM_LEADING_DIR)
+ {
+ char *tmp;
+ size_t len = strlen (pattern);
+
+ while (len > 0 && ISSLASH (pattern[len-1]))
+ --len;
+
+ if (len == 0)
+ rc = 1;
+ else
+ {
+ tmp = xmalloc (len + 7);
+ memcpy (tmp, pattern, len);
+ strcpy (tmp + len, "(/.*)?");
+ rc = regcomp (&patopts->v.re, tmp, cflags);
+ free (tmp);
+ }
+ }
+ else
+ rc = regcomp (&patopts->v.re, pattern, cflags);
+
+ if (rc)
+ {
+ pat->exclude_count--;
+ return;
+ }
+ }
+ else
+ {
+ if (options & EXCLUDE_ALLOC)
+ {
+ pattern = xstrdup (pattern);
+ exclude_add_pattern_buffer (ex, (char*) pattern);
+ }
+ patopts->v.pattern = pattern;
+ }
}
else
{
@@ -498,45 +594,39 @@ add_exclude (struct exclude *ex, char const *pattern, int
options)
/* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
OPTIONS. LINE_END terminates each pattern in the file. If
LINE_END is a space character, ignore trailing spaces and empty
- lines in FILE. Return -1 on failure, 0 on success. */
+ lines in FP. Return -1 on failure, 0 on success. */
int
-add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
- struct exclude *ex, char const *file_name, int options,
- char line_end)
+add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
+ struct exclude *ex, FILE *fp, int options,
+ char line_end,
+ void *data)
{
- bool use_stdin = file_name[0] == '-' && !file_name[1];
- FILE *in;
char *buf = NULL;
char *p;
- char const *pattern;
+ char *pattern;
char const *lim;
size_t buf_alloc = 0;
size_t buf_count = 0;
int c;
int e = 0;
-
- if (use_stdin)
- in = stdin;
- else if (! (in = fopen (file_name, "r")))
- return -1;
-
- while ((c = getc (in)) != EOF)
+
+ while ((c = getc (fp)) != EOF)
{
if (buf_count == buf_alloc)
buf = x2realloc (buf, &buf_alloc);
buf[buf_count++] = c;
}
- if (ferror (in))
- e = errno;
-
- if (!use_stdin && fclose (in) != 0)
+ if (ferror (fp))
e = errno;
buf = xrealloc (buf, buf_count + 1);
buf[buf_count] = line_end;
lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
+
+ exclude_add_pattern_buffer (ex, buf);
+
pattern = buf;
for (p = buf; p < lim; p++)
@@ -554,7 +644,7 @@ add_exclude_file (void (*add_func) (struct exclude *, char
const *, int),
}
*pattern_end = '\0';
- (*add_func) (ex, pattern, options);
+ (*add_func) (ex, pattern, options, data);
next_pattern:
pattern = p + 1;
@@ -563,3 +653,34 @@ add_exclude_file (void (*add_func) (struct exclude *, char
const *, int),
errno = e;
return e ? -1 : 0;
}
+
+static void
+call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
+{
+ void (*addfn) (struct exclude *, char const *, int) = data;
+ addfn (ex, pattern, options);
+}
+
+int
+add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
+ struct exclude *ex, char const *file_name, int options,
+ char line_end)
+{
+ bool use_stdin = file_name[0] == '-' && !file_name[1];
+ FILE *in;
+ int rc = 0;
+
+ if (use_stdin)
+ in = stdin;
+ else if (! (in = fopen (file_name, "r")))
+ return -1;
+
+ rc = add_exclude_fp (call_addfn, ex, in, options, line_end, add_func);
+
+ if (!use_stdin && fclose (in) != 0)
+ rc = -1;
+
+ return rc;
+}
+
+
diff --git a/lib/exclude.h b/lib/exclude.h
index 45dbe69..9555218 100644
--- a/lib/exclude.h
+++ b/lib/exclude.h
@@ -20,6 +20,7 @@
#define _GL_EXCLUDE_H 1
#include <stdbool.h>
+#include <stdio.h>
/* Written by Paul Eggert <address@hidden>
and Sergey Poznyakoff <address@hidden> */
@@ -37,6 +38,12 @@
option, these characters are ordinary and fnmatch is not used. */
#define EXCLUDE_WILDCARDS (1 << 28)
+/* Patterns are POSIX extended regular expressions */
+#define EXCLUDE_REGEX (1 << 27)
+
+/* Allocate storage for the pattern */
+#define EXCLUDE_ALLOC (1 << 26)
+
struct exclude;
bool fnmatch_pattern_has_wildcards (const char *, int) _GL_ATTRIBUTE_PURE;
@@ -46,7 +53,10 @@ void free_exclude (struct exclude *);
void add_exclude (struct exclude *, char const *, int);
int add_exclude_file (void (*) (struct exclude *, char const *, int),
struct exclude *, char const *, int, char);
+int add_exclude_fp (void (*) (struct exclude *, char const *, int, void *),
+ struct exclude *, FILE *, int, char, void *);
bool excluded_file_name (struct exclude const *, char const *);
-bool exclude_fnmatch (char const *pattern, char const *f, int options);
+void exclude_add_pattern_buffer (struct exclude *ex, char *buf);
+bool exclude_fnmatch (char const *, char const *, int);
#endif /* _GL_EXCLUDE_H */
diff --git a/modules/exclude b/modules/exclude
index b1f8399..04fb0e5 100644
--- a/modules/exclude
+++ b/modules/exclude
@@ -6,10 +6,12 @@ lib/exclude.h
lib/exclude.c
Depends-on:
+filename
fnmatch
hash
mbscasecmp
mbuiter
+regex
stdbool
verify
xalloc
--
1.7.12.1
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- exclude: support posix regexps,
Sergey Poznyakoff <=