[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Help-smalltalk] Regex sub -expressions
From: |
Mike Anderson |
Subject: |
[Help-smalltalk] Regex sub -expressions |
Date: |
Fri, 27 May 2005 07:44:58 +0000 |
User-agent: |
Mozilla Thunderbird 0.7.3 (X11/20040803) |
For amusement, I was having a go at the Python Challenge
(http://www.pythonchallenge.com/) in Smalltalk. That made me think that
our regex implementation really needs to be able to capture
sub-expressions, so here is a patch that exposes them.
The reason for passing the C structure to Smalltalk is that it allows
the user to access it directly as an optimization, and also to decide
that they would prefer the results in something other than a Dictionary
of Intervals.
Incidentally, the fact that both regex implementations are in the
examples directory suggests that they are not 'official' packages. Could
we not elevate one to 'official' status?
Regards to all,
Mike
Common subdirectories: smalltalk-2.1g-orig/examples/.deps and
smalltalk-2.1g/examples/.deps
Common subdirectories: smalltalk-2.1g-orig/examples/.libs and
smalltalk-2.1g/examples/.libs
diff -u smalltalk-2.1g-orig/examples/re.c smalltalk-2.1g/examples/re.c
--- smalltalk-2.1g-orig/examples/re.c 2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/re.c 2005-05-27 07:22:26.586941880 +0000
@@ -74,6 +74,9 @@
static int reh_search (OOP srcOOP, OOP patternOOP, int from, int to),
reh_match (OOP srcOOP, OOP patternOOP, int from, int to);
+static struct pre_registers *reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to);
+static void reh_free_registers(struct pre_registers *regs);
+
static RegexCacheEntry cache[REGEX_CACHE_SIZE];
/* Smalltalk globals */
@@ -252,6 +255,38 @@
return res + 1;
}
+/* Search helper function */
+struct pre_registers *
+reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to)
+{
+ int res = 0;
+ const char *src;
+ struct pre_pattern_buffer *regex;
+ struct pre_registers *regs;
+ RegexCaching caching;
+
+ caching = lookupRegex (patternOOP, &regex);
+ if (caching != REGEX_CACHE_HIT && compileRegex (patternOOP, regex) != NULL)
+ return NULL;
+
+ /* now search */
+ src = &STRING_OOP_AT (OOP_TO_OBJ (srcOOP), 1);
+ regs = (struct pre_registers *) calloc (1, sizeof (struct pre_registers));
+ res = pre_search (regex, src, to, from - 1, to - from + 1, regs);
+
+ if (caching == REGEX_NOT_CACHED)
+ pre_free_pattern (regex);
+
+ return regs;
+}
+
+void
+reh_free_registers(struct pre_registers *regs)
+{
+ pre_free_registers(regs);
+ free(regs);
+}
+
/* Match helper function */
int
reh_match (OOP srcOOP, OOP patternOOP, int from, int to)
@@ -280,6 +315,8 @@
{
vmProxy = proxy;
vmProxy->defineCFunc ("reh_search", reh_search);
+ vmProxy->defineCFunc ("reh_search_full", reh_search_full);
+ vmProxy->defineCFunc ("reh_free_registers", reh_free_registers);
vmProxy->defineCFunc ("reh_match", reh_match);
vmProxy->defineCFunc ("reh_make_cacheable", reh_make_cacheable);
Files smalltalk-2.1g-orig/examples/re.o and smalltalk-2.1g/examples/re.o differ
diff -u smalltalk-2.1g-orig/examples/regex.la smalltalk-2.1g/examples/regex.la
--- smalltalk-2.1g-orig/examples/regex.la 2004-11-02 21:07:15.000000000 +0000
+++ smalltalk-2.1g/examples/regex.la 2005-05-27 07:22:30.489348624 +0000
@@ -32,4 +32,4 @@
dlpreopen=''
# Directory that this library needs to be installed in:
-libdir='/usr/local/lib/smalltalk'
+libdir='/usr/lib/smalltalk'
diff -u smalltalk-2.1g-orig/examples/regex.st smalltalk-2.1g/examples/regex.st
--- smalltalk-2.1g-orig/examples/regex.st 2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/regex.st 2005-05-27 07:25:22.609182432 +0000
@@ -68,7 +68,31 @@
defineCFunc: 'reh_match'
withSelectorArgs: 'lengthOfRegexMatch: pattern from: from to: to'
returning: #int
- args: #(#selfSmalltalk #smalltalk #int #int)!
+ args: #(#selfSmalltalk #smalltalk #int #int)
+!
+
+CStruct subclass: #CPreRegisters
+ declaration: #( (#allocated #int)
+ (#numRegs #int)
+ (#beg (#ptr #int))
+ (#end (#ptr #int)) )
+ classVariableNames: ''
+ poolDictionaries: ''
+ category: 'Regex'
+!
+
+String
+ defineCFunc: 'reh_search_full'
+ withSelectorArgs: 'searchRegexFull: pattern from: from to: to'
+ returning: CPreRegisters type
+ args: #(#selfSmalltalk #smalltalk #int #int).
+
+String
+ defineCFunc: 'reh_free_registers'
+ withSelectorArgs: 'freeCPreRegisters: regs'
+ returning: #void
+ args: #(#cObject).
+!
"--------------------------------------------------------------------------"
@@ -379,5 +403,21 @@
tokenize
^self tokenize: '[\n\t ]+' from: 1 to: self size
-! !
+!
+
+regexSubExprs: pattern from: from to: to
+ | cregs regs |
+ cregs := self searchRegexFull: pattern from: from to: to.
+ [ cregs beg value value >= 0 ifFalse: [ ^nil ].
+ regs := Dictionary new.
+ 0 to: cregs numRegs value - 1 do:
+ [ :i |
+ regs at: i put:
+ ((cregs beg value + i) value + 1
+ to: (cregs end value + i) value + 1) ].
+ ^regs ]
+ ensure:
+ [ self freeCPreRegisters: cregs. ].
+!
+!
- [Help-smalltalk] Regex sub -expressions,
Mike Anderson <=