[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: regexp character classes not supported?
From: |
Marco Maggi |
Subject: |
Re: regexp character classes not supported? |
Date: |
Sun, 30 Dec 2012 12:38:44 +0100 |
Limbo Peng wrote:
> On Sat, Dec 29, 2012 at 1:22 AM, Mark H Weaver <address@hidden> wrote:
> Regular expression syntax is not standardized, and there are
> several
> different variants. The "\d" syntax for character classes is a
> non-standard perl extension, and is not supported by Guile.
> Thx...seems that I've been taking such syntax for granted for a long
> time :(
Shameless plug: if you do not mind installing stuff, you
can try the regexp library re2[1] (written in C++) through
its C wrapper CRE2[2][3].
[1] <http://code.google.com/p/re2/>
[2] <http://github.com/marcomaggi/cre2/>
[3] <http://code.google.com/p/cre2/downloads/list>
Here is a Guile program making use of it through the
foreign functions interface (sorry for the R6RS code, it
also needs to be polished here and there):
;; guile-cre2.sps --
;;
;; Show off CRE2 with Guile.
#!r6rs
(import (rnrs)
(system foreign)
(ice-9 format))
(define-syntax begin0
(syntax-rules ()
((_ ?expr0 ?expr ...)
(call-with-values
(lambda () ?expr0)
(lambda args ?expr ... (apply values args))))))
(define-syntax unwind-protect
;;Not general, but enough.
(syntax-rules ()
((_ ?body ?cleanup0 ?cleanup ...)
(let ((cleanup (lambda () ?cleanup0 ?cleanup ...)))
(with-exception-handler
(lambda (E) (cleanup) (raise E))
(lambda () (begin0 ?body (cleanup))))))))
(define (main)
(let* ((ptn "(ciao) (hello)")
(ptn.str (string->pointer ptn))
(ptn.len (string-length ptn))
(opt (cre2_opt_new))
(rex (cre2_new ptn.str ptn.len opt)))
(unwind-protect
(let* ((txt "ciao hello")
(txt.str (string->pointer txt))
(txt.len (string-length txt))
(nmatch 3)
(matches (make-cre2_string_t nmatch))
(ranges (make-cre2_range_t nmatch)))
(let ((rv (cre2_match rex
txt.str txt.len 0 txt.len
CRE2_UNANCHORED matches nmatch)))
(when (positive? rv)
(cre2_strings_to_ranges txt.str ranges matches nmatch)
(let ((R (parse-cre2_range_t ranges nmatch)))
(print "Full match: ~s\n"
(substring txt (list-ref R 0) (list-ref R 1)))
(print "1st submatch: ~s\n"
(substring txt (list-ref R 2) (list-ref R 3)))
(print "2nd submatch: ~s\n"
(substring txt (list-ref R 4) (list-ref R 5)))
))))
(cre2_delete rex)
(cre2_opt_delete opt))))
(define cre2
(dynamic-link "libcre2.so"))
(define cre2_new
(let* ((ptr (dynamic-func "cre2_new" cre2))
(callout (pointer->procedure '* ptr (list '* int '*))))
(lambda (ptn.str ptn.len options)
(callout ptn.str ptn.len options))))
(define cre2_delete
(let* ((ptr (dynamic-func "cre2_delete" cre2))
(callout (pointer->procedure void ptr (list '*))))
(lambda (rex)
(callout rex))))
(define cre2_opt_new
(let* ((ptr (dynamic-func "cre2_opt_new" cre2))
(callout (pointer->procedure '* ptr '())))
(lambda ()
(callout))))
(define cre2_opt_delete
(let* ((ptr (dynamic-func "cre2_opt_delete" cre2))
(callout (pointer->procedure void ptr (list '*))))
(lambda (options)
(callout options))))
(define cre2_match
(let* ((ptr (dynamic-func "cre2_match" cre2))
(callout (pointer->procedure
int ptr (list '* '* int
int int int '* int))))
(lambda (rex txt.str txt.len txt.start txt.end anchor match nmatch)
(callout rex
txt.str txt.len txt.start txt.end
anchor match nmatch))))
(define cre2_strings_to_ranges
(let* ((ptr (dynamic-func "cre2_strings_to_ranges" cre2))
(callout (pointer->procedure
void ptr (list '* '* '* int))))
(lambda (txt.str ranges strings nmatch)
(callout txt.str ranges strings nmatch))))
(define CRE2_UNANCHORED 1)
(define (make-cre2_string_t nmatch)
(do ((i 0 (+ 1 i))
(T '() (append (list '* int) T))
(V '() (append (list %null-pointer 0) V)))
((= i nmatch)
(make-c-struct T V))))
(define (make-cre2_range_t nmatch)
(do ((i 0 (+ 1 i))
(T '() (append (list long long) T))
(V '() (append '(0 0) V)))
((= i nmatch)
(make-c-struct T V))))
(define (parse-cre2_string_t S nmatch)
(do ((i 0 (+ 1 i))
(T '() (append (list '* int) T)))
((= i nmatch)
(parse-c-struct S T))))
(define (parse-cre2_range_t S nmatch)
(do ((i 0 (+ 1 i))
(T '() (append (list long long) T)))
((= i nmatch)
(parse-c-struct S T))))
(define (print template . args)
(apply format (current-output-port) template args))
(main)
;;; end of file
--
Marco Maggi