[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[WIP][PATCH 2/2] pkl,pvm: add support for regular expression
From: |
Mohammad-Reza Nabipoor |
Subject: |
[WIP][PATCH 2/2] pkl,pvm: add support for regular expression |
Date: |
Tue, 14 Feb 2023 23:52:49 +0100 |
2023-02-14 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
* bootstrap.conf (libpoke_modules): Add `regexp'.
* libpoke/pkl-insn.def (recomp): New instruction.
(refree): Likewise.
(remtch): Likewise.
(resubnum): Likewise.
(resubref): Likewise.
* libpoke/pvm-wrappers.h: Include <config.h> and <regex.h>.
(struct pvm_re_context): New type.
(pvm_re_context_alloc): New function decl.
(pvm_re_context_free): Likewise.
(pvm_re_compile_pattern): Likewise.
(pvm_re_match): Likewise.
* libpoke/pvm-wrappers.c: Include "pvm-wrappers.h".
(pvm_re_context_alloc): New function def.
(pvm_re_context_free): Likewise.
(pvm_re_compile_pattern): Likewise.
(pvm_re_match): Likewise.
* libpoke/pvm.jitter (wrapped-functions): Add new functions.
(recomp): New instruction to compile regexp.
(refree): New instruction to free the compiled regexp.
(remtch): New instruction to match the regexp against input string.
(resubnum): New instruction for getting the number of matched
sub-groups.
(resubref): New instruction to get the range of i-th matched sub-group.
* libpoke/pkl-rt.pk (_Pkl_Regexp_Match): New type.
(_pkl_re_match): New function.
(_pkl_re_gmatch): Likewise.
* libpoke/std.pk (pk_regexp_match): Likewise.
(pk_regexp_gmatch): Likewise.
(Pk_Regexp_Match): New type.
---
ChangeLog | 33 ++++++++++
bootstrap.conf | 1 +
libpoke/pkl-insn.def | 8 +++
libpoke/pkl-rt.pk | 81 ++++++++++++++++++++++++
libpoke/pvm-wrappers.c | 34 ++++++++++
libpoke/pvm-wrappers.h | 16 +++++
libpoke/pvm.jitter | 138 +++++++++++++++++++++++++++++++++++++++++
libpoke/std.pk | 22 +++++++
8 files changed, 333 insertions(+)
diff --git a/ChangeLog b/ChangeLog
index 9b07b666..826195ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,36 @@
+2023-02-14 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
+
+ * bootstrap.conf (libpoke_modules): Add `regexp'.
+ * libpoke/pkl-insn.def (recomp): New instruction.
+ (refree): Likewise.
+ (remtch): Likewise.
+ (resubnum): Likewise.
+ (resubref): Likewise.
+ * libpoke/pvm-wrappers.h: Include <config.h> and <regex.h>.
+ (struct pvm_re_context): New type.
+ (pvm_re_context_alloc): New function decl.
+ (pvm_re_context_free): Likewise.
+ (pvm_re_compile_pattern): Likewise.
+ (pvm_re_match): Likewise.
+ * libpoke/pvm-wrappers.c: Include "pvm-wrappers.h".
+ (pvm_re_context_alloc): New function def.
+ (pvm_re_context_free): Likewise.
+ (pvm_re_compile_pattern): Likewise.
+ (pvm_re_match): Likewise.
+ * libpoke/pvm.jitter (wrapped-functions): Add new functions.
+ (recomp): New instruction to compile regexp.
+ (refree): New instruction to free the compiled regexp.
+ (remtch): New instruction to match the regexp against input string.
+ (resubnum): New instruction for getting the number of matched
+ sub-groups.
+ (resubref): New instruction to get the range of i-th matched sub-group.
+ * libpoke/pkl-rt.pk (_Pkl_Regexp_Match): New type.
+ (_pkl_re_match): New function.
+ (_pkl_re_gmatch): Likewise.
+ * libpoke/std.pk (pk_regexp_match): Likewise.
+ (pk_regexp_gmatch): Likewise.
+ (Pk_Regexp_Match): New type.
+
2023-02-14 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
* libpoke/pkl-insn.def (opqgetn): New instruction.
diff --git a/bootstrap.conf b/bootstrap.conf
index d6a8e71b..259fd176 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -109,6 +109,7 @@ libpoke_modules="
xalloc
strstr
lib-symbol-visibility
+ regexp
"
# Don't overwrite the INSTALL file.
diff --git a/libpoke/pkl-insn.def b/libpoke/pkl-insn.def
index cf30396b..ffe721c3 100644
--- a/libpoke/pkl-insn.def
+++ b/libpoke/pkl-insn.def
@@ -480,6 +480,14 @@ PKL_DEF_INSN(PKL_INSN_RAND,"","rand")
PKL_DEF_INSN(PKL_INSN_TIME,"","time")
PKL_DEF_INSN(PKL_INSN_SLEEP,"","sleep")
+/* Regular expression instructions. */
+
+PKL_DEF_INSN(PKL_INSN_RECOMP,"","recomp")
+PKL_DEF_INSN(PKL_INSN_REFREE,"","refree")
+PKL_DEF_INSN(PKL_INSN_REMTCH,"","remtch")
+PKL_DEF_INSN(PKL_INSN_RESUBNUM,"","resubnum")
+PKL_DEF_INSN(PKL_INSN_RESUBREF,"","resubref")
+
/* System interaction instructions. */
PKL_DEF_INSN(PKL_INSN_GETENV,"","getenv")
diff --git a/libpoke/pkl-rt.pk b/libpoke/pkl-rt.pk
index 7371f982..896aeb44 100644
--- a/libpoke/pkl-rt.pk
+++ b/libpoke/pkl-rt.pk
@@ -1738,6 +1738,87 @@ immutable fun _pkl_format_any = (any val, int<32> depth
= 0) string:
return result;
}
+/* Regular expression support code. */
+
+immutable fun _pkl_re_match = (string regex, string str,
+ int<32> start = 0) int<32>:
+{
+ /* HACK This is equivalent to `push null'.
+ Until we get a more powerful assembler, we have to use this
+ trick. */
+ var opq = asm any: ("push 7"),
+ index = -1;
+
+ {
+ var err = asm any: ("push 7");
+
+ asm ("recomp" : opq, err : regex);
+ if (asm int<32>: ("nn; nip" : err))
+ raise Exception {code = EC_inval,
+ name = "invalid regular expression: " + err as string,
+ exit_status = 1};
+ }
+
+ asm ("remtch; nip" : index : opq, str, start);
+ asm ("refree" :: opq);
+
+ if (index == -2)
+ raise Exception {code = EC_inval,
+ name = "regular expression match function internal error",
+ exit_status = 1};
+ return index;
+}
+
+type _Pkl_Regexp_Match =
+ struct
+ {
+ int<32> count;
+ int<32>[2][] submatches;
+ };
+
+fun _pkl_re_gmatch = (string regex, string str,
+ int<32> start = 0) _Pkl_Regexp_Match:
+{
+ var result = _Pkl_Regexp_Match {};
+
+ /* HACK This is equivalent to `push null'.
+ Until we get a more powerful assembler, we have to use this
+ trick. */
+ var opq = asm any: ("push 7");
+
+ {
+ var err = asm any: ("push 7");
+
+ asm ("recomp" : opq, err : regex);
+ if (asm int<32>: ("nn; nip" : err))
+ raise Exception {code = EC_inval,
+ name = "invalid regular expression: " + err as string,
+ exit_status = 1};
+ }
+
+ asm ("remtch; nip" : result.count : opq, str, start);
+ {
+ var subnum = 0UL;
+
+ asm ("resubnum; nip" : subnum : opq);
+ result.submatches = int<32>[2][subnum] ();
+ for (var i = 0UL; i != subnum; ++i)
+ {
+ asm ("resubref; rot; drop"
+ : result.submatches[i][0], result.submatches[i][1]
+ : opq, i);
+ }
+ }
+ asm ("refree" :: opq);
+
+ if (result.count == -2)
+ raise Exception {code = EC_inval,
+ name = "regular expression match function internal error",
+ exit_status = 1};
+
+ return result;
+}
+
/**** Set the default load path ****/
immutable var load_path = "";
diff --git a/libpoke/pvm-wrappers.c b/libpoke/pvm-wrappers.c
index ef57fd50..497f20d7 100644
--- a/libpoke/pvm-wrappers.c
+++ b/libpoke/pvm-wrappers.c
@@ -24,6 +24,8 @@
#include <config.h>
+#include "pvm-wrappers.h"
+
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
@@ -126,3 +128,35 @@ pvm_strcat (char *restrict dest, const char *restrict src)
{
return strcat (dest, src);
}
+
+struct pvm_re_context *pvm_re_context_alloc (void)
+{
+ return calloc (1, sizeof (struct pvm_re_context));
+}
+
+void
+pvm_re_context_free (struct pvm_re_context* ctx)
+{
+ if (ctx)
+ {
+ regfree (&ctx->pat);
+ free (ctx->regs.start);
+ free (ctx->regs.end);
+ free (ctx);
+ }
+}
+
+const char *
+pvm_re_compile_pattern (struct pvm_re_context *ctx,
+ const char *regex, const int regex_size)
+{
+ re_set_syntax (RE_SYNTAX_EGREP);
+ return re_compile_pattern (regex, regex_size, &ctx->pat);
+}
+
+int
+pvm_re_match (struct pvm_re_context *ctx, const char *string,
+ const int size, const int start)
+{
+ return re_match (&ctx->pat, string, size, start, &ctx->regs);
+}
diff --git a/libpoke/pvm-wrappers.h b/libpoke/pvm-wrappers.h
index cd0e0dd1..d94b676c 100644
--- a/libpoke/pvm-wrappers.h
+++ b/libpoke/pvm-wrappers.h
@@ -19,6 +19,10 @@
#ifndef PVM_WRAPPERS_H
#define PVM_WRAPPERS_H
+#include <config.h>
+
+#include <regex.h>
+
void pvm_free (void *p);
int pvm_nanosleep (const struct timespec *rqtp, struct timespec *rmtp);
int pvm_asprintf (char **resultp, const char *format, ...);
@@ -34,4 +38,16 @@ char *pvm_strcpy (char *restrict dest, const char *src);
char *pvm_strncpy (char *restrict dest, const char *restrict src, size_t n);
char *pvm_strcat (char *restrict dest, const char *restrict src);
+struct pvm_re_context
+{
+ struct re_pattern_buffer pat;
+ struct re_registers regs;
+};
+struct pvm_re_context *pvm_re_context_alloc (void);
+void pvm_re_context_free (struct pvm_re_context *);
+const char *pvm_re_compile_pattern (struct pvm_re_context *ctx,
+ const char *regex, const int regex_size);
+int pvm_re_match (struct pvm_re_context *ctx, const char *string,
+ const int size, const int start);
+
#endif /* ! PVM_WRAPPERS_H */
diff --git a/libpoke/pvm.jitter b/libpoke/pvm.jitter
index b42df3ab..7f74e1b3 100644
--- a/libpoke/pvm.jitter
+++ b/libpoke/pvm.jitter
@@ -150,6 +150,10 @@ wrapped-functions
pvm_strcat
pvm_nanosleep
pvm_snprintf
+ pvm_re_context_alloc
+ pvm_re_context_free
+ pvm_re_compile_pattern
+ pvm_re_match
end
wrapped-globals
@@ -7017,6 +7021,140 @@ instruction getenv ()
end
end
+
+## Regular Expressions Instructions
+
+# Instruction: recomp
+#
+# Compile the given regular expression and push a handle to the
+# compiled regular expression as an opaque value OPQ, followed by
+# either NULL if the compilation succeeded, or a STR holding the
+# error message otherwise.
+#
+# Stack: ( STR -- OPQ NULL|STR )
+
+instruction recomp ()
+ code
+ const char *str = PVM_VAL_STR (JITTER_TOP_STACK ());
+ pvm_val opq = pvm_make_opaque (pvm_make_string ("regex"), 0);
+ struct pvm_re_context *ctx;
+ const char *errmsg;
+
+ ctx = pvm_re_context_alloc ();
+ PVM_ASSERT (ctx != NULL);
+ errmsg = pvm_re_compile_pattern (ctx, str, strlen (str));
+
+ JITTER_TOP_STACK () = opq;
+ if (errmsg == NULL)
+ {
+ PVM_VAL_OPQ_PAYLOAD (opq) = (uintptr_t)ctx;
+ JITTER_PUSH_STACK (PVM_NULL);
+ }
+ else
+ {
+ pvm_re_context_free (ctx);
+ JITTER_PUSH_STACK (pvm_make_string (errmsg));
+ }
+ end
+end
+
+# Instruction: refree
+#
+# Given an opaque value containing a compiled regular expression
+# pattern, free the payload.
+#
+# Stack: ( OPQ -- )
+
+instruction refree ()
+ code
+ pvm_val opq = JITTER_TOP_STACK ();
+
+ pvm_re_context_free ((struct pvm_re_context*)PVM_VAL_OPQ_PAYLOAD (opq));
+ PVM_VAL_OPQ_PAYLOAD (opq) = 0;
+ JITTER_DROP_STACK ();
+ end
+end
+
+# Instruction: remtch
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern, a string STR, and a start index INDEX, this instruction
+# tries to match the regular expression in the pattern against the
+# STR from INDEX. The instruction pushes how many (possibly
+# zero) characters of STR the pattern matched. If the STR doesn't match
+# the pattern it pushes -1. On internal error, it pushes -2.
+#
+# Stack: ( OPQ STR INT -- OPQ INT )
+
+instruction remtch ()
+ code
+ pvm_val opq = JITTER_AT_DEPTH_STACK (2);
+ pvm_val str = JITTER_UNDER_TOP_STACK ();
+ pvm_val start = JITTER_TOP_STACK ();
+ struct pvm_re_context *ctx;
+ int index;
+
+ ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+ index = pvm_re_match (ctx, PVM_VAL_STR (str), strlen (PVM_VAL_STR (str)),
+ PVM_VAL_INT (start));
+
+ JITTER_NIP_STACK ();
+ JITTER_TOP_STACK () = PVM_MAKE_INT (index, 32);
+ end
+end
+
+# Instruction: resubnum
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern that has already been passed to the REMTCH instruction, this
+# instruction pushes the number of matched sub-groups to the stack.
+#
+# Stack: ( OPQ -- OPQ ULONG )
+
+instruction resubnum ()
+ code
+ pvm_val opq = JITTER_TOP_STACK ();
+ struct pvm_re_context *ctx;
+ uint64_t index;
+
+ ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+ PVM_ASSERT (ctx != NULL);
+ index = ctx->regs.num_regs;
+ index -= index ? 1 : 0;
+ JITTER_PUSH_STACK (PVM_MAKE_ULONG (index, 64));
+ end
+end
+
+# Instruction: resubref
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern that has already been passed to the REMTCH instruction, and
+# an INDEX, this instruction pushes a pair of integers denoting the
+# range of the INDEX'th matched sub-group to the stack.
+#
+# Stack: ( OPQ ULONG -- OPQ INT INT )
+
+instruction resubref ()
+ code
+ pvm_val opq = JITTER_UNDER_TOP_STACK ();
+ uint64_t index = PVM_VAL_ULONG (JITTER_TOP_STACK ());
+ struct pvm_re_context *ctx;
+
+ ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+ PVM_ASSERT (ctx != NULL);
+ if (index < (uint64_t)ctx->regs.num_regs)
+ {
+ JITTER_TOP_STACK () = PVM_MAKE_INT (ctx->regs.start[index], 32);
+ JITTER_PUSH_STACK (PVM_MAKE_INT (ctx->regs.end[index], 32));
+ }
+ else
+ {
+ JITTER_TOP_STACK () = PVM_MAKE_INT (-1, 32);
+ JITTER_PUSH_STACK (PVM_MAKE_INT (-1, 32));
+ }
+ end
+end
+
## Miscellaneous Instructions
diff --git a/libpoke/std.pk b/libpoke/std.pk
index 8ed331d9..bcc0d1cc 100644
--- a/libpoke/std.pk
+++ b/libpoke/std.pk
@@ -861,3 +861,25 @@ fun pk_vercmp = (any _a, any _b) int<32>:
diff = cmp (a.offset, b.offset);
return diff;
}
+
+/* Regular expression functions. */
+
+fun pk_regexp_match = (string regex, string str, int<32> start = 0) int<32>:
+{
+ return _pkl_re_match (regex, str, start);
+}
+
+type Pk_Regexp_Match =
+ struct
+ {
+ int<32> count;
+ int<32>[2][] submatches;
+ };
+
+fun pk_regexp_gmatch = (string regex, string str,
+ int<32> start = 0) Pk_Regexp_Match:
+{
+ var result = _pkl_re_gmatch (regex, str, start);
+
+ return Pk_Regexp_Match {count=result.count, submatches=result.submatches};
+}
--
2.39.1
- [WIP][PATCH 1/2] pvm: add new pvm value: opaque values, Mohammad-Reza Nabipoor, 2023/02/14
- [WIP][PATCH 2/2] pkl,pvm: add support for regular expression,
Mohammad-Reza Nabipoor <=
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Jose E. Marchesi, 2023/02/15
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Mohammad-Reza Nabipoor, 2023/02/15
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Jose E. Marchesi, 2023/02/15
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Mohammad-Reza Nabipoor, 2023/02/16
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Jose E. Marchesi, 2023/02/17
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Mohammad-Reza Nabipoor, 2023/02/19
- Re: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression, Jose E. Marchesi, 2023/02/20
Re: [WIP][PATCH 1/2] pvm: add new pvm value: opaque values, Jose E. Marchesi, 2023/02/15