poke-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[WIP][PATCH 2/2] pkl,pvm: add support for regular expression


From: Mohammad-Reza Nabipoor
Subject: [WIP][PATCH 2/2] pkl,pvm: add support for regular expression
Date: Tue, 14 Feb 2023 23:52:49 +0100

2023-02-14  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>

        * bootstrap.conf (libpoke_modules): Add `regexp'.
        * libpoke/pkl-insn.def (recomp): New instruction.
        (refree): Likewise.
        (remtch): Likewise.
        (resubnum): Likewise.
        (resubref): Likewise.
        * libpoke/pvm-wrappers.h: Include <config.h> and <regex.h>.
        (struct pvm_re_context): New type.
        (pvm_re_context_alloc): New function decl.
        (pvm_re_context_free): Likewise.
        (pvm_re_compile_pattern): Likewise.
        (pvm_re_match): Likewise.
        * libpoke/pvm-wrappers.c: Include "pvm-wrappers.h".
        (pvm_re_context_alloc): New function def.
        (pvm_re_context_free): Likewise.
        (pvm_re_compile_pattern): Likewise.
        (pvm_re_match): Likewise.
        * libpoke/pvm.jitter (wrapped-functions): Add new functions.
        (recomp): New instruction to compile regexp.
        (refree): New instruction to free the compiled regexp.
        (remtch): New instruction to match the regexp against input string.
        (resubnum): New instruction for getting the number of matched
        sub-groups.
        (resubref): New instruction to get the range of i-th matched
        sub-group.
        * libpoke/pkl-rt.pk (_Pkl_Regexp_Match): New type.
        (_pkl_re_match): New function.
        (_pkl_re_gmatch): Likewise.
        * libpoke/std.pk (pk_regexp_match): Likewise.
        (pk_regexp_gmatch): Likewise.
        (Pk_Regexp_Match): New type.
---




 ChangeLog              |  33 ++++++++++
 bootstrap.conf         |   1 +
 libpoke/pkl-insn.def   |   8 +++
 libpoke/pkl-rt.pk      |  81 ++++++++++++++++++++++++
 libpoke/pvm-wrappers.c |  34 ++++++++++
 libpoke/pvm-wrappers.h |  16 +++++
 libpoke/pvm.jitter     | 138 +++++++++++++++++++++++++++++++++++++++++
 libpoke/std.pk         |  22 +++++++
 8 files changed, 333 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 9b07b666..826195ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,36 @@
+2023-02-14  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>
+
+       * bootstrap.conf (libpoke_modules): Add `regexp'.
+       * libpoke/pkl-insn.def (recomp): New instruction.
+       (refree): Likewise.
+       (remtch): Likewise.
+       (resubnum): Likewise.
+       (resubref): Likewise.
+       * libpoke/pvm-wrappers.h: Include <config.h> and <regex.h>.
+       (struct pvm_re_context): New type.
+       (pvm_re_context_alloc): New function decl.
+       (pvm_re_context_free): Likewise.
+       (pvm_re_compile_pattern): Likewise.
+       (pvm_re_match): Likewise.
+       * libpoke/pvm-wrappers.c: Include "pvm-wrappers.h".
+       (pvm_re_context_alloc): New function def.
+       (pvm_re_context_free): Likewise.
+       (pvm_re_compile_pattern): Likewise.
+       (pvm_re_match): Likewise.
+       * libpoke/pvm.jitter (wrapped-functions): Add new functions.
+       (recomp): New instruction to compile regexp.
+       (refree): New instruction to free the compiled regexp.
+       (remtch): New instruction to match the regexp against input string.
+       (resubnum): New instruction for getting the number of matched
+       sub-groups.
+       (resubref): New instruction to get the range of i-th sub-group.
+       * libpoke/pkl-rt.pk (_Pkl_Regexp_Match): New type.
+       (_pkl_re_match): New function.
+       (_pkl_re_gmatch): Likewise.
+       * libpoke/std.pk (pk_regexp_match): Likewise.
+       (pk_regexp_gmatch): Likewise.
+       (Pk_Regexp_Match): New type.
+
 2023-02-14  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>
 
        * libpoke/pkl-insn.def (opqgetn): New instruction.
diff --git a/bootstrap.conf b/bootstrap.conf
index d6a8e71b..259fd176 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -109,6 +109,7 @@ libpoke_modules="
   xalloc
   strstr
   lib-symbol-visibility
+  regexp
   "
 
 # Don't overwrite the INSTALL file.
diff --git a/libpoke/pkl-insn.def b/libpoke/pkl-insn.def
index cf30396b..ffe721c3 100644
--- a/libpoke/pkl-insn.def
+++ b/libpoke/pkl-insn.def
@@ -480,6 +480,14 @@ PKL_DEF_INSN(PKL_INSN_RAND,"","rand")
 PKL_DEF_INSN(PKL_INSN_TIME,"","time")
 PKL_DEF_INSN(PKL_INSN_SLEEP,"","sleep")
 
+/* Regular expression instructions.  */
+
+PKL_DEF_INSN(PKL_INSN_RECOMP,"","recomp")
+PKL_DEF_INSN(PKL_INSN_REFREE,"","refree")
+PKL_DEF_INSN(PKL_INSN_REMTCH,"","remtch")
+PKL_DEF_INSN(PKL_INSN_RESUBNUM,"","resubnum")
+PKL_DEF_INSN(PKL_INSN_RESUBREF,"","resubref")
+
 /* System interaction instructions.  */
 
 PKL_DEF_INSN(PKL_INSN_GETENV,"","getenv")
diff --git a/libpoke/pkl-rt.pk b/libpoke/pkl-rt.pk
index 7371f982..896aeb44 100644
--- a/libpoke/pkl-rt.pk
+++ b/libpoke/pkl-rt.pk
@@ -1738,6 +1738,87 @@ immutable fun _pkl_format_any = (any val, int<32> depth = 0) string:
   return result;
 }
 
+/* Regular expression support code.  */
+
+immutable fun _pkl_re_match = (string regex, string str,
+                               int<32> start = 0) int<32>:
+{
+  /* HACK This is equivalent to `push null'.
+     Until we get a more powerful assembler, we have to use this
+     trick.  */
+  var opq = asm any: ("push 7"),
+      index = -1;
+
+  {
+    var err = asm any: ("push 7");
+
+    asm ("recomp" : opq, err : regex);
+    if (asm int<32>: ("nn; nip" : err))
+      raise Exception {code = EC_inval,
+                       name = "invalid regular expression: " + err as string,
+                       exit_status = 1};
+  }
+
+  asm ("remtch; nip" : index : opq, str, start);
+  asm ("refree" :: opq);
+
+  if (index == -2)
+    raise Exception {code = EC_inval,
+                     name = "regular expression match function internal error",
+                     exit_status = 1};
+  return index;
+}
+
+type _Pkl_Regexp_Match =
+  struct
+  {
+    int<32> count;
+    int<32>[2][] submatches;
+  };
+
+/* Match REGEX against STR from START like _pkl_re_match does, and
+   also collect the ranges that `resubref' reports for the match.  */
+immutable fun _pkl_re_gmatch = (string regex, string str,
+                                int<32> start = 0) _Pkl_Regexp_Match:
+{
+  var result = _Pkl_Regexp_Match {};
+
+  /* HACK This is equivalent to `push null'.
+     Until we get a more powerful assembler, we have to use this
+     trick.  */
+  var opq = asm any: ("push 7");
+
+  {
+    var err = asm any: ("push 7");
+
+    asm ("recomp" : opq, err : regex);
+    if (asm int<32>: ("nn; nip" : err))
+      raise Exception {code = EC_inval,
+                       name = "invalid regular expression: " + err as string,
+                       exit_status = 1};
+  }
+
+  asm ("remtch; nip" : result.count : opq, str, start);
+  {
+    var subnum = 0UL;
+
+    asm ("resubnum; nip" : subnum : opq);
+    result.submatches = int<32>[2][subnum] ();
+    for (var i = 0UL; i != subnum; ++i)
+      asm ("resubref; rot; drop"
+           : result.submatches[i][0], result.submatches[i][1]
+           : opq, i);
+  }
+  asm ("refree" :: opq);
+
+  if (result.count == -2)
+    raise Exception {code = EC_inval,
+                     name = "regular expression match function internal error",
+                     exit_status = 1};
+
+  return result;
+}
+
 /**** Set the default load path ****/
 
 immutable var load_path = "";
diff --git a/libpoke/pvm-wrappers.c b/libpoke/pvm-wrappers.c
index ef57fd50..497f20d7 100644
--- a/libpoke/pvm-wrappers.c
+++ b/libpoke/pvm-wrappers.c
@@ -24,6 +24,8 @@
 
 #include <config.h>
 
+#include "pvm-wrappers.h"
+
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
@@ -126,3 +128,57 @@ pvm_strcat (char *restrict dest, const char *restrict src)
 {
   return strcat (dest, src);
 }
+
+/* Allocate a zero-initialized regular expression context.  Return
+   NULL if the allocation fails.  */
+
+struct pvm_re_context *
+pvm_re_context_alloc (void)
+{
+  return calloc (1, sizeof (struct pvm_re_context));
+}
+
+/* Release the compiled pattern and the match registers held by CTX,
+   and then CTX itself.  CTX may be NULL, in which case nothing is
+   done.  */
+
+void
+pvm_re_context_free (struct pvm_re_context *ctx)
+{
+  if (ctx)
+    {
+      regfree (&ctx->pat);
+      free (ctx->regs.start);
+      free (ctx->regs.end);
+      free (ctx);
+    }
+}
+
+/* Compile the first REGEX_SIZE bytes of REGEX, using the egrep
+   syntax, into the pattern buffer of CTX.  Return NULL on success,
+   or an error message otherwise.  */
+
+const char *
+pvm_re_compile_pattern (struct pvm_re_context *ctx,
+                        const char *regex, const int regex_size)
+{
+  /* re_set_syntax changes a process-wide setting.  libpoke may share
+     the process with other users of the GNU regex API, so restore
+     the previous syntax once the pattern has been compiled.  */
+  reg_syntax_t saved_syntax = re_set_syntax (RE_SYNTAX_EGREP);
+  const char *errmsg = re_compile_pattern (regex, regex_size, &ctx->pat);
+
+  re_set_syntax (saved_syntax);
+  return errmsg;
+}
+
+/* Match the pattern compiled in CTX against the first SIZE bytes of
+   STRING, starting at offset START, recording the matched ranges in
+   the registers of CTX.  Return whatever re_match returns.  */
+
+int
+pvm_re_match (struct pvm_re_context *ctx, const char *string,
+              const int size, const int start)
+{
+  return re_match (&ctx->pat, string, size, start, &ctx->regs);
+}
diff --git a/libpoke/pvm-wrappers.h b/libpoke/pvm-wrappers.h
index cd0e0dd1..d94b676c 100644
--- a/libpoke/pvm-wrappers.h
+++ b/libpoke/pvm-wrappers.h
@@ -19,6 +19,10 @@
 #ifndef PVM_WRAPPERS_H
 #define PVM_WRAPPERS_H
 
+#include <config.h>
+
+#include <regex.h>
+
 void pvm_free (void *p);
 int pvm_nanosleep (const struct timespec *rqtp, struct timespec *rmtp);
 int pvm_asprintf (char **resultp, const char *format, ...);
@@ -34,4 +38,16 @@ char *pvm_strcpy (char *restrict dest, const char *src);
 char *pvm_strncpy (char *restrict dest, const char *restrict src, size_t n);
 char *pvm_strcat (char *restrict dest, const char *restrict src);
 
+struct pvm_re_context
+{
+  struct re_pattern_buffer pat;
+  struct re_registers regs;
+};
+struct pvm_re_context *pvm_re_context_alloc (void);
+void pvm_re_context_free (struct pvm_re_context *);
+const char *pvm_re_compile_pattern (struct pvm_re_context *ctx,
+                                    const char *regex, const int regex_size);
+int pvm_re_match (struct pvm_re_context *ctx, const char *string,
+                  const int size, const int start);
+
 #endif /* ! PVM_WRAPPERS_H */
diff --git a/libpoke/pvm.jitter b/libpoke/pvm.jitter
index b42df3ab..7f74e1b3 100644
--- a/libpoke/pvm.jitter
+++ b/libpoke/pvm.jitter
@@ -150,6 +150,10 @@ wrapped-functions
   pvm_strcat
   pvm_nanosleep
   pvm_snprintf
+  pvm_re_context_alloc
+  pvm_re_context_free
+  pvm_re_compile_pattern
+  pvm_re_match
 end
 
 wrapped-globals
@@ -7017,6 +7021,140 @@ instruction getenv ()
   end
 end
 
+
+## Regular Expressions Instructions
+
+# Instruction: recomp
+#
+# Compile the given regular expression and push a handle to the
+# compiled regular expression as an opaque value OPQ, followed by
+# either NULL when the compilation succeeds or a STR holding the
+# error message when it fails.
+#
+# Stack: ( STR -- OPQ NULL|STR )
+
+instruction recomp ()
+  code
+    const char *str = PVM_VAL_STR (JITTER_TOP_STACK ());
+    pvm_val opq = pvm_make_opaque (pvm_make_string ("regex"), 0);
+    struct pvm_re_context *ctx;
+    const char *errmsg;
+
+    ctx = pvm_re_context_alloc ();
+    PVM_ASSERT (ctx != NULL);
+    errmsg = pvm_re_compile_pattern (ctx, str, strlen (str));
+
+    JITTER_TOP_STACK () = opq;
+    if (errmsg == NULL)
+      {
+        PVM_VAL_OPQ_PAYLOAD (opq) = (uintptr_t)ctx;
+        JITTER_PUSH_STACK (PVM_NULL);
+      }
+    else
+      {
+        pvm_re_context_free (ctx);
+        JITTER_PUSH_STACK (pvm_make_string (errmsg));
+      }
+  end
+end
+
+# Instruction: refree
+#
+# Given an opaque value containing a compiled regular expression
+# pattern, free the payload.
+#
+# Stack: ( OPQ -- )
+
+instruction refree ()
+  code
+    pvm_val opq = JITTER_TOP_STACK ();
+
+    pvm_re_context_free ((struct pvm_re_context*)PVM_VAL_OPQ_PAYLOAD (opq));
+    PVM_VAL_OPQ_PAYLOAD (opq) = 0;
+    JITTER_DROP_STACK ();
+  end
+end
+
+# Instruction: remtch
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern, a string STR, and a start index INDEX, this instruction
+# tries to match the regular expression in the pattern against the
+# STR from INDEX.  The instruction pushes how many (possibly
+# zero) characters of STR the pattern matched.  If the STR doesn't match
+# the pattern it pushes -1.  On internal error, it pushes -2.
+#
+# Stack: ( OPQ STR INT -- OPQ INT )
+
+instruction remtch ()
+  code
+    pvm_val opq = JITTER_AT_DEPTH_STACK (2);
+    pvm_val str = JITTER_UNDER_TOP_STACK ();
+    pvm_val start = JITTER_TOP_STACK ();
+    struct pvm_re_context *ctx;
+    int index;
+
+    ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+    PVM_ASSERT (ctx != NULL);
+    index = pvm_re_match (ctx, PVM_VAL_STR (str), strlen (PVM_VAL_STR (str)),
+                          PVM_VAL_INT (start));
+    JITTER_NIP_STACK ();
+    JITTER_TOP_STACK () = PVM_MAKE_INT (index, 32);
+  end
+end
+
+# Instruction: resubnum
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern which is already given to REMTCH instruction, this instruction
+# pushes the number of matched sub-groups to the stack.
+#
+# Stack: ( OPQ -- OPQ ULONG )
+
+instruction resubnum ()
+  code
+    pvm_val opq = JITTER_TOP_STACK ();
+    struct pvm_re_context *ctx;
+    uint64_t index;
+
+    ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+    PVM_ASSERT (ctx != NULL);
+    index = ctx->regs.num_regs;
+    index -= index ? 1 : 0;
+    JITTER_PUSH_STACK (PVM_MAKE_ULONG (index, 64));
+  end
+end
+
+# Instruction: resubref
+#
+# Given an opaque value OPQ containing a compiled regular expression
+# pattern which is already given to REMTCH instruction, and an INDEX,
+# this instruction pushes a pair of integers denoting the range of
+# INDEX'th matched sub-group to the stack.
+#
+# Stack: ( OPQ ULONG -- OPQ INT INT )
+
+instruction resubref ()
+  code
+    pvm_val opq = JITTER_UNDER_TOP_STACK ();
+    uint64_t index = PVM_VAL_ULONG (JITTER_TOP_STACK ());
+    struct pvm_re_context *ctx;
+
+    ctx = (struct pvm_re_context *)PVM_VAL_OPQ_PAYLOAD (opq);
+    PVM_ASSERT (ctx != NULL);
+    if (index < (uint64_t)ctx->regs.num_regs)
+      {
+        JITTER_TOP_STACK () = PVM_MAKE_INT (ctx->regs.start[index], 32);
+        JITTER_PUSH_STACK (PVM_MAKE_INT (ctx->regs.end[index], 32));
+      }
+    else
+      {
+        JITTER_TOP_STACK () = PVM_MAKE_INT (-1, 32);
+        JITTER_PUSH_STACK (PVM_MAKE_INT (-1, 32));
+      }
+  end
+end
+
 
 ## Miscellaneous Instructions
 
diff --git a/libpoke/std.pk b/libpoke/std.pk
index 8ed331d9..bcc0d1cc 100644
--- a/libpoke/std.pk
+++ b/libpoke/std.pk
@@ -861,3 +861,25 @@ fun pk_vercmp = (any _a, any _b) int<32>:
   diff = cmp (a.offset, b.offset);
   return diff;
 }
+
+/* Regular expression functions.  */
+
+fun pk_regexp_match = (string regex, string str, int<32> start = 0) int<32>:
+{
+  return _pkl_re_match (regex, str, start);
+}
+
+type Pk_Regexp_Match =
+  struct
+  {
+    int<32> count;
+    int<32>[2][] submatches;
+  };
+
+fun pk_regexp_gmatch = (string regex, string str,
+                        int<32> start = 0) Pk_Regexp_Match:
+{
+  var result = _pkl_re_gmatch (regex, str, start);
+
+  return Pk_Regexp_Match {count=result.count, submatches=result.submatches};
+}
-- 
2.39.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]