gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r21892 - gnunet/src/regex


From: gnunet
Subject: [GNUnet-SVN] r21892 - gnunet/src/regex
Date: Mon, 11 Jun 2012 17:10:21 +0200

Author: szengel
Date: 2012-06-11 17:10:21 +0200 (Mon, 11 Jun 2012)
New Revision: 21892

Modified:
   gnunet/src/regex/regex.c
   gnunet/src/regex/test_regex_eval_api.c
   gnunet/src/regex/test_regex_iterate_api.c
Log:
simplified regex/proof generation


Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c    2012-06-11 15:08:19 UTC (rev 21891)
+++ gnunet/src/regex/regex.c    2012-06-11 15:10:21 UTC (rev 21892)
@@ -456,6 +456,7 @@
 {
   int is_dup;
   struct Transition *t;
+  struct Transition *oth;
 
   if (NULL == from_state)
   {
@@ -478,6 +479,13 @@
   if (is_dup)
     return;
 
+  // sort transitions by label
+  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
+  {
+    if (oth->label > label)
+      break;
+  }
+
   t = GNUNET_malloc (sizeof (struct Transition));
   t->id = ctx->transition_id++;
   t->label = label;
@@ -486,8 +494,8 @@
 
   // Add outgoing transition to 'from_state'
   from_state->transition_count++;
-  GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
-                               from_state->transitions_tail, t);
+  GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
+                                      from_state->transitions_tail, oth, t);
 }
 
 /**
@@ -831,11 +839,14 @@
   char *R_cur_r;
   int length_l;
   int length_r;
+  int s_cnt;
+  int l_cnt;
   char *complete_regex;
 
   k = 0;
   n = a->state_count;
 
+  // number the states
   for (i = 0, s = a->states_head; NULL != s; s = s->next, i++)
   {
     s->marked = i;
@@ -891,6 +902,8 @@
     {
       for (j = 0; j < n; j++)
       {
+        //construct R_cur
+
         // 0*R = R*0 = 0
         // 0+R = R+0 = R
         if (NULL == R_last[i][k] || NULL == R_last[k][j])
@@ -898,6 +911,15 @@
           if (NULL != R_last[i][j])
             R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
         }
+        // a(ba)*b = (ab)+
+        /*else if ((NULL == R_last[i][j] || 0 == strlen (R_last[i][j])) && */
+        /*NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && */
+        /*NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && */
+        /*0 == strncmp (R_last[k][k], R_last[k][j], (strlen (R_last[k][k]) - 
1)) && */
+        /*R_last[i][k][0] == R_last[k][k][strlen (R_last[k][k]) - 1]) */
+        /*{ */
+        /*GNUNET_asprintf (&R_cur[i][j], "(%s%s)+", R_last[i][k], 
R_last[k][j]); */
+        /*} */
         else
         {
           // R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj
@@ -911,27 +933,35 @@
           if (NULL != R_last[i][j])
             strcat (R_cur_l, R_last[i][j]);
 
-          if (NULL != R_last[i][k])
+          if (NULL != R_last[i][k] && 0 != strcmp (R_last[i][k], R_last[k][k]))
             strcat (R_cur_r, R_last[i][k]);
 
           if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], ""))
           {
-            if (R_last[k][k][0] == '(' && R_last[k][k][strlen 
(R_last[k][k])-1] == ')')
+            if (R_last[k][k][0] == '(' &&
+                R_last[k][k][strlen (R_last[k][k]) - 1] == ')')
             {
               strcat (R_cur_r, R_last[k][k]);
-              strcat (R_cur_r, "*");
             }
             else
             {
               strcat (R_cur_r, "(");
               strcat (R_cur_r, R_last[k][k]);
-              strcat (R_cur_r, ")*");
+              strcat (R_cur_r, ")");
             }
+
+            if (0 == strcmp (R_last[i][k], R_last[k][k]) ||
+                0 == strcmp (R_last[k][k], R_last[k][j]))
+              strcat (R_cur_r, "+");
+            else
+              strcat (R_cur_r, "*");
           }
 
-          if (NULL != R_last[k][j])
+          if (NULL != R_last[k][j] && 0 != strcmp (R_last[k][k], R_last[k][j]))
             strcat (R_cur_r, R_last[k][j]);
 
+          // simplifications...
+
           // | is idempotent: a | a = a for all a in A
           if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") ||
               0 == strcmp (R_cur_r, ""))
@@ -941,32 +971,102 @@
             else
               GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l);
           }
-          // a | a a* a = a+
+          // TODO: in theory only applicable if (e + a) | (e + a)(e + a)*(e+a)
+          // where e means epsilon... check if practical!
+          // a | a a* a = a*
           else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k]
                    && R_last[k][k] == R_last[k][j])
           {
-            GNUNET_asprintf (&R_cur[i][j], "%s+", R_last[i][j]);
+            if (1 >= strlen (R_last[k][k]) ||
+                (R_last[k][k][0] == '(' &&
+                 R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+              GNUNET_asprintf (&R_cur[i][j], "%s*", R_last[k][k]);
+            else
+              GNUNET_asprintf (&R_cur[i][j], "(%s)*", R_last[k][k]);
           }
           // a | a b* b => a | a b | a b b | ... => a b*
           else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == 
R_last[k][j])
           {
-            GNUNET_asprintf (&R_cur[i][j], "%s%s*", R_last[i][k], 
R_last[k][k]);
+            // if a is xb then a b* becomes x b b* = x b+
+
+            s_cnt = strlen (R_last[k][k]);
+            l_cnt = strlen (R_last[i][k]);
+            R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4);
+
+            if (l_cnt > 0 && s_cnt >= l_cnt)
+              for (; s_cnt > 0; s_cnt--, l_cnt--)
+                if (R_last[i][k][l_cnt] != R_last[k][k][s_cnt])
+                  break;
+
+            if (strlen (R_last[i][k]) > 0 && 0 == s_cnt && 0 <= l_cnt)
+              strncat (R_cur[i][j], R_last[i][k], l_cnt);
+            else
+              strcat (R_cur[i][j], R_last[i][k]);
+
+            if (1 >= strlen (R_last[k][k]) &&
+                !(R_last[k][k][0] == '(' &&
+                  R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+            {
+              strcat (R_cur[i][j], "(");
+              strcat (R_cur[i][j], R_last[k][k]);
+              strcat (R_cur[i][j], ")");
+            }
+            else
+              strcat (R_cur[i][j], R_last[k][k]);
+
+            if (0 == s_cnt && 0 <= l_cnt)
+              strcat (R_cur[i][j], "+");
+            else
+              strcat (R_cur[i][j], "*");
           }
           // a | b b* a => a | b a | b b a | ... => b* a
           else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == 
R_last[k][k])
           {
-            GNUNET_asprintf (&R_cur[i][j], "%s*%s", R_last[k][k], 
R_last[k][j]);
+            // if a is bx then b* a becomes b+ x
+
+            temp = NULL;
+            s_cnt = strlen (R_last[k][k]);
+            l_cnt = strlen (R_last[k][j]);
+            R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4);
+
+            int bla;
+
+            if (l_cnt > 0 && s_cnt >= l_cnt)
+              for (bla = 0; bla < s_cnt; bla++)
+                if (R_last[k][k][bla] != R_last[k][j][bla])
+                  break;
+
+            if (1 >= strlen (R_last[k][k]) &&
+                !(R_last[k][k][0] == '(' &&
+                  R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+            {
+              strcat (R_cur[i][j], "(");
+              strcat (R_cur[i][j], R_last[k][k]);
+              strcat (R_cur[i][j], ")");
+            }
+            else
+              strcat (R_cur[i][j], R_last[k][k]);
+
+            if (bla == s_cnt)
+              strcat (R_cur[i][j], "+");
+            else
+              strcat (R_cur[i][j], "*");
+
+            if (strlen (R_last[k][j]) > 0 && bla == s_cnt)
+              strcat (R_cur[i][j], &R_last[k][j][bla]);
+            else
+              strcat (R_cur[i][j], R_last[k][j]);
           }
           else
             GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
 
           GNUNET_free_non_null (R_cur_l);
           GNUNET_free_non_null (R_cur_r);
-
         }
       }
     }
 
+    // set R_last = R_cur
     for (i = 0; i < n; i++)
     {
       for (j = 0; j < n; j++)
@@ -991,7 +1091,7 @@
                         &states[i]->hash);
   }
 
-  // complete regex for whole DFA
+  // complete regex for whole DFA: union of all pairs (start state/accepting 
state(s)).
   complete_regex = NULL;
   for (i = 0; i < n; i++)
   {
@@ -1486,6 +1586,7 @@
   GNUNET_assert (0 == cls_check->len);
   GNUNET_free (cls_check);
 
+  // sort the states
   if (cls->len > 1)
     qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
            state_compare);
@@ -2047,6 +2148,7 @@
     return;
 
   GNUNET_free (a->regex);
+  GNUNET_free_non_null (a->computed_regex);
 
   for (s = a->states_head; NULL != s;)
   {
@@ -2059,6 +2161,81 @@
 }
 
 /**
+ * Save a state to an open file pointer. cls is expected to be a file pointer 
to
+ * an open file. Used only in conjunction with
+ * GNUNET_REGEX_automaton_save_graph.
+ *
+ * @param cls file pointer
+ * @param s state
+ */
+void
+GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State 
*s)
+{
+  FILE *p;
+  struct Transition *ctran;
+  char *s_acc = NULL;
+  char *s_tran = NULL;
+
+  p = cls;
+
+  if (s->accepting)
+  {
+    GNUNET_asprintf (&s_acc,
+                     "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
+                     s->name, s->scc_id);
+  }
+  else
+  {
+    GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
+                     s->scc_id);
+  }
+
+  if (NULL == s_acc)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", 
s->name);
+    return;
+  }
+  fwrite (s_acc, strlen (s_acc), 1, p);
+  GNUNET_free (s_acc);
+  s_acc = NULL;
+
+  for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
+  {
+    if (NULL == ctran->to_state)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Transition from State %i has has no state for 
transitioning\n",
+                  s->id);
+      continue;
+    }
+
+    if (ctran->label == 0)
+    {
+      GNUNET_asprintf (&s_tran,
+                       "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 
0.8 0.95\"];\n",
+                       s->name, ctran->to_state->name, s->scc_id);
+    }
+    else
+    {
+      GNUNET_asprintf (&s_tran,
+                       "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 
0.95\"];\n",
+                       s->name, ctran->to_state->name, ctran->label, 
s->scc_id);
+    }
+
+    if (NULL == s_tran)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
+                  s->name);
+      return;
+    }
+
+    fwrite (s_tran, strlen (s_tran), 1, p);
+    GNUNET_free (s_tran);
+    s_tran = NULL;
+  }
+}
+
+/**
  * Save the given automaton as a GraphViz dot file
  *
  * @param a the automaton to be saved
@@ -2068,10 +2245,6 @@
 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
                                    const char *filename)
 {
-  struct GNUNET_REGEX_State *s;
-  struct Transition *ctran;
-  char *s_acc = NULL;
-  char *s_tran = NULL;
   char *start;
   char *end;
   FILE *p;
@@ -2100,67 +2273,8 @@
   start = "digraph G {\nrankdir=LR\n";
   fwrite (start, strlen (start), 1, p);
 
-  for (s = a->states_head; NULL != s; s = s->next)
-  {
-    if (s->accepting)
-    {
-      GNUNET_asprintf (&s_acc,
-                       "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 
0.95\"];\n",
-                       s->name, s->scc_id);
-    }
-    else
-    {
-      GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
-                       s->scc_id);
-    }
+  automaton_traverse (p, a, GNUNET_REGEX_automaton_save_graph_step);
 
-    if (NULL == s_acc)
-    {
-      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
-                  s->name);
-      return;
-    }
-    fwrite (s_acc, strlen (s_acc), 1, p);
-    GNUNET_free (s_acc);
-    s_acc = NULL;
-
-    for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
-    {
-      if (NULL == ctran->to_state)
-      {
-        GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
-                    "Transition from State %i has has no state for 
transitioning\n",
-                    s->id);
-        continue;
-      }
-
-      if (ctran->label == 0)
-      {
-        GNUNET_asprintf (&s_tran,
-                         "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 
0.8 0.95\"];\n",
-                         s->name, ctran->to_state->name, s->scc_id);
-      }
-      else
-      {
-        GNUNET_asprintf (&s_tran,
-                         "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 
0.95\"];\n",
-                         s->name, ctran->to_state->name, ctran->label,
-                         s->scc_id);
-      }
-
-      if (NULL == s_tran)
-      {
-        GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
-                    s->name);
-        return;
-      }
-
-      fwrite (s_tran, strlen (s_tran), 1, p);
-      GNUNET_free (s_tran);
-      s_tran = NULL;
-    }
-  }
-
   end = "\n}\n";
   fwrite (end, strlen (end), 1, p);
   fclose (p);
@@ -2189,6 +2303,10 @@
 
   s = a->start;
 
+  // If the string is empty but the starting state is accepting, we accept.
+  if ((NULL == string || 0 == strlen (string)) && s->accepting)
+    return 0;
+
   for (strp = string; NULL != strp && *strp; strp++)
   {
     s = dfa_move (s, *strp);
@@ -2227,6 +2345,10 @@
     return -1;
   }
 
+  // If the string is empty but the starting state is accepting, we accept.
+  if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
+    return 0;
+
   result = 1;
   strp = string;
   sset = nfa_closure_create (a, a->start, 0);

Modified: gnunet/src/regex/test_regex_eval_api.c
===================================================================
--- gnunet/src/regex/test_regex_eval_api.c      2012-06-11 15:08:19 UTC (rev 
21891)
+++ gnunet/src/regex/test_regex_eval_api.c      2012-06-11 15:10:21 UTC (rev 
21892)
@@ -263,8 +263,9 @@
   int check_nfa;
   int check_dfa;
   int check_rand;
+  char *check_proof;
 
-  struct Regex_String_Pair rxstr[8] = {
+  struct Regex_String_Pair rxstr[12] = {
     {"ab?(abcd)?", 5,
      {"ababcd", "abab", "aabcd", "a", "abb"},
      {match, nomatch, match, match, nomatch}},
@@ -288,6 +289,19 @@
     
{"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+",
 1,
      {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
      {nomatch}},
+    {"(bla)*", 8,
+     {"", "bla", "blabla", "bl", "la", "b", "l", "a"},
+     {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+    {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
+     {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", 
"l",
+      "a"},
+     {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+    {"a|aa*a", 6,
+     {"", "a", "aa", "aaa", "aaaa", "aaaaa"},
+     {nomatch, match, match, match, match, match}},
+    {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1,
+     {"abcabdblaacdbla"},
+     {nomatch}},
     {"ab(c|d)+c*(a(b|c)d)+", 1,
      {"abacd"},
      {nomatch}}
@@ -297,7 +311,7 @@
   check_dfa = 0;
   check_rand = 0;
 
-  for (i = 0; i < 8; i++)
+  for (i = 0; i < 12; i++)
   {
     if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
     {
@@ -314,7 +328,14 @@
     // DFA test
     a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
     check_dfa += test_automaton (a, &rx, &rxstr[i]);
+    check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a));
     GNUNET_REGEX_automaton_destroy (a);
+    a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
+    check_dfa += test_automaton (a, &rx, &rxstr[i]);
+    GNUNET_REGEX_automaton_destroy (a);
+    if (0 != check_dfa)
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof);
+    GNUNET_free_non_null (check_proof);
 
     regfree (&rx);
   }

Modified: gnunet/src/regex/test_regex_iterate_api.c
===================================================================
--- gnunet/src/regex/test_regex_iterate_api.c   2012-06-11 15:08:19 UTC (rev 
21891)
+++ gnunet/src/regex/test_regex_iterate_api.c   2012-06-11 15:10:21 UTC (rev 
21892)
@@ -60,7 +60,10 @@
   struct GNUNET_REGEX_Automaton *dfa;
 
   error = 0;
-  /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; */
+  regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+";
+  /*regex = "(bla)+"; */
+  /*regex = "b(lab)*la"; */
+  /*regex = "(bla)*"; */
   /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */
   /*regex = "z(abc|def)?xyz"; */
   /*regex = "1*0(0|1)*"; */
@@ -68,7 +71,15 @@
   /*regex = 
"abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)";
 */
   /*regex = "abc(1|0)*def"; */
   /*regex = "ab|ac"; */
-  regex = "(ab)(ab)*";
+  /*regex = "(ab)(ab)*"; */
+  /*regex = "ab|cd|ef|gh"; */
+  /*regex = "a|b|c|d|e|f|g"; */
+  /*regex = "(ab)|(ac)"; */
+  /*regex = "a(b|c)"; */
+  /*regex = "a*a"; */
+  /*regex = "ab?(abcd)?"; */
+  /*regex = "(ab)+"; */
+  /*regex = "(abcsdfsdf)+"; */
   dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex));
   GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot");
   GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]