[GNUnet-SVN] r20924 - gnunet/src/regex

gnunet-svn
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r20924 - gnunet/src/regex

From:	gnunet
Subject:	[GNUnet-SVN] r20924 - gnunet/src/regex
Date:	Tue, 10 Apr 2012 16:30:19 +0200
Author: szengel
Date: 2012-04-10 16:30:19 +0200 (Tue, 10 Apr 2012)
New Revision: 20924

Modified:
   gnunet/src/regex/regex.c
   gnunet/src/regex/test_regex.c
Log:
dfa minimization wip


Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c    2012-04-10 14:21:14 UTC (rev 20923)
+++ gnunet/src/regex/regex.c    2012-04-10 14:30:19 UTC (rev 20924)
@@ -177,6 +177,16 @@
   }
 }
 
+/**
+ * Compare two states. Used for sorting.
+ *
+ * @param a first state
+ * @param b second state
+ *
+ * @return an integer less than, equal to, or greater than zero
+ *         if the first argument is considered to be respectively
+ *         less than, equal to, or greater than the second.
+ */
 static int
 state_compare (const void *a, const void *b)
 {
@@ -329,17 +339,111 @@
   GNUNET_free (s);
 }
 
+/**
+ * Remove a state from the given automaton 'a'. Always use this function
+ * when altering the states of an automaton. Will also remove all transitions
+ * leading to this state, before destroying it.
+ *
+ * @param a automaton
+ * @param s state to remove
+ */
 static void
 automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
 {
   struct State *ss;
+  struct State *s_check;
+  struct Transition *t_check;
+
+  // remove state
   ss = s;
   GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
   a->state_count--;
+
+  // remove all transitions leading to this state
+  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
+  {
+    for (t_check = s_check->transitions_head; NULL != t_check;
+         t_check = t_check->next)
+    {
+      if (t_check->state == ss)
+      {
+        GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
+                                     s_check->transitions_tail, t_check);
+        s_check->transition_count--;
+      }
+    }
+  }
+
   automaton_destroy_state (ss);
 }
 
+/**
+ * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 
's2'.
+ *
+ * @param ctx context
+ * @param a automaton
+ * @param s1 first state
+ * @param s2 second state, will be destroyed
+ */
 static void
+automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
+                        struct GNUNET_REGEX_Automaton *a, struct State *s1,
+                        struct State *s2)
+{
+  struct State *s_check;
+  struct Transition *t_check;
+  struct Transition *t;
+  char *new_name;
+
+  GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
+
+  // 1. Make all transitions pointing to s2 point to s1
+  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
+  {
+    for (t_check = s_check->transitions_head; NULL != t_check;
+         t_check = t_check->next)
+    {
+      if (s_check != s2 && s2 == t_check->state)
+        t_check->state = s1;
+    }
+  }
+
+  // 2. Add all transitions from s2 to sX to s1
+  for (t_check = s2->transitions_head; NULL != t_check; t_check = 
t_check->next)
+  {
+    for (t = s1->transitions_head; NULL != t; t = t->next)
+    {
+      if (t_check->literal != t->literal && NULL != t_check->state &&
+          t_check->state != t->state && t_check->state != s2)
+      {
+        add_transition (ctx, s1, t_check->literal, t_check->state);
+      }
+    }
+  }
+
+  // 3. Rename s1 to {s1,s2}
+  new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1);
+  strncat (new_name, s1->name, strlen (s1->name));
+  strncat (new_name, s2->name, strlen (s2->name));
+  if (NULL != s1->name)
+    GNUNET_free (s1->name);
+  s1->name = new_name;
+
+  // remove state
+  s_check = s2;
+  GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
+  a->state_count--;
+  automaton_destroy_state (s_check);
+}
+
+/**
+ * Add a state to the automaton 'a', always use this function to
+ * alter the states DLL of the automaton.
+ *
+ * @param a automaton to add the state to
+ * @param s state that should be added
+ */
+static void
 automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
 {
   GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
@@ -493,9 +597,9 @@
   stack_len++;
   while (stack_len > 0)
   {
-    s = stack[stack_len-1];
+    s = stack[stack_len - 1];
     stack_len--;
-    s->marked = 1; // mark s as visited
+    s->marked = 1;              // mark s as visited
     for (t = s->transitions_head; NULL != t; t = t->next)
     {
       if (NULL != t->state && 0 == t->state->marked)
@@ -525,9 +629,7 @@
 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
 {
   struct State *s;
-  struct State *s_check;
   struct Transition *t;
-  struct Transition *t_check;
   int dead;
 
   GNUNET_assert (DFA == a->type);
@@ -551,20 +653,6 @@
       continue;
 
     // state s is dead, remove it
-    // 1. remove all transitions to this state
-    for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
-    {
-      for (t_check = s_check->transitions_head; NULL != t_check;
-           t_check = t_check->next)
-      {
-        if (t_check->state == s)
-        {
-          GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
-                                       s_check->transitions_tail, t_check);
-        }
-      }
-    }
-    // 2. remove state
     automaton_remove_state (a, s);
   }
 }
@@ -575,9 +663,80 @@
  * @param a DFA automaton
  */
 static void
-dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a)
+dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
+                                     struct GNUNET_REGEX_Automaton *a)
 {
+  int i;
+  int table[a->state_count][a->state_count];
+  struct State *s1;
+  struct State *s2;
+  struct Transition *t1;
+  struct Transition *t2;
+  int change;
 
+  change = 1;
+  for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
+       i++, s1 = s1->next)
+    s1->marked = i;
+
+  // Mark all pairs of accepting/!accepting states
+  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+  {
+    for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
+    {
+      if ((s1->accepting && !s2->accepting) ||
+          (!s1->accepting && s2->accepting))
+      {
+        table[s1->marked][s2->marked] = 1;
+      }
+      else
+        table[s1->marked][s2->marked] = 0;
+    }
+  }
+
+  while (0 != change)
+  {
+    change = 0;
+    for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+    {
+      for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
+      {
+        if (0 != table[s1->marked][s2->marked])
+          continue;
+
+        for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
+        {
+          for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
+          {
+            if (t1->literal == t2->literal && t1->state == t2->state &&
+                (0 != table[t1->state->marked][t2->state->marked] ||
+                 0 != table[t2->state->marked][t1->state->marked]))
+            {
+              table[s1->marked][s2->marked] = t1->literal;
+              change = 1;
+            }
+            else if (t1->literal != t2->literal && t1->state != t2->state)
+            {
+              table[s1->marked][s2->marked] = -1;
+              change = 1;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  struct State *s2_next;
+
+  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
+  {
+    for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
+    {
+      s2_next = s2->next;
+      if (s1 != s2 && table[s1->marked][s2->marked] == 0)
+        automaton_merge_states (ctx, a, s1, s2);
+    }
+  }
 }
 
 /**
@@ -587,7 +746,8 @@
  * @param a DFA automaton
  */
 static void
-dfa_minimize (struct GNUNET_REGEX_Automaton *a)
+dfa_minimize (struct GNUNET_REGEX_Context *ctx,
+              struct GNUNET_REGEX_Automaton *a)
 {
   if (NULL == a)
     return;
@@ -601,7 +761,7 @@
   dfa_remove_dead_states (a);
 
   // 3. Merge nondistinguishable states
-  dfa_merge_nondistinguishable_states (a);
+  dfa_merge_nondistinguishable_states (ctx, a);
 }
 
 /**
@@ -1169,7 +1329,7 @@
 
   if (NULL == nfa)
   {
-    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
                 "Could not create DFA, because NFA creation failed\n");
     return NULL;
   }
@@ -1225,7 +1385,7 @@
   GNUNET_free (dfa_stack);
   GNUNET_REGEX_automaton_destroy (nfa);
 
-  dfa_minimize (dfa);
+  /*dfa_minimize (&ctx, dfa);*/
 
   return dfa;
 }
@@ -1401,7 +1561,6 @@
   return result;
 }
 
-
 /**
  * Evaluates the given 'string' against the given compiled regex
  *

Modified: gnunet/src/regex/test_regex.c
===================================================================
--- gnunet/src/regex/test_regex.c       2012-04-10 14:21:14 UTC (rev 20923)
+++ gnunet/src/regex/test_regex.c       2012-04-10 14:30:19 UTC (rev 20924)
@@ -42,18 +42,17 @@
 };
 
 static const char allowed_literals[] =
-        "0123456789"
-        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-        "abcdefghijklmnopqrstuvwxyz";
+    "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
 
 int
-test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int 
str_count)
+test_random (unsigned int rx_length, unsigned int max_str_len,
+             unsigned int str_count)
 {
   int i;
   int j;
   int rx_exp;
-  char rand_rx[rx_length+1];
-  char matching_str[str_count][max_str_len+1];
+  char rand_rx[rx_length + 1];
+  char matching_str[str_count][max_str_len + 1];
   char *rand_rxp;
   char *matching_strp;
   int char_op_switch;
@@ -79,41 +78,40 @@
   last_was_op = 1;
 
   // Generate random regex and a string that matches the regex
-  for (i=0; i<rx_length; i++)
+  for (i = 0; i < rx_length; i++)
   {
-    char_op_switch = 0 + (int)(1.0 * rand() / (RAND_MAX + 1.0));
+    char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
 
-    if (0 == char_op_switch
-        && !last_was_op)
+    if (0 == char_op_switch && !last_was_op)
     {
       last_was_op = 1;
       rx_exp = rand () % 3;
 
       switch (rx_exp)
       {
-        case 0:
-          current_char = '+';
-          break;
-        case 1:
-          current_char = '*';
-          break;
-        case 2:
-          if (i < rx_length -1) // '|' cannot be at the end
-            current_char = '|';
-          else
-            current_char = allowed_literals[rand() % (sizeof(allowed_literals) 
- 1)];
-          break;
+      case 0:
+        current_char = '+';
+        break;
+      case 1:
+        current_char = '*';
+        break;
+      case 2:
+        if (i < rx_length - 1)  // '|' cannot be at the end
+          current_char = '|';
+        else
+          current_char =
+              allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
+        break;
       }
     }
     else
     {
-      current_char = allowed_literals[rand() % (sizeof(allowed_literals) - 1)];
+      current_char =
+          allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
       last_was_op = 0;
     }
 
-    if (current_char != '+'
-        && current_char != '*'
-        && current_char != '|')
+    if (current_char != '+' && current_char != '*' && current_char != '|')
     {
       *matching_strp = current_char;
       matching_strp++;
@@ -127,17 +125,18 @@
 
   // Generate some random strings for matching...
   // Start at 1, because the first string is generated above during regex 
generation
-  for (i=1; i<str_count; i++)
+  for (i = 1; i < str_count; i++)
   {
-    str_len = rand() % max_str_len;
-    for (j=0; j<str_len; j++)
-      matching_str[i][j] = allowed_literals[rand() % (sizeof(allowed_literals) 
- 1)];
+    str_len = rand () % max_str_len;
+    for (j = 0; j < str_len; j++)
+      matching_str[i][j] =
+          allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
     matching_str[i][str_len] = '\0';
   }
 
   // Now match
   result = 0;
-  for (i=0; i<str_count; i++)
+  for (i = 0; i < str_count; i++)
   {
     // Match string using DFA
     dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
@@ -153,7 +152,8 @@
     // Match string using glibc regex
     if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
     {
-      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using 
regcomp\n");
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Could not compile regex using regcomp\n");
       return -1;
     }
 
@@ -161,7 +161,9 @@
     regfree (&rx);
 
     // We only want to match the whole string, because that's what our DFA 
does, too.
-    if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != 
strlen (matching_str[i])))
+    if (eval_check == 0 &&
+        (matchptr[0].rm_so != 0 ||
+         matchptr[0].rm_eo != strlen (matching_str[i])))
       eval_check = 1;
 
     // compare result
@@ -178,7 +180,8 @@
 }
 
 int
-test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct 
Regex_String_Pair *rxstr)
+test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
+                struct Regex_String_Pair *rxstr)
 {
   int result;
   int eval;
@@ -195,28 +198,29 @@
 
   result = 0;
 
-  for (i=0; i<rxstr->string_count; i++)
+  for (i = 0; i < rxstr->string_count; i++)
   {
     eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
     eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
 
     // We only want to match the whole string, because that's what our DFA 
does, too.
-    if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != 
strlen (rxstr->strings[i])))
+    if (eval_check == 0 &&
+        (matchptr[0].rm_so != 0 ||
+         matchptr[0].rm_eo != strlen (rxstr->strings[i])))
       eval_check = 1;
 
-    if ((rxstr->expected_results[i] == match
-         && (0 != eval || 0 != eval_check))
-        ||
-        (rxstr->expected_results[i] == nomatch
-         && (0 == eval || 0 == eval_check)))
+    if ((rxstr->expected_results[i] == match && (0 != eval || 0 != eval_check))
+        || (rxstr->expected_results[i] == nomatch &&
+            (0 == eval || 0 == eval_check)))
     {
-        result = 1;
-        regerror (eval_check, rx, error, sizeof error);
-        GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
-                    "Unexpected result:\nregex: %s\nstring: %s\nexpected 
result: %i\n"
-                    "gnunet regex: %i\nglibc regex: %i\nglibc error: 
%s\nrm_so: %i\nrm_eo: %i\n\n",
-                    rxstr->regex, rxstr->strings[i], 
rxstr->expected_results[i],
-                    eval, eval_check, error, matchptr[0].rm_so, 
matchptr[0].rm_eo);
+      result = 1;
+      regerror (eval_check, rx, error, sizeof error);
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Unexpected result:\nregex: %s\nstring: %s\nexpected result: 
%i\n"
+                  "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: 
%i\nrm_eo: %i\n\n",
+                  rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
+                  eval, eval_check, error, matchptr[0].rm_so,
+                  matchptr[0].rm_eo);
     }
   }
   return result;
@@ -239,23 +243,34 @@
   int check_nfa;
   int check_dfa;
   int check_rand;
-  struct Regex_String_Pair rxstr[2] = {
-    {"ab(c|d)+c*(a(b|c)d)+", 5, 
-     {"abcdcdcdcdddddabd", "abcd", 
"abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"}, 
+
+  struct Regex_String_Pair rxstr[4] = {
+    {"ab(c|d)+c*(a(b|c)d)+", 5,
+     {"abcdcdcdcdddddabd", "abcd", 
"abcddddddccccccccccccccccccccccccabdacdabd",
+      "abccccca", "abcdcdcdccdabdabd"},
      {match, nomatch, match, nomatch, match}},
-    {"ab+c*(a(bx|c)d)+", 5, 
-     {"abcdcdcdcdddddabd", "abcd", 
"abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"}, 
-     {nomatch, nomatch, nomatch, nomatch, nomatch}}};
+    {"ab+c*(a(bx|c)d)+", 5,
+     {"abcdcdcdcdddddabd", "abcd", 
"abcddddddccccccccccccccccccccccccabdacdabd",
+      "abccccca", "abcdcdcdccdabdabd"},
+     {nomatch, nomatch, nomatch, nomatch, nomatch}},
+    
{"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*",
 1,
+     {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+     {nomatch}},
+    
{"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*",
 1,
+     {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+     {nomatch}}
+  };
 
   check_nfa = 0;
   check_dfa = 0;
   check_rand = 0;
 
-  for (i=0; i<2; i++)
+  for (i = 0; i < 4; i++)
   {
     if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
     {
-      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using 
regcomp()\n");
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Could not compile regex using regcomp()\n");
       return 1;
     }
 
@@ -272,8 +287,8 @@
     regfree (&rx);
   }
 
-  srand (time(NULL));
-  for (i=0; i< 100; i++)
+  srand (time (NULL));
+  for (i = 0; i < 100; i++)
     check_rand += test_random (100, 150, 10);
 
   return check_nfa + check_dfa + check_rand;
[Prev in Thread]
Current Thread
[Next in Thread]
[GNUnet-SVN] r20924 - gnunet/src/regex, gnunet <=
Prev by Date: [GNUnet-SVN] r20923 - gnunet/src/gns
Next by Date: [GNUnet-SVN] r20925 - gnunet/src/regex
Previous by thread: [GNUnet-SVN] r20923 - gnunet/src/gns
Next by thread: [GNUnet-SVN] r20925 - gnunet/src/regex
Index(es):
- Date
- Thread