[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] Changes to eliot/dic/automaton.c [antoine-1]
From: |
eliot-dev |
Subject: |
[Eliot-dev] Changes to eliot/dic/automaton.c [antoine-1] |
Date: |
Sun, 23 Oct 2005 13:13:57 -0400 |
Index: eliot/dic/automaton.c
diff -u /dev/null eliot/dic/automaton.c:1.9.2.1
--- /dev/null Sun Oct 23 17:13:57 2005
+++ eliot/dic/automaton.c Sun Oct 23 17:13:56 2005
@@ -0,0 +1,700 @@
+/* Eliot */
+/* Copyright (C) 2005 Antoine Fraboulet */
+/* */
+/* This file is part of Eliot. */
+/* */
+/* Eliot is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or */
+/* (at your option) any later version. */
+/* */
+/* Elit is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* You should have received a copy of the GNU General Public License */
+/* along with this program; if not, write to the Free Software */
+/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
+<<<<<<< automaton.c
+/* $Id: automaton.c,v 1.9.2.1 2005/10/23 17:13:56 afrab Exp $ */
+
+/**
+ * \file automaton.c
+ * \brief (Non)Deterministic Finite Automaton for Regexp
+ * \author Antoine Fraboulet
+ * \date 2005
+=======
+/*
+ * $Id: automaton.c,v 1.9.2.1 2005/10/23 17:13:56 afrab Exp $
+>>>>>>> 1.9
+ */
+
+#include "config.h"
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+#include <unistd.h>
+
+#include "dic.h"
+#include "regexp.h"
+#include "alist.h"
+#include "automaton.h"
+
+#ifdef DEBUG_AUTOMATON
+#define DMSG(a) a
+#else
+#define DMSG(a)
+#endif
+
+#define MAX_TRANSITION_LETTERS 256
+
+typedef struct automaton_state_t *astate;
+typedef struct Automaton_t *Automaton;
+
+/* ************************************************** *
+ exported functions for static automata
+ * ************************************************** */
+
+automaton automaton_build (int init_state, int *ptl, int *PS, struct
search_RegE_list_t *list);
+void automaton_delete (automaton a);
+int automaton_get_nstate (automaton a);
+int automaton_get_init (automaton a);
+int automaton_get_accept (automaton a, int state);
+int automaton_get_next_state (automaton a, int start, char l);
+void automaton_dump (automaton a, char* filename);
+
+
+/* ************************************************** *
+ static functions for dynamic automata
+ * ************************************************** */
+
+static Automaton s_automaton_create ();
+static void s_automaton_delete (Automaton a);
+
+static alist s_automaton_id_create (int id);
+static char* s_automaton_id_to_str (alist id);
+
+static astate s_automaton_state_create (alist id);
+
+static void s_automaton_add_state (Automaton a, astate s);
+static astate s_automaton_get_state (Automaton a, alist id);
+
+static Automaton s_automaton_PS_to_NFA (int init_state, int *ptl, int
*PS);
+static Automaton s_automaton_NFA_to_DFA (Automaton a, struct
search_RegE_list_t *list);
+static automaton s_automaton_finalize (Automaton a);
+#ifdef DEBUG_AUTOMATON
+static void s_automaton_dump (Automaton a, char* filename);
+#endif
+
+/* ************************************************** *
+ data types
+ * ************************************************** */
+
+struct automaton_state_t {
+ alist id; // alist of int
+ int accept;
+ int id_static;
+ astate next[MAX_TRANSITION_LETTERS];
+};
+
+struct Automaton_t {
+ int nstates;
+ astate init_state;
+ alist states; // alist of alist of int
+};
+
+struct automaton_t {
+ int nstates;
+ int init;
+ int *accept;
+ int **trans;
+};
+
+/* ************************************************** *
+ exported functions for static automata
+ * ************************************************** */
+
+automaton
+automaton_build(int init_state, int *ptl, int *PS, struct search_RegE_list_t
*list)
+{
+ Automaton nfa,dfa;
+ automaton final;
+
+ nfa = s_automaton_PS_to_NFA(init_state,ptl,PS);
+ DMSG(printf("\n non deterministic automaton OK \n\n"));
+ DMSG(s_automaton_dump(nfa,"auto_nfa"));
+
+ dfa = s_automaton_NFA_to_DFA(nfa, list);
+ DMSG(printf("\n deterministic automaton OK \n\n"));
+ DMSG(s_automaton_dump(dfa,"auto_dfa"));
+
+ final = s_automaton_finalize(dfa);
+ DMSG(printf("\n final automaton OK \n\n"));
+ DMSG(automaton_dump(final,"auto_fin"));
+
+ s_automaton_delete(nfa);
+ s_automaton_delete(dfa);
+ return final;
+}
+
+void
+automaton_delete(automaton a)
+{
+ int i;
+ free(a->accept);
+ for(i=0; i <= a->nstates; i++)
+ free(a->trans[i]);
+ free(a->trans);
+ free(a);
+}
+
+inline int
+automaton_get_nstates(automaton a)
+{
+ return a->nstates;
+}
+
+inline int
+automaton_get_init(automaton a)
+{
+ return a->init;
+}
+
+inline int
+automaton_get_accept(automaton a, int state)
+{
+ return a->accept[state];
+}
+
+inline int
+automaton_get_next_state(automaton a, int state, char l)
+{
+ return a->trans[state][(int)l];
+}
+
+void
+automaton_dump(automaton a, char* filename)
+{
+ int i,l;
+ FILE* f;
+#ifdef HAVE_SYS_WAIT_H
+ pid_t pid;
+#endif
+
+ if (a == NULL)
+ return ;
+ f=fopen(filename,"w");
+ fprintf(f,"digraph automaton {\n");
+ for(i=1; i<=a->nstates; i++)
+ {
+ fprintf(f,"\t%d [label = \"%d\"",i,i);
+ if (i == a->init)
+ fprintf(f,", style = filled, color=lightgrey");
+ if (a->accept[i])
+ fprintf(f,", shape = doublecircle");
+ fprintf(f,"];\n");
+ }
+ fprintf(f,"\n");
+ for(i=1; i<=a->nstates; i++)
+ for(l=0; l < MAX_TRANSITION_LETTERS; l++)
+ if (a->trans[i][l])
+ {
+ fprintf(f,"\t%d -> %d [label = \"",i,a->trans[i][l]);
+ regexp_print_letter(f,l);
+ fprintf(f,"\"];\n");
+ }
+ fprintf(f,"fontsize=20;\n");
+ fprintf(f,"}\n");
+ fclose(f);
+
+#ifdef HAVE_SYS_WAIT_H
+ pid = fork ();
+ if (pid > 0) {
+ wait(NULL);
+ } else if (pid == 0) {
+ execlp("dotty","dotty",filename,NULL);
+ printf("exec dotty failed\n");
+ exit(1);
+ }
+#endif
+}
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
+void
+state_delete_fun(void* ps)
+{
+ astate s = ps;
+ alist_delete(s->id);
+ free(s);
+}
+
+static Automaton
+s_automaton_create()
+{
+ Automaton a;
+ a = (Automaton)malloc(sizeof(struct Automaton_t));
+ a->nstates = 0;
+ a->init_state = NULL;
+ a->states = alist_create();
+ alist_set_delete(a->states,state_delete_fun);
+ return a;
+}
+
+
+static void
+s_automaton_delete(Automaton a)
+{
+ alist_delete(a->states);
+ free(a);
+}
+
+static alist
+s_automaton_id_create(int id)
+{
+ alist a = alist_create();
+ alist_add(a,(void*)id);
+ return a;
+}
+
+static char* s_automaton_id_to_str(alist id)
+{
+ static char s[250];
+ memset(s,0,sizeof(s));
+ alist_elt ptr;
+ for(ptr = alist_get_first(id); ptr ; ptr = alist_get_next(id,ptr))
+ {
+ char tmp[50];
+ sprintf(tmp,"%d ",(int)alist_elt_get_value(ptr));
+ strcat(s,tmp);
+ }
+ return s;
+}
+
+static astate
+s_automaton_state_create(alist id)
+{
+ astate s;
+ s = (astate)malloc(sizeof(struct automaton_state_t));
+ s->id = id;
+ s->accept = 0;
+ memset(s->next,0,sizeof(astate)*MAX_TRANSITION_LETTERS);
+ DMSG(printf("** state %s creation\n",s_automaton_id_to_str(id)));
+ return s;
+}
+
+static void
+s_automaton_add_state(Automaton a, astate s)
+{
+ a->nstates ++;
+ alist_add(a->states,(void*)s);
+ DMSG(printf("** state %s added to
automaton\n",s_automaton_id_to_str(s->id)));
+}
+
+static astate
+s_automaton_get_state(Automaton a, alist id)
+{
+ astate s;
+ alist_elt ptr;
+ for(ptr = alist_get_first(a->states) ; ptr ; ptr =
alist_get_next(a->states,ptr))
+ {
+ s = alist_elt_get_value(ptr);
+ if (alist_equal(s->id,id))
+ {
+ //DMSG(printf("** get state %s ok\n",s_automaton_id_to_str(s->id)));
+ return s;
+ }
+ }
+ return NULL;
+}
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
+Automaton
+s_automaton_PS_to_NFA(int init_state_id, int *ptl, int *PS)
+{
+ int p;
+ int maxpos = PS[0];
+ Automaton nfa = NULL;
+ alist temp_id;
+ alist_elt ptr;
+ astate temp_state,current_state;
+ alist L;
+ char used_letter[MAX_TRANSITION_LETTERS];
+
+ nfa = s_automaton_create();
+ L = alist_create();
+
+ /* 1: init_state = root->PP */
+ temp_id = s_automaton_id_create(init_state_id);
+ temp_state = s_automaton_state_create(temp_id);
+ nfa->init_state = temp_state;
+ s_automaton_add_state(nfa,temp_state);
+ alist_add(L,temp_state);
+ /* 2: while \exist state \in state_list */
+ while (! alist_is_empty(L))
+ {
+ current_state = (astate)alist_pop_first_value(L);
+ DMSG(printf("** current state =
%s\n",s_automaton_id_to_str(current_state->id)));
+ memset(used_letter,0,sizeof(used_letter));
+ /* 3: \foreach l in \sigma | l \neq # */
+ for(p=1; p < maxpos; p++)
+ {
+ int current_letter = ptl[p];
+ if (used_letter[current_letter] == 0)
+ {
+ /* 4: int set = \cup { PS(pos) | pos \in state \wedge pos == l }
*/
+ int pos, ens = 0;
+ for(pos = 1; pos <= maxpos; pos++)
+ {
+ if (ptl[pos] == current_letter &&
+
(int)alist_elt_get_value(alist_get_first(current_state->id)) & (1 << (pos - 1)))
+ ens |= PS[pos];
+ }
+ /* 5: transition from current_state to temp_state */
+ if (ens)
+ {
+ temp_id = s_automaton_id_create(ens);
+ temp_state = s_automaton_get_state(nfa,temp_id);
+ if (temp_state == NULL)
+ {
+ temp_state = s_automaton_state_create(temp_id);
+ s_automaton_add_state (nfa,temp_state);
+ current_state->next[current_letter] = temp_state;
+ alist_add(L,temp_state);
+ }
+ else
+ {
+ alist_delete(temp_id);
+ current_state->next[current_letter] = temp_state;
+ }
+ }
+ used_letter[current_letter] = 1;
+ }
+ }
+ }
+
+ alist_delete(L);
+
+ for(ptr = alist_get_first(nfa->states); ptr ; ptr =
alist_get_next(nfa->states,ptr))
+ {
+ astate s = (astate)alist_elt_get_value(ptr);
+ if ((int)alist_elt_get_value(alist_get_first(s->id)) & (1 << (maxpos -
1)))
+ s->accept = 1;
+ }
+
+ return nfa;
+}
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
+static alist
+s_automaton_successor(alist S, int letter, Automaton nfa, struct
search_RegE_list_t *list)
+{
+ alist R,r;
+ alist_elt ptr;
+ R = alist_create(); /* R = \empty */
+ /* \forall y \in S
*/
+ for(ptr = alist_get_first(S); ptr ; ptr = alist_get_next(S,ptr))
+ {
+ int i;
+ alist t, Ry; astate y,z;
+
+ i = (int)alist_elt_get_value(ptr);
+ t = s_automaton_id_create(i);
+ assert(y = s_automaton_get_state(nfa,t));
+ alist_delete(t);
+
+ Ry = alist_create(); /* Ry = \empty
*/
+
+ if ((z = y->next[letter]) != NULL) /* \delta (y,z) = l
*/
+ {
+ r = s_automaton_successor(z->id,RE_EPSILON,nfa, list);
+ alist_insert(Ry,r);
+ alist_delete(r);
+ alist_insert(Ry,z->id); /* Ry = Ry \cup
succ(z) */
+ }
+
+ /* \epsilon transition from start node */
+ if ((z = y->next[RE_EPSILON]) != NULL) /* \delta (y,z) =
\epsilon */
+ {
+ r = s_automaton_successor(z->id,letter,nfa, list);
+ alist_insert(Ry,r); /* Ry = Ry \cup
succ(z) */
+ alist_delete(r);
+ }
+
+ if (letter < RE_FINAL_TOK)
+ {
+ for(i = 0 ; i < DIC_SEARCH_REGE_LIST ; i++)
+ if (list->valid[i])
+ {
+ if (list->letters[i][letter] && (z =
y->next[(int)list->symbl[i]]) != NULL)
+ {
+ DMSG(printf("*** letter "));
+ DMSG(regexp_print_letter(stdout,letter));
+ DMSG(printf("is in "));
+ DMSG(regexp_print_letter(stdout,i));
+
+ r = s_automaton_successor(z->id,RE_EPSILON,nfa, list);
+ alist_insert(Ry,r);
+ alist_delete(r);
+ alist_insert(Ry,z->id);
+ }
+ }
+ }
+
+#if 0
+ if (alist_is_empty(Ry)) /* Ry = \empty
*/
+ return Ry;
+#endif
+
+ alist_insert(R,Ry); /* R = R \cup Ry
*/
+ alist_delete(Ry);
+ }
+
+ return R;
+}
+
+static void
+s_automaton_node_set_accept(astate s, Automaton nfa)
+{
+ void* idx;
+ alist_elt ptr;
+
+ DMSG(printf("=== setting accept for node (%s)
:",s_automaton_id_to_str(s->id)));
+ for(ptr = alist_get_first(nfa->states) ; ptr ; ptr =
alist_get_next(nfa->states,ptr))
+ {
+ astate ns = (astate)alist_elt_get_value(ptr);
+ idx = alist_elt_get_value(alist_get_first(ns->id));
+ DMSG(printf("%s ",s_automaton_id_to_str(ns->id)));
+ if (ns->accept && alist_is_in(s->id,idx))
+ {
+ DMSG(printf("(ok) "));
+ s->accept = 1;
+ }
+ }
+ DMSG(printf("\n"));
+}
+
+static Automaton
+s_automaton_NFA_to_DFA(Automaton nfa, struct search_RegE_list_t *list)
+{
+ Automaton dfa = NULL;
+ alist temp_id;
+ alist_elt ptr;
+ astate temp_state, current_state;
+ alist L;
+ int letter;
+
+ dfa = s_automaton_create();
+ L = alist_create();
+
+ temp_id = alist_clone(nfa->init_state->id);
+ temp_state = s_automaton_state_create(temp_id);
+ dfa->init_state = temp_state;
+ s_automaton_add_state(dfa,temp_state);
+ alist_add(L,temp_state);
+ while (! alist_is_empty(L))
+ {
+ current_state = (astate)alist_pop_first_value(L);
+ DMSG(printf("** current state =
%s\n",s_automaton_id_to_str(current_state->id)));
+ for(letter = 1; letter < DIC_LETTERS; letter++)
+ {
+ // DMSG(printf("*** start successor of
%s\n",s_automaton_id_to_str(current_state->id)));
+
+ temp_id = s_automaton_successor(current_state->id,letter,nfa,list);
+
+ if (! alist_is_empty(temp_id))
+ {
+
+ DMSG(printf("*** successor of %s for
",s_automaton_id_to_str(current_state->id)));
+ DMSG(regexp_print_letter(stdout,letter));
+ DMSG(printf(" = %s\n", s_automaton_id_to_str(temp_id)));
+
+ temp_state = s_automaton_get_state(dfa,temp_id);
+
+ // DMSG(printf("*** automaton get state -%s-
ok\n",s_automaton_id_to_str(temp_id)));
+
+ if (temp_state == NULL)
+ {
+ temp_state = s_automaton_state_create(temp_id);
+ s_automaton_add_state(dfa,temp_state);
+ current_state->next[letter] = temp_state;
+ alist_add(L,temp_state);
+ }
+ else
+ {
+ alist_delete(temp_id);
+ current_state->next[letter] = temp_state;
+ }
+ }
+ else
+ {
+ alist_delete(temp_id);
+ }
+ }
+ }
+
+ for(ptr = alist_get_first(dfa->states) ; ptr ; ptr =
alist_get_next(dfa->states,ptr))
+ {
+ astate s = (astate)alist_elt_get_value(ptr);
+ s_automaton_node_set_accept(s,nfa);
+ }
+
+ alist_delete(L);
+ return dfa;
+}
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
+static automaton
+s_automaton_finalize(Automaton a)
+{
+ int i,l;
+ automaton fa = NULL;
+ alist_elt ptr;
+ astate s;
+
+ if (a == NULL)
+ return NULL;
+
+ /* creation */
+ fa = (automaton)malloc(sizeof(struct automaton_t));
+ fa->nstates = a->nstates;
+ fa->accept = (int*) malloc((fa->nstates + 1)*sizeof(int));
+ memset(fa->accept,0,(fa->nstates + 1)*sizeof(int));
+ fa->trans = (int**)malloc((fa->nstates + 1)*sizeof(int*));
+ for(i=0; i <= fa->nstates; i++)
+ {
+ fa->trans[i] = (int*)malloc(MAX_TRANSITION_LETTERS * sizeof(int));
+ memset(fa->trans[i],0,MAX_TRANSITION_LETTERS * sizeof(int));
+ }
+
+ /* create new id for states */
+ for(i = 1 , ptr = alist_get_first(a->states); ptr ; ptr =
alist_get_next(a->states,ptr), i++)
+ {
+ s = (astate)alist_elt_get_value(ptr);
+ s->id_static = i;
+ }
+
+ /* build new automaton */
+ for(ptr = alist_get_first(a->states); ptr ; ptr =
alist_get_next(a->states,ptr))
+ {
+ s = (astate)alist_elt_get_value(ptr);
+ i = s->id_static;
+
+ if (s == a->init_state)
+ fa->init = i;
+ if (s->accept == 1)
+ fa->accept[i] = 1;
+
+ for(l=0; l < MAX_TRANSITION_LETTERS; l++)
+ if (s->next[l])
+ fa->trans[i][l] = s->next[l]->id_static;
+ }
+
+ return fa;
+}
+
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
+static void
+s_automaton_print_nodes(FILE* f, Automaton a)
+{
+ char * sid;
+ astate s;
+ alist_elt ptr;
+ for(ptr = alist_get_first(a->states) ; ptr != NULL ; ptr =
alist_get_next(a->states,ptr))
+ {
+ s = alist_elt_get_value(ptr);
+ sid = s_automaton_id_to_str(s->id);
+ fprintf(f,"\t\"%s\" [label = \"%s\"",sid,sid);
+ if (s == a->init_state)
+ {
+ fprintf(f,", style = filled, color=lightgrey");
+ }
+ if (s->accept)
+ {
+ fprintf(f,", shape = doublecircle");
+ }
+ fprintf(f,"];\n");
+ }
+ fprintf(f,"\n");
+}
+
+static void
+s_automaton_print_edges(FILE* f, Automaton a)
+{
+ int letter;
+ char * sid;
+ astate s;
+ alist_elt ptr;
+ for(ptr = alist_get_first(a->states) ; ptr != NULL ; ptr =
alist_get_next(a->states,ptr))
+ {
+ s = (astate)alist_elt_get_value(ptr);
+ for(letter=0; letter < 255; letter++)
+ {
+ if (s->next[letter])
+ {
+ sid = s_automaton_id_to_str(s->id);
+ fprintf(f,"\t\"%s\" -> ",sid);
+ sid = s_automaton_id_to_str(s->next[letter]->id);
+ fprintf(f,"\"%s\" [label = \"",sid);
+ regexp_print_letter(f,letter);
+ fprintf(f,"\"];\n");
+ }
+ }
+ }
+}
+
+static void
+s_automaton_dump(Automaton a, char* filename)
+{
+ FILE* f;
+#ifdef HAVE_SYS_WAIT_H
+ pid_t pid;
+#endif
+ if (a == NULL)
+ return;
+ f=fopen(filename,"w");
+ fprintf(f,"digraph automaton {\n");
+ s_automaton_print_nodes(f,a);
+ s_automaton_print_edges(f,a);
+ fprintf(f,"fontsize=20;\n");
+ fprintf(f,"}\n");
+ fclose(f);
+
+#ifdef HAVE_SYS_WAIT_H
+ pid = fork ();
+ if (pid > 0) {
+ wait(NULL);
+ } else if (pid == 0) {
+ execlp("dotty","dotty",filename,NULL);
+ printf("exec dotty failed\n");
+ exit(1);
+ }
+#endif
+}
+
+/* ************************************************** *
+ * ************************************************** *
+ * ************************************************** */
+
- [Eliot-dev] Changes to eliot/dic/automaton.c [antoine-1],
eliot-dev <=