eliot-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Eliot-dev] eliot dic/automaton.cpp dic/dic.cpp dic/dic.h d...


From: eliot-dev
Subject: [Eliot-dev] eliot dic/automaton.cpp dic/dic.cpp dic/dic.h d...
Date: Sun, 27 Jul 2008 13:32:48 +0000

CVSROOT:        /sources/eliot
Module name:    eliot
Changes by:     Olivier Teulière <ipkiss>      08/07/27 13:32:48

Modified files:
        dic            : automaton.cpp dic.cpp dic.h dic_search.cpp 
                         grammar.cpp regexp.h regexpmain.cpp 
        utils          : eliottxt.cpp 

Log message:
         - Fixed a bug in the parser that prevented some regular expressions
with wide chars from being parsed correctly. Regular expressions now seem to
work fine with a dictionary using Polish words.
         - More C++ in the automaton code
         - Simplified the code in several places

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/automaton.cpp?cvsroot=eliot&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.cpp?cvsroot=eliot&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.h?cvsroot=eliot&r1=1.16&r2=1.17
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_search.cpp?cvsroot=eliot&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/grammar.cpp?cvsroot=eliot&r1=1.1&r2=1.2
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/regexp.h?cvsroot=eliot&r1=1.15&r2=1.16
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/regexpmain.cpp?cvsroot=eliot&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/eliot/utils/eliottxt.cpp?cvsroot=eliot&r1=1.22&r2=1.23

Patches:
Index: dic/automaton.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/automaton.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- dic/automaton.cpp   20 Jul 2008 12:15:51 -0000      1.4
+++ dic/automaton.cpp   27 Jul 2008 13:32:47 -0000      1.5
@@ -54,8 +54,41 @@
 
 #define MAX_TRANSITION_LETTERS 256
 
-typedef struct automaton_state_t *astate;
 
+/* ************************************************** *
+   Definition of the automaton state
+ * ************************************************** */
+
+static string idToString(const set<uint64_t> &iId);
+
+class State
+{
+public:
+    State(const set<uint64_t> iId) : m_id(iId) { init(); }
+    State(uint64_t iId)
+    {
+        m_id.insert(iId);
+        init();
+    }
+
+    const set<uint64_t> & getId() const { return m_id; }
+
+    // FIXME: should be private
+    bool m_accept;
+    int id_static;
+    State * m_next[MAX_TRANSITION_LETTERS];
+
+private:
+    set<uint64_t> m_id;
+
+    void init()
+    {
+        m_accept = false;
+        id_static = 0;
+        memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
+        DMSG(printf("** state %s creation\n", idToString(m_id).c_str()));
+    }
+};
 
 /* ************************************************** *
    Helper class, allowing to build a NFA, then a DFA
@@ -64,10 +97,10 @@
 class AutomatonHelper
 {
 public:
-    AutomatonHelper(astate iInitState);
+    AutomatonHelper(State * iInitState);
     ~AutomatonHelper();
 
-    astate getInitState() const { return m_initState; }
+    State * getInitState() const { return m_initState; }
 #ifdef DEBUG_AUTOMATON
     void dump(const string &iFileName) const;
 #endif
@@ -77,39 +110,22 @@
                                     struct search_RegE_list_t *iList);
 
     /// List of states
-    list<astate> m_states;
+    list<State *> m_states;
 
 private:
     /// Initial state of the automaton
-    astate m_initState;
+    State * m_initState;
 
-    void addState(astate s);
-    astate getState(const set<uint64_t> &iId) const;
+    void addState(State * s);
+    State * getState(const set<uint64_t> &iId) const;
     void printNodes(FILE* f) const;
     void printEdges(FILE* f) const;
-    void setAccept(astate s) const;
+    void setAccept(State * s) const;
     set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct 
search_RegE_list_t *iList) const;
 };
 
 
 /* ************************************************** *
-   State handling
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id);
-static string   s_state_id_to_str(const set<uint64_t> &iId);
-static astate   s_state_create   (const set<uint64_t> &iId);
-
-struct automaton_state_t
-{
-    set<uint64_t> id;
-    bool accept;
-    int      id_static;
-    astate   next[MAX_TRANSITION_LETTERS];
-};
-
-
-/* ************************************************** *
    Definition of the Automaton class
  * ************************************************** */
 
@@ -125,7 +141,7 @@
 
     finalize(*dfa);
     DMSG(printf("\n final automaton OK \n\n"));
-    DMSG(automaton_dump("auto_fin"));
+    DMSG(dump("auto_fin"));
 
     delete nfa;
     delete dfa;
@@ -157,7 +173,7 @@
     }
 
     /* Create new id for states */
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     int i;
     for (i = 1, it = iHelper.m_states.begin();
          it != iHelper.m_states.end(); it++, i++)
@@ -168,18 +184,18 @@
     /* Build new automaton */
     for (it = iHelper.m_states.begin(); it != iHelper.m_states.end(); it++)
     {
-        astate s = *it;
+        State * s = *it;
         int i = s->id_static;
 
         if (s == iHelper.getInitState())
             m_init = i;
-        if (s->accept)
+        if (s->m_accept)
             m_acceptors[i] = true;
 
         for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
         {
-            if (s->next[l])
-                m_transitions[i][l] = s->next[l]->id_static;
+            if (s->m_next[l])
+                m_transitions[i][l] = s->m_next[l]->id_static;
         }
     }
 }
@@ -232,48 +248,10 @@
 
 
 /* ************************************************** *
-   Definition of the state handling methods
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id)
-{
-    set<uint64_t> l;
-    l.insert(id);
-    return l;
-}
-
-
-static string s_state_id_to_str(const set<uint64_t> &iId)
-{
-    string s;
-    set<uint64_t>::const_iterator it;
-    for (it = iId.begin(); it != iId.end(); it++)
-    {
-        char tmp[50];
-        sprintf(tmp, "%llu ", *it);
-        s += tmp;
-    }
-    return s;
-}
-
-
-static astate s_state_create(const set<uint64_t> &iId)
-{
-    astate s = new automaton_state_t();
-    // TODO: use copy constructor
-    s->id     = iId;
-    s->accept = false;
-    memset(s->next, 0, sizeof(astate)*MAX_TRANSITION_LETTERS);
-    DMSG(printf("** state %s creation\n", s_state_id_to_str(iId).c_str()));
-    return s;
-}
-
-
-/* ************************************************** *
    Definition of the AutomatonHelper class
  * ************************************************** */
 
-AutomatonHelper::AutomatonHelper(astate iInitState)
+AutomatonHelper::AutomatonHelper(State * iInitState)
     : m_initState(iInitState)
 {
 }
@@ -281,7 +259,7 @@
 
 AutomatonHelper::~AutomatonHelper()
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = m_states.begin(); it != m_states.end(); it++)
     {
         delete *it;
@@ -289,22 +267,22 @@
 }
 
 
-void AutomatonHelper::addState(astate s)
+void AutomatonHelper::addState(State * s)
 {
     m_states.push_front(s);
-    DMSG(printf("** state %s added to automaton\n", 
s_state_id_to_str(s->id).c_str()));
+    DMSG(printf("** state %s added to automaton\n", 
idToString(s->getId()).c_str()));
 }
 
 
-astate AutomatonHelper::getState(const set<uint64_t> &iId) const
+State * AutomatonHelper::getState(const set<uint64_t> &iId) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = m_states.begin(); it != m_states.end(); it++)
     {
-        astate s = *it;
-        if (s->id == iId)
+        State * s = *it;
+        if (s->getId() == iId)
         {
-            //DMSG(printf("** get state %s ok\n", 
s_state_id_to_str(s->id).c_str()));
+            //DMSG(printf("** get state %s ok\n", 
idToString(s->getId()).c_str()));
             return s;
         }
     }
@@ -318,66 +296,62 @@
 AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, 
uint64_t *PS)
 {
     uint64_t maxpos = PS[0];
-    astate current_state;
-    char used_letter[MAX_TRANSITION_LETTERS];
+    State * current_state;
+    bool used_letter[MAX_TRANSITION_LETTERS];
 
 
     /* 1: init_state = root->PP */
-    set<uint64_t> temp_id0 = s_state_id_create(init_state_id);
-    astate temp_state = s_state_create(temp_id0);
+    State * temp_state = new State(init_state_id);
     AutomatonHelper *nfa = new AutomatonHelper(temp_state);
     nfa->addState(temp_state);
-    list<astate> L;
+    list<State *> L;
     L.push_front(temp_state);
     /* 2: while \exist state \in state_list */
     while (! L.empty())
     {
         current_state = L.front();
         L.pop_front();
-        DMSG(printf("** current state = %s\n", 
s_state_id_to_str(current_state->id).c_str()));
+        DMSG(printf("** current state = %s\n", 
idToString(current_state->getId()).c_str()));
         memset(used_letter, 0, sizeof(used_letter));
         /* 3: \foreach l in \sigma | l \neq # */
         for (uint32_t p = 1; p < maxpos; p++)
         {
             int current_letter = ptl[p];
-            if (used_letter[current_letter] == 0)
+            if (used_letter[current_letter] == false)
             {
                 /* 4: int set = \cup { PS(pos) | pos \in state \wedge pos == l 
} */
                 uint64_t ens = 0;
                 for (uint32_t pos = 1; pos <= maxpos; pos++)
                 {
                     if (ptl[pos] == current_letter &&
-                        (unsigned int)*(current_state->id.begin()) & (1 << 
(pos - 1)))
+                        (unsigned int)*(current_state->getId().begin()) & (1 
<< (pos - 1)))
                         ens |= PS[pos];
                 }
                 /* 5: transition from current_state to temp_state */
                 if (ens)
                 {
-                    set<uint64_t> temp_id = s_state_id_create(ens);
+                    set<uint64_t> temp_id;
+                    temp_id.insert(ens);
                     temp_state = nfa->getState(temp_id);
                     if (temp_state == NULL)
                     {
-                        temp_state = s_state_create(temp_id);
+                        temp_state = new State(temp_id);
                         nfa->addState(temp_state);
-                        current_state->next[current_letter] = temp_state;
                         L.push_front(temp_state);
                     }
-                    else
-                    {
-                        current_state->next[current_letter] = temp_state;
-                    }
+                    current_state->m_next[current_letter] = temp_state;
                 }
-                used_letter[current_letter] = 1;
+                used_letter[current_letter] = true;
             }
         }
     }
 
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = nfa->m_states.begin(); it != nfa->m_states.end(); it++)
     {
-        astate s = *it;
-        if (*(s->id.begin()) & (1 << (maxpos - 1)))
-            s->accept = true;
+        State * s = *it;
+        if (*(s->getId().begin()) & (1 << (maxpos - 1)))
+            s->m_accept = true;
     }
 
     return nfa;
@@ -395,24 +369,26 @@
     set<uint64_t>::const_iterator it;
     for (it = S.begin(); it != S.end(); it++)                /* \forall y \in 
S */
     {
-        astate y, z;
 
-        set<uint64_t> t = s_state_id_create(*it);
-        assert(y = getState(t));
+        set<uint64_t> t;
+        t.insert(*it);
+        State *y = getState(t);
+        assert(y != NULL);
 
         set<uint64_t> Ry;                                        /* Ry = 
\empty             */
 
-        if ((z = y->next[letter]) != NULL)                   /* \delta (y,z) = 
l        */
+        State *z;
+        if ((z = y->m_next[letter]) != NULL)                   /* \delta (y,z) 
= l        */
         {
-            r = getSuccessor(z->id, RE_EPSILON, iList);
+            r = getSuccessor(z->getId(), RE_EPSILON, iList);
             Ry.insert(r.begin(), r.end());
-            Ry.insert(z->id.begin(), z->id.end()); /* Ry = Ry \cup succ(z)    
*/
+            Ry.insert(z->getId().begin(), z->getId().end()); /* Ry = Ry \cup 
succ(z)    */
         }
 
         /* \epsilon transition from start node */
-        if ((z = y->next[RE_EPSILON]) != NULL)               /* \delta (y,z) = 
\epsilon */
+        if ((z = y->m_next[RE_EPSILON]) != NULL)               /* \delta (y,z) 
= \epsilon */
         {
-            r = getSuccessor(z->id, letter, iList);
+            r = getSuccessor(z->getId(), letter, iList);
             Ry.insert(r.begin(), r.end());       /* Ry = Ry \cup succ(z)    */
         }
 
@@ -422,26 +398,21 @@
             {
                 if (iList->valid[i])
                 {
-                    if (iList->letters[i][letter] && (z = 
y->next[(int)iList->symbl[i]]) != NULL)
+                    if (iList->letters[i][letter] && (z = 
y->m_next[(int)iList->symbl[i]]) != NULL)
                     {
                         DMSG(printf("*** letter "));
                         DMSG(regexp_print_letter(stdout, letter));
                         DMSG(printf("is in "));
                         DMSG(regexp_print_letter(stdout, i));
 
-                        r = getSuccessor(z->id, RE_EPSILON, iList);
+                        r = getSuccessor(z->getId(), RE_EPSILON, iList);
                         Ry.insert(r.begin(), r.end());
-                        Ry.insert(z->id.begin(), z->id.end());
+                        Ry.insert(z->getId().begin(), z->getId().end());
                     }
                 }
             }
         }
 
-#if 0
-        if (alist_is_empty(Ry))                              /* Ry = \empty    
         */
-            return Ry;
-#endif
-
         R.insert(Ry.begin(), Ry.end());                      /* R = R \cup Ry  
         */
     }
 
@@ -449,19 +420,19 @@
 }
 
 
-void AutomatonHelper::setAccept(astate s) const
+void AutomatonHelper::setAccept(State * s) const
 {
-    DMSG(printf("=== setting accept for node (%s) :", 
s_state_id_to_str(s->id).c_str()));
-    list<astate>::const_iterator it;
+    DMSG(printf("=== setting accept for node (%s) :", 
idToString(s->getId()).c_str()));
+    list<State *>::const_iterator it;
     for (it = m_states.begin(); it != m_states.end(); it++)
     {
-        astate ns = *it;
-        int idx = *(ns->id.begin());
-        DMSG(printf("%s ", s_state_id_to_str(ns->id).c_str()));
-        if (ns->accept && (std::find(s->id.begin(), s->id.end(), idx) != 
s->id.end()))
+        State * ns = *it;
+        uint64_t idx = *(ns->getId().begin());
+        DMSG(printf("%s ", idToString(ns->getId()).c_str()));
+        if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), 
idx) != s->getId().end()))
         {
             DMSG(printf("(ok) "));
-            s->accept = true;
+            s->m_accept = true;
         }
     }
     DMSG(printf("\n"));
@@ -471,13 +442,12 @@
 AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
                                           struct search_RegE_list_t *iList)
 {
-    astate current_state;
+    State * current_state;
 
-    list<astate> L;
+    list<State *> L;
 
     // Clone the list
-    set<uint64_t> temp_id0 = iNfa.m_initState->id;
-    astate temp_state = s_state_create(temp_id0);
+    State * temp_state = new State(iNfa.m_initState->getId());
     AutomatonHelper *dfa = new AutomatonHelper(temp_state);
     dfa->addState(temp_state);
     L.push_front(temp_state);
@@ -485,40 +455,35 @@
     {
         current_state = L.front();
         L.pop_front();
-        DMSG(printf("** current state = %s\n", 
s_state_id_to_str(current_state->id).c_str()));
+        DMSG(printf("** current state = %s\n", 
idToString(current_state->getId()).c_str()));
         for (int letter = 1; letter < DIC_LETTERS; letter++)
         {
-            // DMSG(printf("*** start successor of %s\n", 
s_state_id_to_str(current_state->id).c_str()));
+            // DMSG(printf("*** start successor of %s\n", 
idToString(current_state->getId()).c_str()));
 
-            set<uint64_t> temp_id = iNfa.getSuccessor(current_state->id, 
letter, iList);
+            set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), 
letter, iList);
 
             if (! temp_id.empty())
             {
-
-                DMSG(printf("*** successor of %s for ", 
s_state_id_to_str(current_state->id).c_str()));
+                DMSG(printf("*** successor of %s for ", 
idToString(current_state->getId()).c_str()));
                 DMSG(regexp_print_letter(stdout, letter));
-                DMSG(printf(" = %s\n", s_state_id_to_str(temp_id).c_str()));
+                DMSG(printf(" = %s\n", idToString(temp_id).c_str()));
 
                 temp_state = dfa->getState(temp_id);
 
-                // DMSG(printf("*** automaton get state -%s- ok\n", 
s_state_id_to_str(temp_id).c_str()));
+                // DMSG(printf("*** automaton get state -%s- ok\n", 
idToString(temp_id).c_str()));
 
                 if (temp_state == NULL)
                 {
-                    temp_state = s_state_create(temp_id);
+                    temp_state = new State(temp_id);
                     dfa->addState(temp_state);
-                    current_state->next[letter] = temp_state;
                     L.push_front(temp_state);
                 }
-                else
-                {
-                    current_state->next[letter] = temp_state;
-                }
+                current_state->m_next[letter] = temp_state;
             }
         }
     }
 
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = dfa->m_states.begin(); it != dfa->m_states.end(); it++)
     {
         iNfa.setAccept(*it);
@@ -531,19 +496,33 @@
  * ************************************************** *
  * ************************************************** */
 
+static string idToString(const set<uint64_t> &iId)
+{
+    string s;
+    set<uint64_t>::const_iterator it;
+    for (it = iId.begin(); it != iId.end(); it++)
+    {
+        char tmp[50];
+        sprintf(tmp, "%llu ", *it);
+        s += tmp;
+    }
+    return s;
+}
+
+
 void AutomatonHelper::printNodes(FILE* f) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = m_states.begin(); it != m_states.end(); it++)
     {
-        astate s = *it;
-        string sid = s_state_id_to_str(s->id);
+        State * s = *it;
+        string sid = idToString(s->getId());
         fprintf(f, "\t\"%s\" [label = \"%s\"", sid.c_str(), sid.c_str());
         if (s == m_initState)
         {
             fprintf(f, ", style = filled, color=lightgrey");
         }
-        if (s->accept)
+        if (s->m_accept)
         {
             fprintf(f, ", shape = doublecircle");
         }
@@ -555,18 +534,16 @@
 
 void AutomatonHelper::printEdges(FILE* f) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
     for (it = m_states.begin(); it != m_states.end(); it++)
     {
-        astate s = *it;
+        State * s = *it;
         for (int letter = 0; letter < 255; letter++)
         {
-            if (s->next[letter])
+            if (s->m_next[letter])
             {
-                string sid = s_state_id_to_str(s->id);
-                fprintf(f, "\t\"%s\" -> ", sid.c_str());
-                sid = s_state_id_to_str(s->next[letter]->id);
-                fprintf(f, "\"%s\" [label = \"", sid.c_str());
+                fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
+                fprintf(f, "\"%s\" [label = \"", 
idToString(s->m_next[letter]->getId()).c_str());
                 regexp_print_letter(f, letter);
                 fprintf(f, "\"];\n");
             }

Index: dic/dic.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/dic.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- dic/dic.cpp 8 Jan 2008 13:52:34 -0000       1.2
+++ dic/dic.cpp 27 Jul 2008 13:32:47 -0000      1.3
@@ -161,7 +161,7 @@
 }
 
 
-const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
+dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
 {
      if (!isLast(e))
          return e + 1;
@@ -169,7 +169,7 @@
 }
 
 
-const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
+dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
         return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
@@ -178,13 +178,13 @@
 }
 
 
-const dic_elt_t Dictionary::getRoot() const
+dic_elt_t Dictionary::getRoot() const
 {
     return m_header->getRoot();
 }
 
 
-const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
+dic_code_t Dictionary::getCode(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
         return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;

Index: dic/dic.h
===================================================================
RCS file: /sources/eliot/eliot/dic/dic.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- dic/dic.h   7 Jul 2008 17:30:00 -0000       1.16
+++ dic/dic.h   27 Jul 2008 13:32:47 -0000      1.17
@@ -103,7 +103,7 @@
      * codes may range from 0 to 63. 0 is the null character.
      * @returns code for the encoded character
      */
-    const dic_code_t getCode(const dic_elt_t &elt) const;
+    dic_code_t getCode(const dic_elt_t &elt) const;
 
     /**
      * Returns the wide character associated with an element.
@@ -129,13 +129,13 @@
      * Returns the root of the dictionary
      * @returns root element
      */
-    const dic_elt_t getRoot() const;
+    dic_elt_t getRoot() const;
 
     /**
      * Returns the next available neighbor (see isLast())
      * @returns next dictionary element at the same depth
      */
-    const dic_elt_t getNext(const dic_elt_t &elt) const;
+    dic_elt_t getNext(const dic_elt_t &elt) const;
 
     /**
      * Returns the first element available at the next depth
@@ -143,7 +143,7 @@
      * @params elt : current dictionary element
      * @returns next element (successor)
      */
-    const dic_elt_t getSucc(const dic_elt_t &elt) const;
+    dic_elt_t getSucc(const dic_elt_t &elt) const;
 
     /**
      * Find the dictionary element matching the pattern starting
@@ -226,7 +226,8 @@
      */
     void searchRegExp(const wstring &iRegexp,
                       vector<wstring> &oWordList,
-                      struct search_RegE_list_t *iList,
+                      unsigned int iMinLength,
+                      unsigned int iMaxLength,
                       unsigned int iMaxResults = 0) const;
 
 

Index: dic/dic_search.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/dic_search.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- dic/dic_search.cpp  13 Jul 2008 07:55:47 -0000      1.6
+++ dic/dic_search.cpp  27 Jul 2008 13:32:47 -0000      1.7
@@ -453,7 +453,6 @@
     int minlength;
     int maxlength;
     Automaton *automaton_field;
-    struct search_RegE_list_t *charlist;
     wchar_t word[DIC_WORD_MAX];
     int  wordlen;
 };
@@ -500,9 +499,40 @@
 }
 
 
+static void init_letter_lists(const Dictionary &iDic, struct 
search_RegE_list_t &iList)
+{
+    memset(&iList, 0, sizeof(iList));
+    // Prepare the space for 5 items
+    iList.symbl.assign(5, 0);
+
+    iList.valid[0] = true; // all letters
+    iList.symbl[0] = RE_ALL_MATCH;
+    iList.valid[1] = true; // vowels
+    iList.symbl[1] = RE_VOWL_MATCH;
+    iList.valid[2] = true; // consonants
+    iList.symbl[2] = RE_CONS_MATCH;
+    iList.letters[0][0] = false;
+    iList.letters[1][0] = false;
+    iList.letters[2][0] = false;
+    const wstring &allLetters = iDic.getHeader().getLetters();
+    for (size_t i = 1; i <= allLetters.size(); ++i)
+    {
+        iList.letters[0][i] = true;
+        iList.letters[1][i] = iDic.getHeader().isVowel(i);
+        iList.letters[2][i] = iDic.getHeader().isConsonant(i);
+    }
+
+    iList.valid[3] = false; // user defined list 1
+    iList.symbl[3] = RE_USR1_MATCH;
+    iList.valid[4] = false; // user defined list 2
+    iList.symbl[4] = RE_USR2_MATCH;
+}
+
+
 void Dictionary::searchRegExp(const wstring &iRegexp,
                               vector<wstring> &oWordList,
-                              struct search_RegE_list_t *iList,
+                              unsigned int iMinLength,
+                              unsigned int iMaxLength,
                               unsigned int iMaxResults) const
 {
     if (iRegexp == L"")
@@ -514,27 +544,21 @@
     else
         oWordList.reserve(DEFAULT_VECT_ALLOC);
 
-    struct regexp_error_report_t report;
-    report.pos1 = 0;
-    report.pos2 = 0;
-    report.msg[0] = '\0';
-
-    /* parsing */
+    // Parsing
     Node *root = NULL;
-    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, 
iList);
+    struct search_RegE_list_t llist;
+    init_letter_lists(*this, llist);
+    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, 
&llist);
 
     if (!parsingOk)
     {
-#if 0
-        fprintf(stderr, "parser error at pos %d - %d: %s\n",
-                report.pos1, report.pos2, report.msg);
-#endif
+        // TODO
         delete root;
         return;
     }
 
     int ptl[REGEXP_MAX+1];
-    uint64_t PS [REGEXP_MAX+1];
+    uint64_t PS[REGEXP_MAX+1];
 
     for (int i = 0; i < REGEXP_MAX; i++)
     {
@@ -550,14 +574,13 @@
 
     root->nextPos(PS);
 
-    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, iList);
+    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
     if (a)
     {
         struct params_regexp_t params;
-        params.minlength = iList->minlength;
-        params.maxlength = iList->maxlength;
+        params.minlength = iMinLength;
+        params.maxlength = iMaxLength;
         params.automaton_field = a;
-        params.charlist = iList;
         memset(params.word, L'\0', sizeof(params.word));
         params.wordlen = 0;
         if (getHeader().getVersion() == 0)

Index: dic/grammar.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/grammar.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- dic/grammar.cpp     7 Jul 2008 17:30:00 -0000       1.1
+++ dic/grammar.cpp     27 Jul 2008 13:32:47 -0000      1.2
@@ -104,7 +104,7 @@
                 ;
 
             alphavar
-                = chset<>(self.m_allLetters.c_str())
+                = chset<wchar_t>(self.m_allLetters.c_str())
                 ;
         }
 
@@ -137,30 +137,6 @@
     }
     else if (i->value.id() == RegexpGrammar::choiceId)
     {
-#if 0
-        assert(i->children.size() == 0);
-
-        string choiceLetters(i->value.begin(), i->value.end());
-        int j;
-        for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; j++)
-        {
-            if (!iList->valid[j])
-            {
-                iList->valid[j] = true;
-                iList->symbl[j] = RE_ALL_MATCH + j;
-                iList->letters[j][0] = false;
-                for (int k = 1; k < DIC_LETTERS; k++)
-                {
-                    bool contains = (choiceLetters.find(k + L'a' - 1) != 
string::npos);
-                    iList->letters[j][k] = (contains ? !negate : negate);
-                }
-                break;
-            }
-        }
-        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
-        evalStack.push(node);
-#endif
-#if 1
         assert(i->children.size() == 0);
 
         wstring choiceLetters(i->value.begin(), i->value.end());
@@ -176,7 +152,7 @@
             if (!iList->valid[j])
             {
                 iList->valid[j] = true;
-                iList->symbl[j] = RE_ALL_MATCH + j;
+                iList->symbl.push_back(RE_ALL_MATCH + j);
                 iList->letters[j][0] = false;
                 for (itLetter = letters.begin(); itLetter != letters.end(); 
++itLetter)
                 {
@@ -189,7 +165,6 @@
         }
         Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
         evalStack.push(node);
-#endif
     }
     else if (i->value.id() == RegexpGrammar::varId)
     {

Index: dic/regexp.h
===================================================================
RCS file: /sources/eliot/eliot/dic/regexp.h,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -b -r1.15 -r1.16
--- dic/regexp.h        13 Jul 2008 07:55:47 -0000      1.15
+++ dic/regexp.h        27 Jul 2008 13:32:47 -0000      1.16
@@ -139,18 +139,14 @@
 #define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
 
 /**
- * Structure used for Dic_search_RegE \n
- * this structure is used to explicit letters list that will be matched
+ * Structure used for dic.searchRegExp
+ * This structure is used to explicit letters list that will be matched
  * against special tokens in the regular expression search
  */
 struct search_RegE_list_t
 {
-    /** maximum length for results */
-    int minlength;
-    /** maximum length for results */
-    int maxlength;
     /** special symbol associated with the list */
-    char symbl[DIC_SEARCH_REGE_LIST];
+    vector<char> symbl;
     /** 0 or 1 if list is valid */
     bool valid[DIC_SEARCH_REGE_LIST];
     /** 0 or 1 if letter is present in the list */
@@ -163,15 +159,6 @@
 #define RE_LIST_USER_BEGIN 3
 #define RE_LIST_USER_END   4
 
-#define MAX_REGEXP_ERROR_LENGTH 500
-
-struct regexp_error_report_t
-{
-    int pos1;
-    int pos2;
-    char msg[MAX_REGEXP_ERROR_LENGTH];
-};
-
 #include <cstdio>
 
 void  regexp_print_letter(FILE* f, char l);

Index: dic/regexpmain.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/regexpmain.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- dic/regexpmain.cpp  20 Jul 2008 12:15:52 -0000      1.6
+++ dic/regexpmain.cpp  27 Jul 2008 13:32:47 -0000      1.7
@@ -1,7 +1,8 @@
 /*****************************************************************************
  * Eliot
- * Copyright (C) 2005-2007 Antoine Fraboulet
+ * Copyright (C) 2005-2008 Antoine Fraboulet & Olivier Teulière
  * Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
+ *          Olivier Teulière  <ipkiss @@ gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -40,39 +41,9 @@
 
 #include "dic.h"
 #include "header.h"
-#include "regexp.h"
 #include "encoding.h"
 
 
-void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t 
*iList)
-{
-    memset(iList, 0, sizeof(*iList));
-    iList->minlength = 1;
-    iList->maxlength = 15;
-    iList->valid[0] = true; // all letters
-    iList->symbl[0] = RE_ALL_MATCH;
-    iList->valid[1] = true; // vowels
-    iList->symbl[1] = RE_VOWL_MATCH;
-    iList->valid[2] = true; // consonants
-    iList->symbl[2] = RE_CONS_MATCH;
-    iList->letters[0][0] = false;
-    iList->letters[1][0] = false;
-    iList->letters[2][0] = false;
-    const wstring &allLetters = iDic.getHeader().getLetters();
-    for (size_t i = 1; i <= allLetters.size(); ++i)
-    {
-        iList->letters[0][i] = true;
-        iList->letters[1][i] = iDic.getHeader().isVowel(i);
-        iList->letters[2][i] = iDic.getHeader().isConsonant(i);
-    }
-
-    iList->valid[3] = false; // user defined list 1
-    iList->symbl[3] = RE_USR1_MATCH;
-    iList->valid[4] = false; // user defined list 2
-    iList->symbl[4] = RE_USR2_MATCH;
-}
-
-
 void usage(const char *iBinaryName)
 {
     cerr << _("usage: %s dictionary") << iBinaryName << endl;
@@ -103,20 +74,18 @@
     {
         Dictionary dic(argv[1]);
 
-        struct search_RegE_list_t regList;
         string line;
         cout << "**************************************************************" << endl;
         cout << "**************************************************************" << endl;
-        cout << _("enter a regular expression:") << endl;
+        cout << _("Enter a regular expression:") << endl;
         while (getline(cin, line))
         {
             if (line == "")
                 break;
 
             /* Automaton */
-            init_letter_lists(dic, &regList);
             vector<wstring> wordList;
-            dic.searchRegExp(convertToWc(line), wordList, &regList);
+            dic.searchRegExp(convertToWc(line), wordList, 1, 15);
 
             cout << _("result:") << endl;
             vector<wstring>::const_iterator it;
@@ -126,7 +95,7 @@
             }
             cout << "**************************************************************" << endl;
             cout << "**************************************************************" << endl;
-            cout << _("enter a regular expression:") << endl;
+            cout << _("Enter a regular expression:") << endl;
         }
 
         return 0;

Index: utils/eliottxt.cpp
===================================================================
RCS file: /sources/eliot/eliot/utils/eliottxt.cpp,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -b -r1.22 -r1.23
--- utils/eliottxt.cpp  7 Jul 2008 17:30:02 -0000       1.22
+++ utils/eliottxt.cpp  27 Jul 2008 13:32:48 -0000      1.23
@@ -35,7 +35,6 @@
 #endif
 
 #include "dic.h"
-#include "regexp.h"
 #include "game_io.h"
 #include "game_factory.h"
 #include "training.h"
@@ -786,53 +785,6 @@
 }
 
 
-void eliot_regexp_build_default_llist(const Dictionary &iDic,
-                                      struct search_RegE_list_t &llist)
-{
-    memset(&llist, 0, sizeof(llist));
-
-    llist.minlength = 1;
-    llist.maxlength = 15;
-
-    llist.symbl[0] = RE_ALL_MATCH;
-    llist.symbl[1] = RE_VOWL_MATCH;
-    llist.symbl[2] = RE_CONS_MATCH;
-    llist.symbl[3] = RE_USR1_MATCH;
-    llist.symbl[5] = RE_USR2_MATCH;
-
-    llist.valid[0] = true; // all letters
-    llist.valid[1] = true; // vowels
-    llist.valid[2] = true; // consonants
-    llist.valid[3] = false; // user defined list 1
-    llist.valid[4] = false; // user defined list 2
-
-    for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
-    {
-        memset(llist.letters[i], 0, sizeof(llist.letters[i]));
-    }
-
-    const vector<Tile>& allTiles = iDic.getAllTiles();
-    vector<Tile>::const_iterator it;
-    for (it = allTiles.begin(); it != allTiles.end(); it++)
-    {
-        if (! it->isJoker() && ! it->isEmpty())
-        {
-            // all tiles
-            llist.letters[0][it->toCode()] = 1;
-            // vowels
-            if (it->isVowel())
-            {
-                llist.letters[1][it->toCode()] = 1;
-            }
-            // consonants
-            if (it->isConsonant())
-            {
-                llist.letters[2][it->toCode()] = 1;
-            }
-        }
-    }
-}
-
 void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
                   const wchar_t *delim, wchar_t **state)
 {
@@ -844,11 +796,6 @@
     printf("          {3} longueur maximum d'un mot\n");
     */
 
-#define DIC_RE_MAX (3*DIC_WORD_MAX) // yes, it's 3
-
-    struct search_RegE_list_t llist;
-    eliot_regexp_build_default_llist(iDic, llist);
-
     wchar_t *regexp = _wcstok(NULL, delim, state);
     wchar_t *cnres = _wcstok(NULL, delim, state);
     wchar_t *clmin = _wcstok(NULL, delim, state);
@@ -858,16 +805,11 @@
     {
         return;
     }
-    int nres = cnres ? _wtoi(cnres) : 50;
-    int lmin = clmin ? _wtoi(clmin) : 1;
-    int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
+    unsigned int nres = cnres ? _wtoi(cnres) : 50;
+    unsigned int lmin = clmin ? _wtoi(clmin) : 1;
+    unsigned int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
 
-    if (lmax <= (DIC_WORD_MAX - 1) && lmin >= 1 && lmin <= lmax)
-    {
-        llist.minlength = lmin;
-        llist.maxlength = lmax;
-    }
-    else
+    if (lmax > (DIC_WORD_MAX - 1) || lmin < 1 || lmin > lmax)
     {
         printf("bad length -%s,%s-\n", (const char*)clmin, (const char*)clmax);
         return;
@@ -877,16 +819,14 @@
            nres, lmin, lmax);
 
     vector<wstring> wordList;
-    iDic.searchRegExp(regexp, wordList, &llist);
+    iDic.searchRegExp(regexp, wordList, lmin, lmax, nres);
 
-    int nresult = 0;
     vector<wstring>::const_iterator it;
-    for (it = wordList.begin(); it != wordList.end() && nresult < nres; it++)
+    for (it = wordList.begin(); it != wordList.end(); it++)
     {
         printf("%s\n", convertToMb(*it).c_str());
-        nresult++;
     }
-    printf("%d printed results\n", nresult);
+    printf("%d printed results\n", wordList.size());
 }
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]