eliot-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Eliot-dev] eliot/dic automaton.cpp automaton.h dic_search....


From: eliot-dev
Subject: [Eliot-dev] eliot/dic automaton.cpp automaton.h dic_search....
Date: Sun, 27 Jul 2008 15:28:51 +0000

CVSROOT:        /sources/eliot
Module name:    eliot
Changes by:     Olivier Teulière <ipkiss>      08/07/27 15:28:51

Modified files:
        dic            : automaton.cpp automaton.h dic_search.cpp 
                         grammar.cpp grammar.h regexp.h 

Log message:
        Added consts, renamed a class, and simplified the code using vectors

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/automaton.cpp?cvsroot=eliot&r1=1.5&r2=1.6
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/automaton.h?cvsroot=eliot&r1=1.13&r2=1.14
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_search.cpp?cvsroot=eliot&r1=1.7&r2=1.8
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/grammar.cpp?cvsroot=eliot&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/grammar.h?cvsroot=eliot&r1=1.1&r2=1.2
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/regexp.h?cvsroot=eliot&r1=1.16&r2=1.17

Patches:
Index: automaton.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/automaton.cpp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -b -r1.5 -r1.6
--- automaton.cpp       27 Jul 2008 13:32:47 -0000      1.5
+++ automaton.cpp       27 Jul 2008 15:28:50 -0000      1.6
@@ -18,13 +18,6 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *****************************************************************************/
 
-/**
- *  \file   automaton.c
- *  \brief  (Non)Deterministic Finite AutomatonHelper for Regexp
- *  \author Antoine Fraboulet
- *  \date   2005
- */
-
 #include "config.h"
 
 #include <set>
@@ -79,6 +72,11 @@
     State * m_next[MAX_TRANSITION_LETTERS];
 
 private:
+    /**
+     * Id of the state. For the first automaton, each ID contains only 1
+     * integer, but the ID of the deterministic automaton will contain
+     * several integers, according to the usual "determinization" algorithm.
+     */
     set<uint64_t> m_id;
 
     void init()
@@ -107,7 +105,7 @@
 
     static AutomatonHelper *ps2nfa(uint64_t iInitState, int *ptl, uint64_t 
*PS);
     static AutomatonHelper *nfa2dfa(const AutomatonHelper &iNfa,
-                                    struct search_RegE_list_t *iList);
+                                    const searchRegExpLists &iList);
 
     /// List of states
     list<State *> m_states;
@@ -121,7 +119,8 @@
     void printNodes(FILE* f) const;
     void printEdges(FILE* f) const;
     void setAccept(State * s) const;
-    set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct 
search_RegE_list_t *iList) const;
+    set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter,
+                               const searchRegExpLists &iList) const;
 };
 
 
@@ -129,7 +128,8 @@
    Definition of the Automaton class
  * ************************************************** */
 
-Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct 
search_RegE_list_t *iList)
+Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
+                     const searchRegExpLists &iList)
 {
     AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
     DMSG(printf("\n non deterministic automaton OK \n\n"));
@@ -151,7 +151,7 @@
 Automaton::~Automaton()
 {
     delete[] m_acceptors;
-    for (int i = 0; i <= m_nbStates; i++)
+    for (unsigned int i = 0; i <= m_nbStates; i++)
     {
         delete[] m_transitions[i];
     }
@@ -166,7 +166,7 @@
     m_acceptors = new bool[m_nbStates + 1];
     memset(m_acceptors, 0, (m_nbStates + 1) * sizeof(bool));
     m_transitions = new int*[m_nbStates + 1];
-    for (int i = 0; i <= m_nbStates; i++)
+    for (unsigned int i = 0; i <= m_nbStates; i++)
     {
         m_transitions[i] = new int[MAX_TRANSITION_LETTERS];
         memset(m_transitions[i], 0, MAX_TRANSITION_LETTERS * sizeof(int));
@@ -205,7 +205,7 @@
 {
     FILE *f = fopen(iFileName.c_str(), "w");
     fprintf(f, "digraph automaton {\n");
-    for (int i = 1; i <= m_nbStates; i++)
+    for (unsigned int i = 1; i <= m_nbStates; i++)
     {
         fprintf(f, "\t%d [label = \"%d\"", i, i);
         if (i == m_init)
@@ -215,7 +215,7 @@
         fprintf(f, "];\n");
     }
     fprintf(f, "\n");
-    for (int i = 1; i <= m_nbStates; i++)
+    for (unsigned int i = 1; i <= m_nbStates; i++)
     {
         for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
         {
@@ -363,7 +363,7 @@
 
 set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
                                             int letter,
-                                            struct search_RegE_list_t *iList) 
const
+                                            const searchRegExpLists &iList) 
const
 {
     set<uint64_t> R, r;
     set<uint64_t>::const_iterator it;
@@ -394,11 +394,9 @@
 
         if (letter < RE_FINAL_TOK)
         {
-            for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
+            for (unsigned int i = 0; i < iList.symbl.size(); i++)
             {
-                if (iList->valid[i])
-                {
-                    if (iList->letters[i][letter] && (z = 
y->m_next[(int)iList->symbl[i]]) != NULL)
+                if (iList.letters[i][letter] && (z = 
y->m_next[(int)iList.symbl[i]]) != NULL)
                     {
                         DMSG(printf("*** letter "));
                         DMSG(regexp_print_letter(stdout, letter));
@@ -411,7 +409,6 @@
                     }
                 }
             }
-        }
 
         R.insert(Ry.begin(), Ry.end());                      /* R = R \cup Ry  
         */
     }
@@ -440,7 +437,7 @@
 
 
 AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
-                                          struct search_RegE_list_t *iList)
+                                          const searchRegExpLists &iList)
 {
     State * current_state;
 

Index: automaton.h
===================================================================
RCS file: /sources/eliot/eliot/dic/automaton.h,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -b -r1.13 -r1.14
--- automaton.h 13 Jul 2008 07:55:47 -0000      1.13
+++ automaton.h 27 Jul 2008 15:28:50 -0000      1.14
@@ -18,17 +18,11 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *****************************************************************************/
 
-/**
- *  \file   automaton.h
- *  \brief  (Non)Deterministic Finite Automaton for Regexp
- *  \author Antoine Fraboulet
- *  \date   2005
- */
-
 #ifndef _DIC_AUTOMATON_H_
 #define _DIC_AUTOMATON_H_
 
 class AutomatonHelper;
+struct searchRegExpLists;
 
 class Automaton
 {
@@ -38,7 +32,8 @@
      * Build a static deterministic finite automaton from
      * "init_state", "ptl" and "PS" given by the parser
      */
-    Automaton(uint64_t init_state, int *ptl, uint64_t *PS, struct 
search_RegE_list_t *iList);
+    Automaton(uint64_t init_state, int *ptl, uint64_t *PS,
+              const searchRegExpLists &iList);
 
     /// Destructor
     ~Automaton();
@@ -77,10 +72,10 @@
 
 private:
     /// Number of states
-    int m_nbStates;
+    unsigned int m_nbStates;
 
     /// ID of the init state
-    int m_init;
+    uint64_t m_init;
 
     /// Array of booleans, one for each state
     bool *m_acceptors;

Index: dic_search.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/dic_search.cpp,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -b -r1.7 -r1.8
--- dic_search.cpp      27 Jul 2008 13:32:47 -0000      1.7
+++ dic_search.cpp      27 Jul 2008 15:28:51 -0000      1.8
@@ -499,18 +499,27 @@
 }
 
 
-static void init_letter_lists(const Dictionary &iDic, struct 
search_RegE_list_t &iList)
+/**
+ * Initialize the lists of letters with pre-defined lists
+ 
+ * 0: all tiles
+ * 1: vowels
+ * 2: consonants
+ * 3: user defined 1
+ * 4: user defined 2
+ * x: lists used during parsing
+ */
+static void initLetterLists(const Dictionary &iDic,
+                            searchRegExpLists &iList)
 {
     memset(&iList, 0, sizeof(iList));
     // Prepare the space for 5 items
     iList.symbl.assign(5, 0);
+    iList.letters.assign(5, vector<bool>(DIC_LETTERS, false));
 
-    iList.valid[0] = true; // all letters
-    iList.symbl[0] = RE_ALL_MATCH;
-    iList.valid[1] = true; // vowels
-    iList.symbl[1] = RE_VOWL_MATCH;
-    iList.valid[2] = true; // consonants
-    iList.symbl[2] = RE_CONS_MATCH;
+    iList.symbl[0] = RE_ALL_MATCH; // All letters
+    iList.symbl[1] = RE_VOWL_MATCH; // Vowels
+    iList.symbl[2] = RE_CONS_MATCH; // Consonants
     iList.letters[0][0] = false;
     iList.letters[1][0] = false;
     iList.letters[2][0] = false;
@@ -522,10 +531,8 @@
         iList.letters[2][i] = iDic.getHeader().isConsonant(i);
     }
 
-    iList.valid[3] = false; // user defined list 1
-    iList.symbl[3] = RE_USR1_MATCH;
-    iList.valid[4] = false; // user defined list 2
-    iList.symbl[4] = RE_USR2_MATCH;
+    iList.symbl[3] = RE_USR1_MATCH; // User defined list 1
+    iList.symbl[4] = RE_USR2_MATCH; // User defined list 2
 }
 
 
@@ -546,9 +553,10 @@
 
     // Parsing
     Node *root = NULL;
-    struct search_RegE_list_t llist;
-    init_letter_lists(*this, llist);
-    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, 
&llist);
+    searchRegExpLists llist;
+    // Initialize the lists of letters
+    initLetterLists(*this, llist);
+    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, 
llist);
 
     if (!parsingOk)
     {
@@ -574,7 +582,7 @@
 
     root->nextPos(PS);
 
-    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
+    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, llist);
     if (a)
     {
         struct params_regexp_t params;

Index: grammar.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/grammar.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- grammar.cpp 27 Jul 2008 13:32:47 -0000      1.2
+++ grammar.cpp 27 Jul 2008 15:28:51 -0000      1.3
@@ -124,7 +124,7 @@
 
 
 void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
-              struct search_RegE_list_t *iList, bool negate = false)
+              searchRegExpLists &iList, bool negate = false)
 {
     if (i->value.id() == RegexpGrammar::alphavarId)
     {
@@ -146,24 +146,17 @@
         // The dictionary letters are already in upper case
         const wstring &letters = iHeader.getLetters();
         wstring::const_iterator itLetter;
-        int j;
-        for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; ++j)
-        {
-            if (!iList->valid[j])
-            {
-                iList->valid[j] = true;
-                iList->symbl.push_back(RE_ALL_MATCH + j);
-                iList->letters[j][0] = false;
+        // j is the index of the new list we create
+        size_t j = iList.symbl.size();
+        iList.symbl.push_back(RE_ALL_MATCH + j);
+        iList.letters.push_back(vector<bool>(DIC_LETTERS, false));
                 for (itLetter = letters.begin(); itLetter != letters.end(); 
++itLetter)
                 {
                     bool contains = (choiceLetters.find(*itLetter) != 
string::npos);
-                    iList->letters[j][iHeader.getCodeFromChar(*itLetter)] =
+            iList.letters[j][iHeader.getCodeFromChar(*itLetter)] =
                         (contains ? !negate : negate);
                 }
-                break;
-            }
-        }
-        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
+        Node *node = new Node(NODE_VAR, iList.symbl[j], NULL, NULL);
         evalStack.push(node);
     }
     else if (i->value.id() == RegexpGrammar::varId)
@@ -279,7 +272,8 @@
 }
 
 
-bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, 
struct search_RegE_list_t *iList)
+bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root,
+                 searchRegExpLists &iList)
 {
     // Create a grammar object
     RegexpGrammar g(iDic.getHeader().getLetters());

Index: grammar.h
===================================================================
RCS file: /sources/eliot/eliot/dic/grammar.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- grammar.h   7 Jul 2008 17:30:01 -0000       1.1
+++ grammar.h   27 Jul 2008 15:28:51 -0000      1.2
@@ -23,9 +23,12 @@
 
 class Dictionary;
 class Node;
-struct search_RegE_list_t;
+class searchRegExpLists;
 
-bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, 
struct search_RegE_list_t *iList);
+bool parseRegexp(const Dictionary &iDic,
+                 const wchar_t *input,
+                 Node **root,
+                 searchRegExpLists &iList);
 
 #endif
 

Index: regexp.h
===================================================================
RCS file: /sources/eliot/eliot/dic/regexp.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- regexp.h    27 Jul 2008 13:32:47 -0000      1.16
+++ regexp.h    27 Jul 2008 15:28:51 -0000      1.17
@@ -128,29 +128,19 @@
 #define RE_USR2_MATCH  (DIC_LETTERS + 6)
 
 /**
- * number of lists for regexp letter match \n
- * 0 : all tiles                           \n
- * 1 : vowels                              \n
- * 2 : consonants                          \n
- * 3 : user defined 1                      \n
- * 4 : user defined 2                      \n
- * x : lists used during parsing           \n
- */
-#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
-
-/**
  * Structure used for dic.searchRegExp
  * This structure is used to explicit letters list that will be matched
  * against special tokens in the regular expression search
  */
-struct search_RegE_list_t
+struct searchRegExpLists
 {
     /** special symbol associated with the list */
     vector<char> symbl;
-    /** 0 or 1 if list is valid */
-    bool valid[DIC_SEARCH_REGE_LIST];
-    /** 0 or 1 if letter is present in the list */
-    bool letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS];
+    /**
+     * 0 or 1 if letter is present in the list.
+     * The inner vector should have a length of DIC_LETTERS (it is a bitmask)
+     */
+    vector<vector<bool> > letters;
 };
 
 #define RE_LIST_ALL_MATCH  0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]