[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot dic/automaton.cpp dic/dic.cpp dic/dic.h d...
From: |
eliot-dev |
Subject: |
[Eliot-dev] eliot dic/automaton.cpp dic/dic.cpp dic/dic.h d... |
Date: |
Sun, 27 Jul 2008 13:32:48 +0000 |
CVSROOT: /sources/eliot
Module name: eliot
Changes by: Olivier Teulière <ipkiss> 08/07/27 13:32:48
Modified files:
dic : automaton.cpp dic.cpp dic.h dic_search.cpp
grammar.cpp regexp.h regexpmain.cpp
utils : eliottxt.cpp
Log message:
- Fixed a bug in the parser which prevented some regular expressions
with wide chars from being parsed correctly. The regular expressions now seem to work
fine with a dictionary using Polish words.
- More C++ in the automaton code
- Simplified the code in several places
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/automaton.cpp?cvsroot=eliot&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.cpp?cvsroot=eliot&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.h?cvsroot=eliot&r1=1.16&r2=1.17
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_search.cpp?cvsroot=eliot&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/grammar.cpp?cvsroot=eliot&r1=1.1&r2=1.2
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/regexp.h?cvsroot=eliot&r1=1.15&r2=1.16
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/regexpmain.cpp?cvsroot=eliot&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/eliot/utils/eliottxt.cpp?cvsroot=eliot&r1=1.22&r2=1.23
Patches:
Index: dic/automaton.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/automaton.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- dic/automaton.cpp 20 Jul 2008 12:15:51 -0000 1.4
+++ dic/automaton.cpp 27 Jul 2008 13:32:47 -0000 1.5
@@ -54,8 +54,41 @@
#define MAX_TRANSITION_LETTERS 256
-typedef struct automaton_state_t *astate;
+/* ************************************************** *
+ Definition of the automaton state
+ * ************************************************** */
+
+static string idToString(const set<uint64_t> &iId);
+
+class State
+{
+public:
+ State(const set<uint64_t> iId) : m_id(iId) { init(); }
+ State(uint64_t iId)
+ {
+ m_id.insert(iId);
+ init();
+ }
+
+ const set<uint64_t> & getId() const { return m_id; }
+
+ // FIXME: should be private
+ bool m_accept;
+ int id_static;
+ State * m_next[MAX_TRANSITION_LETTERS];
+
+private:
+ set<uint64_t> m_id;
+
+ void init()
+ {
+ m_accept = false;
+ id_static = 0;
+ memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
+ DMSG(printf("** state %s creation\n", idToString(m_id).c_str()));
+ }
+};
/* ************************************************** *
Helper class, allowing to build a NFA, then a DFA
@@ -64,10 +97,10 @@
class AutomatonHelper
{
public:
- AutomatonHelper(astate iInitState);
+ AutomatonHelper(State * iInitState);
~AutomatonHelper();
- astate getInitState() const { return m_initState; }
+ State * getInitState() const { return m_initState; }
#ifdef DEBUG_AUTOMATON
void dump(const string &iFileName) const;
#endif
@@ -77,39 +110,22 @@
struct search_RegE_list_t *iList);
/// List of states
- list<astate> m_states;
+ list<State *> m_states;
private:
/// Initial state of the automaton
- astate m_initState;
+ State * m_initState;
- void addState(astate s);
- astate getState(const set<uint64_t> &iId) const;
+ void addState(State * s);
+ State * getState(const set<uint64_t> &iId) const;
void printNodes(FILE* f) const;
void printEdges(FILE* f) const;
- void setAccept(astate s) const;
+ void setAccept(State * s) const;
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct
search_RegE_list_t *iList) const;
};
/* ************************************************** *
- State handling
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id);
-static string s_state_id_to_str(const set<uint64_t> &iId);
-static astate s_state_create (const set<uint64_t> &iId);
-
-struct automaton_state_t
-{
- set<uint64_t> id;
- bool accept;
- int id_static;
- astate next[MAX_TRANSITION_LETTERS];
-};
-
-
-/* ************************************************** *
Definition of the Automaton class
* ************************************************** */
@@ -125,7 +141,7 @@
finalize(*dfa);
DMSG(printf("\n final automaton OK \n\n"));
- DMSG(automaton_dump("auto_fin"));
+ DMSG(dump("auto_fin"));
delete nfa;
delete dfa;
@@ -157,7 +173,7 @@
}
/* Create new id for states */
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
int i;
for (i = 1, it = iHelper.m_states.begin();
it != iHelper.m_states.end(); it++, i++)
@@ -168,18 +184,18 @@
/* Build new automaton */
for (it = iHelper.m_states.begin(); it != iHelper.m_states.end(); it++)
{
- astate s = *it;
+ State * s = *it;
int i = s->id_static;
if (s == iHelper.getInitState())
m_init = i;
- if (s->accept)
+ if (s->m_accept)
m_acceptors[i] = true;
for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
{
- if (s->next[l])
- m_transitions[i][l] = s->next[l]->id_static;
+ if (s->m_next[l])
+ m_transitions[i][l] = s->m_next[l]->id_static;
}
}
}
@@ -232,48 +248,10 @@
/* ************************************************** *
- Definition of the state handling methods
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id)
-{
- set<uint64_t> l;
- l.insert(id);
- return l;
-}
-
-
-static string s_state_id_to_str(const set<uint64_t> &iId)
-{
- string s;
- set<uint64_t>::const_iterator it;
- for (it = iId.begin(); it != iId.end(); it++)
- {
- char tmp[50];
- sprintf(tmp, "%llu ", *it);
- s += tmp;
- }
- return s;
-}
-
-
-static astate s_state_create(const set<uint64_t> &iId)
-{
- astate s = new automaton_state_t();
- // TODO: use copy constructor
- s->id = iId;
- s->accept = false;
- memset(s->next, 0, sizeof(astate)*MAX_TRANSITION_LETTERS);
- DMSG(printf("** state %s creation\n", s_state_id_to_str(iId).c_str()));
- return s;
-}
-
-
-/* ************************************************** *
Definition of the AutomatonHelper class
* ************************************************** */
-AutomatonHelper::AutomatonHelper(astate iInitState)
+AutomatonHelper::AutomatonHelper(State * iInitState)
: m_initState(iInitState)
{
}
@@ -281,7 +259,7 @@
AutomatonHelper::~AutomatonHelper()
{
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++)
{
delete *it;
@@ -289,22 +267,22 @@
}
-void AutomatonHelper::addState(astate s)
+void AutomatonHelper::addState(State * s)
{
m_states.push_front(s);
- DMSG(printf("** state %s added to automaton\n",
s_state_id_to_str(s->id).c_str()));
+ DMSG(printf("** state %s added to automaton\n",
idToString(s->getId()).c_str()));
}
-astate AutomatonHelper::getState(const set<uint64_t> &iId) const
+State * AutomatonHelper::getState(const set<uint64_t> &iId) const
{
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++)
{
- astate s = *it;
- if (s->id == iId)
+ State * s = *it;
+ if (s->getId() == iId)
{
- //DMSG(printf("** get state %s ok\n",
s_state_id_to_str(s->id).c_str()));
+ //DMSG(printf("** get state %s ok\n",
idToString(s->getId()).c_str()));
return s;
}
}
@@ -318,66 +296,62 @@
AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl,
uint64_t *PS)
{
uint64_t maxpos = PS[0];
- astate current_state;
- char used_letter[MAX_TRANSITION_LETTERS];
+ State * current_state;
+ bool used_letter[MAX_TRANSITION_LETTERS];
/* 1: init_state = root->PP */
- set<uint64_t> temp_id0 = s_state_id_create(init_state_id);
- astate temp_state = s_state_create(temp_id0);
+ State * temp_state = new State(init_state_id);
AutomatonHelper *nfa = new AutomatonHelper(temp_state);
nfa->addState(temp_state);
- list<astate> L;
+ list<State *> L;
L.push_front(temp_state);
/* 2: while \exist state \in state_list */
while (! L.empty())
{
current_state = L.front();
L.pop_front();
- DMSG(printf("** current state = %s\n",
s_state_id_to_str(current_state->id).c_str()));
+ DMSG(printf("** current state = %s\n",
idToString(current_state->getId()).c_str()));
memset(used_letter, 0, sizeof(used_letter));
/* 3: \foreach l in \sigma | l \neq # */
for (uint32_t p = 1; p < maxpos; p++)
{
int current_letter = ptl[p];
- if (used_letter[current_letter] == 0)
+ if (used_letter[current_letter] == false)
{
/* 4: int set = \cup { PS(pos) | pos \in state \wedge pos == l
} */
uint64_t ens = 0;
for (uint32_t pos = 1; pos <= maxpos; pos++)
{
if (ptl[pos] == current_letter &&
- (unsigned int)*(current_state->id.begin()) & (1 <<
(pos - 1)))
+ (unsigned int)*(current_state->getId().begin()) & (1
<< (pos - 1)))
ens |= PS[pos];
}
/* 5: transition from current_state to temp_state */
if (ens)
{
- set<uint64_t> temp_id = s_state_id_create(ens);
+ set<uint64_t> temp_id;
+ temp_id.insert(ens);
temp_state = nfa->getState(temp_id);
if (temp_state == NULL)
{
- temp_state = s_state_create(temp_id);
+ temp_state = new State(temp_id);
nfa->addState(temp_state);
- current_state->next[current_letter] = temp_state;
L.push_front(temp_state);
}
- else
- {
- current_state->next[current_letter] = temp_state;
- }
+ current_state->m_next[current_letter] = temp_state;
}
- used_letter[current_letter] = 1;
+ used_letter[current_letter] = true;
}
}
}
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = nfa->m_states.begin(); it != nfa->m_states.end(); it++)
{
- astate s = *it;
- if (*(s->id.begin()) & (1 << (maxpos - 1)))
- s->accept = true;
+ State * s = *it;
+ if (*(s->getId().begin()) & (1 << (maxpos - 1)))
+ s->m_accept = true;
}
return nfa;
@@ -395,24 +369,26 @@
set<uint64_t>::const_iterator it;
for (it = S.begin(); it != S.end(); it++) /* \forall y \in
S */
{
- astate y, z;
- set<uint64_t> t = s_state_id_create(*it);
- assert(y = getState(t));
+ set<uint64_t> t;
+ t.insert(*it);
+ State *y = getState(t);
+ assert(y != NULL);
set<uint64_t> Ry; /* Ry =
\empty */
- if ((z = y->next[letter]) != NULL) /* \delta (y,z) =
l */
+ State *z;
+ if ((z = y->m_next[letter]) != NULL) /* \delta (y,z)
= l */
{
- r = getSuccessor(z->id, RE_EPSILON, iList);
+ r = getSuccessor(z->getId(), RE_EPSILON, iList);
Ry.insert(r.begin(), r.end());
- Ry.insert(z->id.begin(), z->id.end()); /* Ry = Ry \cup succ(z)
*/
+ Ry.insert(z->getId().begin(), z->getId().end()); /* Ry = Ry \cup
succ(z) */
}
/* \epsilon transition from start node */
- if ((z = y->next[RE_EPSILON]) != NULL) /* \delta (y,z) =
\epsilon */
+ if ((z = y->m_next[RE_EPSILON]) != NULL) /* \delta (y,z)
= \epsilon */
{
- r = getSuccessor(z->id, letter, iList);
+ r = getSuccessor(z->getId(), letter, iList);
Ry.insert(r.begin(), r.end()); /* Ry = Ry \cup succ(z) */
}
@@ -422,26 +398,21 @@
{
if (iList->valid[i])
{
- if (iList->letters[i][letter] && (z =
y->next[(int)iList->symbl[i]]) != NULL)
+ if (iList->letters[i][letter] && (z =
y->m_next[(int)iList->symbl[i]]) != NULL)
{
DMSG(printf("*** letter "));
DMSG(regexp_print_letter(stdout, letter));
DMSG(printf("is in "));
DMSG(regexp_print_letter(stdout, i));
- r = getSuccessor(z->id, RE_EPSILON, iList);
+ r = getSuccessor(z->getId(), RE_EPSILON, iList);
Ry.insert(r.begin(), r.end());
- Ry.insert(z->id.begin(), z->id.end());
+ Ry.insert(z->getId().begin(), z->getId().end());
}
}
}
}
-#if 0
- if (alist_is_empty(Ry)) /* Ry = \empty
*/
- return Ry;
-#endif
-
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry
*/
}
@@ -449,19 +420,19 @@
}
-void AutomatonHelper::setAccept(astate s) const
+void AutomatonHelper::setAccept(State * s) const
{
- DMSG(printf("=== setting accept for node (%s) :",
s_state_id_to_str(s->id).c_str()));
- list<astate>::const_iterator it;
+ DMSG(printf("=== setting accept for node (%s) :",
idToString(s->getId()).c_str()));
+ list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++)
{
- astate ns = *it;
- int idx = *(ns->id.begin());
- DMSG(printf("%s ", s_state_id_to_str(ns->id).c_str()));
- if (ns->accept && (std::find(s->id.begin(), s->id.end(), idx) !=
s->id.end()))
+ State * ns = *it;
+ uint64_t idx = *(ns->getId().begin());
+ DMSG(printf("%s ", idToString(ns->getId()).c_str()));
+ if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(),
idx) != s->getId().end()))
{
DMSG(printf("(ok) "));
- s->accept = true;
+ s->m_accept = true;
}
}
DMSG(printf("\n"));
@@ -471,13 +442,12 @@
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
struct search_RegE_list_t *iList)
{
- astate current_state;
+ State * current_state;
- list<astate> L;
+ list<State *> L;
// Clone the list
- set<uint64_t> temp_id0 = iNfa.m_initState->id;
- astate temp_state = s_state_create(temp_id0);
+ State * temp_state = new State(iNfa.m_initState->getId());
AutomatonHelper *dfa = new AutomatonHelper(temp_state);
dfa->addState(temp_state);
L.push_front(temp_state);
@@ -485,40 +455,35 @@
{
current_state = L.front();
L.pop_front();
- DMSG(printf("** current state = %s\n",
s_state_id_to_str(current_state->id).c_str()));
+ DMSG(printf("** current state = %s\n",
idToString(current_state->getId()).c_str()));
for (int letter = 1; letter < DIC_LETTERS; letter++)
{
- // DMSG(printf("*** start successor of %s\n",
s_state_id_to_str(current_state->id).c_str()));
+ // DMSG(printf("*** start successor of %s\n",
idToString(current_state->getId()).c_str()));
- set<uint64_t> temp_id = iNfa.getSuccessor(current_state->id,
letter, iList);
+ set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(),
letter, iList);
if (! temp_id.empty())
{
-
- DMSG(printf("*** successor of %s for ",
s_state_id_to_str(current_state->id).c_str()));
+ DMSG(printf("*** successor of %s for ",
idToString(current_state->getId()).c_str()));
DMSG(regexp_print_letter(stdout, letter));
- DMSG(printf(" = %s\n", s_state_id_to_str(temp_id).c_str()));
+ DMSG(printf(" = %s\n", idToString(temp_id).c_str()));
temp_state = dfa->getState(temp_id);
- // DMSG(printf("*** automaton get state -%s- ok\n",
s_state_id_to_str(temp_id).c_str()));
+ // DMSG(printf("*** automaton get state -%s- ok\n",
idToString(temp_id).c_str()));
if (temp_state == NULL)
{
- temp_state = s_state_create(temp_id);
+ temp_state = new State(temp_id);
dfa->addState(temp_state);
- current_state->next[letter] = temp_state;
L.push_front(temp_state);
}
- else
- {
- current_state->next[letter] = temp_state;
- }
+ current_state->m_next[letter] = temp_state;
}
}
}
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = dfa->m_states.begin(); it != dfa->m_states.end(); it++)
{
iNfa.setAccept(*it);
@@ -531,19 +496,33 @@
* ************************************************** *
* ************************************************** */
+static string idToString(const set<uint64_t> &iId)
+{
+ string s;
+ set<uint64_t>::const_iterator it;
+ for (it = iId.begin(); it != iId.end(); it++)
+ {
+ char tmp[50];
+ sprintf(tmp, "%llu ", *it);
+ s += tmp;
+ }
+ return s;
+}
+
+
void AutomatonHelper::printNodes(FILE* f) const
{
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++)
{
- astate s = *it;
- string sid = s_state_id_to_str(s->id);
+ State * s = *it;
+ string sid = idToString(s->getId());
fprintf(f, "\t\"%s\" [label = \"%s\"", sid.c_str(), sid.c_str());
if (s == m_initState)
{
fprintf(f, ", style = filled, color=lightgrey");
}
- if (s->accept)
+ if (s->m_accept)
{
fprintf(f, ", shape = doublecircle");
}
@@ -555,18 +534,16 @@
void AutomatonHelper::printEdges(FILE* f) const
{
- list<astate>::const_iterator it;
+ list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++)
{
- astate s = *it;
+ State * s = *it;
for (int letter = 0; letter < 255; letter++)
{
- if (s->next[letter])
+ if (s->m_next[letter])
{
- string sid = s_state_id_to_str(s->id);
- fprintf(f, "\t\"%s\" -> ", sid.c_str());
- sid = s_state_id_to_str(s->next[letter]->id);
- fprintf(f, "\"%s\" [label = \"", sid.c_str());
+ fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
+ fprintf(f, "\"%s\" [label = \"",
idToString(s->m_next[letter]->getId()).c_str());
regexp_print_letter(f, letter);
fprintf(f, "\"];\n");
}
Index: dic/dic.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/dic.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- dic/dic.cpp 8 Jan 2008 13:52:34 -0000 1.2
+++ dic/dic.cpp 27 Jul 2008 13:32:47 -0000 1.3
@@ -161,7 +161,7 @@
}
-const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
+dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
{
if (!isLast(e))
return e + 1;
@@ -169,7 +169,7 @@
}
-const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
+dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
@@ -178,13 +178,13 @@
}
-const dic_elt_t Dictionary::getRoot() const
+dic_elt_t Dictionary::getRoot() const
{
return m_header->getRoot();
}
-const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
+dic_code_t Dictionary::getCode(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
Index: dic/dic.h
===================================================================
RCS file: /sources/eliot/eliot/dic/dic.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- dic/dic.h 7 Jul 2008 17:30:00 -0000 1.16
+++ dic/dic.h 27 Jul 2008 13:32:47 -0000 1.17
@@ -103,7 +103,7 @@
* codes may range from 0 to 63. 0 is the null character.
* @returns code for the encoded character
*/
- const dic_code_t getCode(const dic_elt_t &elt) const;
+ dic_code_t getCode(const dic_elt_t &elt) const;
/**
* Returns the wide character associated with an element.
@@ -129,13 +129,13 @@
* Returns the root of the dictionary
* @returns root element
*/
- const dic_elt_t getRoot() const;
+ dic_elt_t getRoot() const;
/**
* Returns the next available neighbor (see isLast())
* @returns next dictionary element at the same depth
*/
- const dic_elt_t getNext(const dic_elt_t &elt) const;
+ dic_elt_t getNext(const dic_elt_t &elt) const;
/**
* Returns the first element available at the next depth
@@ -143,7 +143,7 @@
* @params elt : current dictionary element
* @returns next element (successor)
*/
- const dic_elt_t getSucc(const dic_elt_t &elt) const;
+ dic_elt_t getSucc(const dic_elt_t &elt) const;
/**
* Find the dictionary element matching the pattern starting
@@ -226,7 +226,8 @@
*/
void searchRegExp(const wstring &iRegexp,
vector<wstring> &oWordList,
- struct search_RegE_list_t *iList,
+ unsigned int iMinLength,
+ unsigned int iMaxLength,
unsigned int iMaxResults = 0) const;
Index: dic/dic_search.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/dic_search.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- dic/dic_search.cpp 13 Jul 2008 07:55:47 -0000 1.6
+++ dic/dic_search.cpp 27 Jul 2008 13:32:47 -0000 1.7
@@ -453,7 +453,6 @@
int minlength;
int maxlength;
Automaton *automaton_field;
- struct search_RegE_list_t *charlist;
wchar_t word[DIC_WORD_MAX];
int wordlen;
};
@@ -500,9 +499,40 @@
}
+static void init_letter_lists(const Dictionary &iDic, struct
search_RegE_list_t &iList)
+{
+ memset(&iList, 0, sizeof(iList));
+ // Prepare the space for 5 items
+ iList.symbl.assign(5, 0);
+
+ iList.valid[0] = true; // all letters
+ iList.symbl[0] = RE_ALL_MATCH;
+ iList.valid[1] = true; // vowels
+ iList.symbl[1] = RE_VOWL_MATCH;
+ iList.valid[2] = true; // consonants
+ iList.symbl[2] = RE_CONS_MATCH;
+ iList.letters[0][0] = false;
+ iList.letters[1][0] = false;
+ iList.letters[2][0] = false;
+ const wstring &allLetters = iDic.getHeader().getLetters();
+ for (size_t i = 1; i <= allLetters.size(); ++i)
+ {
+ iList.letters[0][i] = true;
+ iList.letters[1][i] = iDic.getHeader().isVowel(i);
+ iList.letters[2][i] = iDic.getHeader().isConsonant(i);
+ }
+
+ iList.valid[3] = false; // user defined list 1
+ iList.symbl[3] = RE_USR1_MATCH;
+ iList.valid[4] = false; // user defined list 2
+ iList.symbl[4] = RE_USR2_MATCH;
+}
+
+
void Dictionary::searchRegExp(const wstring &iRegexp,
vector<wstring> &oWordList,
- struct search_RegE_list_t *iList,
+ unsigned int iMinLength,
+ unsigned int iMaxLength,
unsigned int iMaxResults) const
{
if (iRegexp == L"")
@@ -514,27 +544,21 @@
else
oWordList.reserve(DEFAULT_VECT_ALLOC);
- struct regexp_error_report_t report;
- report.pos1 = 0;
- report.pos2 = 0;
- report.msg[0] = '\0';
-
- /* parsing */
+ // Parsing
Node *root = NULL;
- bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root,
iList);
+ struct search_RegE_list_t llist;
+ init_letter_lists(*this, llist);
+ bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root,
&llist);
if (!parsingOk)
{
-#if 0
- fprintf(stderr, "parser error at pos %d - %d: %s\n",
- report.pos1, report.pos2, report.msg);
-#endif
+ // TODO
delete root;
return;
}
int ptl[REGEXP_MAX+1];
- uint64_t PS [REGEXP_MAX+1];
+ uint64_t PS[REGEXP_MAX+1];
for (int i = 0; i < REGEXP_MAX; i++)
{
@@ -550,14 +574,13 @@
root->nextPos(PS);
- Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, iList);
+ Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
if (a)
{
struct params_regexp_t params;
- params.minlength = iList->minlength;
- params.maxlength = iList->maxlength;
+ params.minlength = iMinLength;
+ params.maxlength = iMaxLength;
params.automaton_field = a;
- params.charlist = iList;
memset(params.word, L'\0', sizeof(params.word));
params.wordlen = 0;
if (getHeader().getVersion() == 0)
Index: dic/grammar.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/grammar.cpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- dic/grammar.cpp 7 Jul 2008 17:30:00 -0000 1.1
+++ dic/grammar.cpp 27 Jul 2008 13:32:47 -0000 1.2
@@ -104,7 +104,7 @@
;
alphavar
- = chset<>(self.m_allLetters.c_str())
+ = chset<wchar_t>(self.m_allLetters.c_str())
;
}
@@ -137,30 +137,6 @@
}
else if (i->value.id() == RegexpGrammar::choiceId)
{
-#if 0
- assert(i->children.size() == 0);
-
- string choiceLetters(i->value.begin(), i->value.end());
- int j;
- for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; j++)
- {
- if (!iList->valid[j])
- {
- iList->valid[j] = true;
- iList->symbl[j] = RE_ALL_MATCH + j;
- iList->letters[j][0] = false;
- for (int k = 1; k < DIC_LETTERS; k++)
- {
- bool contains = (choiceLetters.find(k + L'a' - 1) !=
string::npos);
- iList->letters[j][k] = (contains ? !negate : negate);
- }
- break;
- }
- }
- Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
- evalStack.push(node);
-#endif
-#if 1
assert(i->children.size() == 0);
wstring choiceLetters(i->value.begin(), i->value.end());
@@ -176,7 +152,7 @@
if (!iList->valid[j])
{
iList->valid[j] = true;
- iList->symbl[j] = RE_ALL_MATCH + j;
+ iList->symbl.push_back(RE_ALL_MATCH + j);
iList->letters[j][0] = false;
for (itLetter = letters.begin(); itLetter != letters.end();
++itLetter)
{
@@ -189,7 +165,6 @@
}
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
evalStack.push(node);
-#endif
}
else if (i->value.id() == RegexpGrammar::varId)
{
Index: dic/regexp.h
===================================================================
RCS file: /sources/eliot/eliot/dic/regexp.h,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -b -r1.15 -r1.16
--- dic/regexp.h 13 Jul 2008 07:55:47 -0000 1.15
+++ dic/regexp.h 27 Jul 2008 13:32:47 -0000 1.16
@@ -139,18 +139,14 @@
#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
/**
- * Structure used for Dic_search_RegE \n
- * this structure is used to explicit letters list that will be matched
+ * Structure used for dic.searchRegExp
+ * This structure is used to explicit letters list that will be matched
* against special tokens in the regular expression search
*/
struct search_RegE_list_t
{
- /** maximum length for results */
- int minlength;
- /** maximum length for results */
- int maxlength;
/** special symbol associated with the list */
- char symbl[DIC_SEARCH_REGE_LIST];
+ vector<char> symbl;
/** 0 or 1 if list is valid */
bool valid[DIC_SEARCH_REGE_LIST];
/** 0 or 1 if letter is present in the list */
@@ -163,15 +159,6 @@
#define RE_LIST_USER_BEGIN 3
#define RE_LIST_USER_END 4
-#define MAX_REGEXP_ERROR_LENGTH 500
-
-struct regexp_error_report_t
-{
- int pos1;
- int pos2;
- char msg[MAX_REGEXP_ERROR_LENGTH];
-};
-
#include <cstdio>
void regexp_print_letter(FILE* f, char l);
Index: dic/regexpmain.cpp
===================================================================
RCS file: /sources/eliot/eliot/dic/regexpmain.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- dic/regexpmain.cpp 20 Jul 2008 12:15:52 -0000 1.6
+++ dic/regexpmain.cpp 27 Jul 2008 13:32:47 -0000 1.7
@@ -1,7 +1,8 @@
/*****************************************************************************
* Eliot
- * Copyright (C) 2005-2007 Antoine Fraboulet
+ * Copyright (C) 2005-2008 Antoine Fraboulet & Olivier Teulière
* Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
+ * Olivier Teulière <ipkiss @@ gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,39 +41,9 @@
#include "dic.h"
#include "header.h"
-#include "regexp.h"
#include "encoding.h"
-void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t
*iList)
-{
- memset(iList, 0, sizeof(*iList));
- iList->minlength = 1;
- iList->maxlength = 15;
- iList->valid[0] = true; // all letters
- iList->symbl[0] = RE_ALL_MATCH;
- iList->valid[1] = true; // vowels
- iList->symbl[1] = RE_VOWL_MATCH;
- iList->valid[2] = true; // consonants
- iList->symbl[2] = RE_CONS_MATCH;
- iList->letters[0][0] = false;
- iList->letters[1][0] = false;
- iList->letters[2][0] = false;
- const wstring &allLetters = iDic.getHeader().getLetters();
- for (size_t i = 1; i <= allLetters.size(); ++i)
- {
- iList->letters[0][i] = true;
- iList->letters[1][i] = iDic.getHeader().isVowel(i);
- iList->letters[2][i] = iDic.getHeader().isConsonant(i);
- }
-
- iList->valid[3] = false; // user defined list 1
- iList->symbl[3] = RE_USR1_MATCH;
- iList->valid[4] = false; // user defined list 2
- iList->symbl[4] = RE_USR2_MATCH;
-}
-
-
void usage(const char *iBinaryName)
{
cerr << _("usage: %s dictionary") << iBinaryName << endl;
@@ -103,20 +74,18 @@
{
Dictionary dic(argv[1]);
- struct search_RegE_list_t regList;
string line;
cout <<
"**************************************************************" << endl;
cout <<
"**************************************************************" << endl;
- cout << _("enter a regular expression:") << endl;
+ cout << _("Enter a regular expression:") << endl;
while (getline(cin, line))
{
if (line == "")
break;
/* Automaton */
- init_letter_lists(dic, &regList);
vector<wstring> wordList;
- dic.searchRegExp(convertToWc(line), wordList, &regList);
+ dic.searchRegExp(convertToWc(line), wordList, 1, 15);
cout << _("result:") << endl;
vector<wstring>::const_iterator it;
@@ -126,7 +95,7 @@
}
cout <<
"**************************************************************" << endl;
cout <<
"**************************************************************" << endl;
- cout << _("enter a regular expression:") << endl;
+ cout << _("Enter a regular expression:") << endl;
}
return 0;
Index: utils/eliottxt.cpp
===================================================================
RCS file: /sources/eliot/eliot/utils/eliottxt.cpp,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -b -r1.22 -r1.23
--- utils/eliottxt.cpp 7 Jul 2008 17:30:02 -0000 1.22
+++ utils/eliottxt.cpp 27 Jul 2008 13:32:48 -0000 1.23
@@ -35,7 +35,6 @@
#endif
#include "dic.h"
-#include "regexp.h"
#include "game_io.h"
#include "game_factory.h"
#include "training.h"
@@ -786,53 +785,6 @@
}
-void eliot_regexp_build_default_llist(const Dictionary &iDic,
- struct search_RegE_list_t &llist)
-{
- memset(&llist, 0, sizeof(llist));
-
- llist.minlength = 1;
- llist.maxlength = 15;
-
- llist.symbl[0] = RE_ALL_MATCH;
- llist.symbl[1] = RE_VOWL_MATCH;
- llist.symbl[2] = RE_CONS_MATCH;
- llist.symbl[3] = RE_USR1_MATCH;
- llist.symbl[5] = RE_USR2_MATCH;
-
- llist.valid[0] = true; // all letters
- llist.valid[1] = true; // vowels
- llist.valid[2] = true; // consonants
- llist.valid[3] = false; // user defined list 1
- llist.valid[4] = false; // user defined list 2
-
- for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
- {
- memset(llist.letters[i], 0, sizeof(llist.letters[i]));
- }
-
- const vector<Tile>& allTiles = iDic.getAllTiles();
- vector<Tile>::const_iterator it;
- for (it = allTiles.begin(); it != allTiles.end(); it++)
- {
- if (! it->isJoker() && ! it->isEmpty())
- {
- // all tiles
- llist.letters[0][it->toCode()] = 1;
- // vowels
- if (it->isVowel())
- {
- llist.letters[1][it->toCode()] = 1;
- }
- // consonants
- if (it->isConsonant())
- {
- llist.letters[2][it->toCode()] = 1;
- }
- }
- }
-}
-
void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
const wchar_t *delim, wchar_t **state)
{
@@ -844,11 +796,6 @@
printf(" {3} longueur maximum d'un mot\n");
*/
-#define DIC_RE_MAX (3*DIC_WORD_MAX) // yes, it's 3
-
- struct search_RegE_list_t llist;
- eliot_regexp_build_default_llist(iDic, llist);
-
wchar_t *regexp = _wcstok(NULL, delim, state);
wchar_t *cnres = _wcstok(NULL, delim, state);
wchar_t *clmin = _wcstok(NULL, delim, state);
@@ -858,16 +805,11 @@
{
return;
}
- int nres = cnres ? _wtoi(cnres) : 50;
- int lmin = clmin ? _wtoi(clmin) : 1;
- int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
+ unsigned int nres = cnres ? _wtoi(cnres) : 50;
+ unsigned int lmin = clmin ? _wtoi(clmin) : 1;
+ unsigned int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
- if (lmax <= (DIC_WORD_MAX - 1) && lmin >= 1 && lmin <= lmax)
- {
- llist.minlength = lmin;
- llist.maxlength = lmax;
- }
- else
+ if (lmax > (DIC_WORD_MAX - 1) || lmin < 1 || lmin > lmax)
{
printf("bad length -%s,%s-\n", (const char*)clmin, (const char*)clmax);
return;
@@ -877,16 +819,14 @@
nres, lmin, lmax);
vector<wstring> wordList;
- iDic.searchRegExp(regexp, wordList, &llist);
+ iDic.searchRegExp(regexp, wordList, lmin, lmax, nres);
- int nresult = 0;
vector<wstring>::const_iterator it;
- for (it = wordList.begin(); it != wordList.end() && nresult < nres; it++)
+ for (it = wordList.begin(); it != wordList.end(); it++)
{
printf("%s\n", convertToMb(*it).c_str());
- nresult++;
}
- printf("%d printed results\n", nresult);
+ printf("%d printed results\n", wordList.size());
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Eliot-dev] eliot dic/automaton.cpp dic/dic.cpp dic/dic.h d...,
eliot-dev <=