eliot-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Eliot-dev] Changes to eliot/dic/dic_search.c [antoine-1]


From: eliot-dev
Subject: [Eliot-dev] Changes to eliot/dic/dic_search.c [antoine-1]
Date: Sun, 23 Oct 2005 13:13:59 -0400

Index: eliot/dic/dic_search.c
diff -u /dev/null eliot/dic/dic_search.c:1.12.2.1
--- /dev/null   Sun Oct 23 17:13:59 2005
+++ eliot/dic/dic_search.c      Sun Oct 23 17:13:56 2005
@@ -0,0 +1,561 @@
+/* Eliot                                                                     */
+/* Copyright (C) 1999  Antoine Fraboulet                                     */
+/*                                                                           */
+/* This file is part of Eliot.                                               */
+/*                                                                           */
+/* Eliot is free software; you can redistribute it and/or modify             */
+/* it under the terms of the GNU General Public License as published by      */
+/* the Free Software Foundation; either version 2 of the License, or         */
+/* (at your option) any later version.                                       */
+/*                                                                           */
+/* Elit is distributed in the hope that it will be useful,                   */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             */
+/* GNU General Public License for more details.                              */
+/*                                                                           */
+/* You should have received a copy of the GNU General Public License         */
+/* along with this program; if not, write to the Free Software               */
+/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA 
*/
+
+<<<<<<< dic_search.c
+/* $Id: dic_search.c,v 1.12.2.1 2005/10/23 17:13:56 afrab Exp $ */
+=======
+/*
+ * $Id: dic_search.c,v 1.12.2.1 2005/10/23 17:13:56 afrab Exp $
+ */
+>>>>>>> 1.12
+
+/**
+ *  \file   dic_search.c
+ *  \brief  Dictionary lookup functions
+ *  \author Antoine Fraboulet
+ *  \date   2002
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dic_internals.h"
+#include "dic.h"
+#include "regexp.h"
+#include "dic_search.h"
+#include "libdic_a-er.h" /* generated by bison */
+#include "scanner.h"     /* generated by flex  */
+#include "automaton.h"
+
+/*
+ * shut down the compiler
+ */
+static int yy_init_globals (yyscan_t yyscanner )
+{
+  yy_init_globals(yyscanner);
+  return 0;
+}
+
+/**
+ * Dic_seel_edgeptr
+ * walk the dictionary until the end of the word
+ * @param dic : dictionnary
+ * @param s : current pointer to letters
+ * @param eptr : current edge in the dawg
+ */
+static Dawg_edge*
+Dic_seek_edgeptr(const Dictionary dic, const char* s, Dawg_edge *eptr)
+{
+  if (*s)
+    {
+      Dawg_edge *p = dic->dawg + eptr->ptr;
+      do {
+        if (p->chr == (unsigned)(*s & DIC_CHAR_MASK))
+          return Dic_seek_edgeptr (dic,s + 1, p);
+      } while (!(*p++).last);
+      return dic->dawg;
+    }
+  else
+    return eptr;
+}
+
+
+/**
+ * Dic_search_word : direct application of Dic_seek_edgeptr
+ * @param dic : dictionary
+ * @param word : word to lookup
+ * @result 0 not a valid word, 1 ok
+ */
+
+int
+Dic_search_word(const Dictionary dic, const char* word)
+{
+  Dawg_edge *e;
+  e = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
+  return e->term;
+}
+
+
+/**
+ * global variables for Dic_search_word_by_len :
+ *
+ * a pointer to the structure is passed as a parameter
+ * so that all the search_* variables appear to the functions
+ * as global but the code remains re-entrant.
+ * Should be better to change the algorithm ...
+ */
+
+struct params_7plus1_t {
+ Dictionary search_dic;
+ int search_len;
+ int search_wordlistlen;
+ int search_wordlistlenmax;
+ char search_wordtst[DIC_WORD_MAX];
+ char search_letters[DIC_LETTERS];
+ char (*search_wordlist)[RES_7PL1_MAX][DIC_WORD_MAX];
+};
+
+static void
+Dic_search_word_by_len(struct params_7plus1_t *params, int i, Dawg_edge 
*edgeptr)
+{
+  /* depth first search in the dictionary */
+  do {
+    /* we use a static array and not a real list so we have to stop if
+     * the array is full */
+    if (params->search_wordlistlen >= params->search_wordlistlenmax)
+      break;
+
+    /* the test is false only when reach the end-node */
+    if (edgeptr->chr)
+      {
+
+       /* is the letter available in search_letters */
+       if (params->search_letters[edgeptr->chr])
+         {
+           params->search_wordtst[i] = edgeptr->chr + 'A' - 1;
+           params->search_letters[edgeptr->chr] --;
+           if (i == params->search_len)
+             {
+               if ((edgeptr->term)
+                 /* && (params->search_wordlistlen < 
params->search_wordlistlenmax) */)
+                 
strcpy((*params->search_wordlist)[params->search_wordlistlen++],params->search_wordtst);
+             }
+           else /* if (params->search_wordlistlen < 
params->search_wordlistlenmax) */
+             {
+               Dic_search_word_by_len(params,i + 1, params->search_dic->dawg + 
edgeptr->ptr);
+             }
+           params->search_letters[edgeptr->chr] ++;
+           params->search_wordtst[i] = '\0';
+         }
+
+       /* the letter is of course available if we have a joker available */
+       if (params->search_letters[0])
+         {
+           params->search_wordtst[i] = edgeptr->chr + 'a' - 1;
+           params->search_letters[0] --;
+           if (i == params->search_len)
+             {
+               if ((edgeptr->term)
+                    /* && (params->search_wordlistlen < 
params->search_wordlistlenmax) */)
+                 
strcpy((*(params->search_wordlist))[params->search_wordlistlen++],params->search_wordtst);
+             }
+           else /* if (params->search_wordlistlen < 
params->search_wordlistlenmax) */
+             {
+               Dic_search_word_by_len(params,i + 1,params->search_dic->dawg + 
edgeptr->ptr);
+             }
+           params->search_letters[0] ++;
+           params->search_wordtst[i] = '\0';
+         }
+      }
+  } while (! (*edgeptr++).last);
+}
+
+void
+Dic_search_7pl1(const Dictionary dic, const char* rack,
+                char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+                int joker)
+{
+  int i,j,wordlen;
+  const char* r = rack;
+  struct params_7plus1_t params;
+  Dawg_edge *root_edge;
+
+  for(i=0; i < DIC_LETTERS; i++)
+    for(j=0; j < RES_7PL1_MAX; j++)
+      buff[i][j][0] = '\0';
+
+  for(i=0; i<DIC_LETTERS; i++)
+    params.search_letters[i] = 0;
+
+  if (dic == NULL || rack == NULL)
+    return;
+
+  /*
+   * the letters are verified and changed to the dic internal
+   * representation (*r & DIC_CHAR_MASK)
+   */
+  for(wordlen=0; wordlen < DIC_WORD_MAX && *r; r++)
+    {
+      if (isalpha(*r))
+       {
+         params.search_letters[(int)*r & DIC_CHAR_MASK]++;
+          wordlen++;
+       }
+      else if (*r == '?')
+       {
+         if (joker)
+           {
+             params.search_letters[0]++;
+             wordlen++;
+           }
+         else
+           {
+             strncpy(buff[0][0],"** joker **",DIC_WORD_MAX);
+             return;
+           }
+       }
+    }
+
+  if (wordlen < 1)
+    return;
+
+  root_edge = dic->dawg + (dic->dawg[dic->root].ptr);
+
+  params.search_dic = dic;
+  params.search_wordlistlenmax = RES_7PL1_MAX;
+
+  /* search for all the words that can be done with the letters */
+  params.search_len = wordlen - 1;
+  params.search_wordtst[wordlen]='\0';
+  params.search_wordlist = & buff[0];
+  params.search_wordlistlen = 0;
+  Dic_search_word_by_len(&params,0,root_edge);
+
+  /* search for all the words that can be done with the letters +1 */
+  params.search_len = wordlen;
+  params.search_wordtst[wordlen + 1]='\0';
+  for(i='a'; i <= 'z'; i++)
+    {
+      params.search_letters[i & DIC_CHAR_MASK]++;
+
+      params.search_wordlist = & buff[i & DIC_CHAR_MASK];
+      params.search_wordlistlen = 0;
+      Dic_search_word_by_len(&params,0,root_edge);
+
+      params.search_letters[i & DIC_CHAR_MASK]--;
+    }
+}
+
+/****************************************/
+/****************************************/
+
+void
+Dic_search_Racc(const Dictionary dic, const char* word,
+                char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+{
+  /* search_racc will try to add a letter in front and at the end of a word */
+
+  int i,wordlistlen;
+  Dawg_edge *edge;
+  char wordtst[DIC_WORD_MAX];
+
+  for(i=0; i < RES_RACC_MAX; i++)
+    wordlist[i][0] = 0;
+
+  if (dic == NULL || wordlist == NULL)
+    return;
+
+  /* let's try for the front */
+  wordlistlen = 0;
+  strcpy(wordtst+1,word);
+  for(i='a'; i <= 'z'; i++)
+    {
+      wordtst[0] = i;
+      if (Dic_search_word(dic,wordtst) && wordlistlen < RES_RACC_MAX)
+       strcpy(wordlist[wordlistlen++],wordtst);
+    }
+
+  /* add a letter at the end */
+  for(i=0; word[i]; i++)
+    wordtst[i] = word[i];
+
+  wordtst[i  ] = '\0';
+  wordtst[i+1] = '\0';
+
+  edge = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
+
+  /* points to what the next letter can be */
+  edge = dic->dawg + edge->ptr;
+
+  if (edge != dic->dawg)
+    {
+      do {
+         if (edge->term && wordlistlen < RES_RACC_MAX)
+           {
+             wordtst[i] = edge->chr + 'a' - 1;
+             strcpy(wordlist[wordlistlen++],wordtst);
+           }
+      } while (!(*edge++).last);
+    }
+}
+
+/****************************************/
+/****************************************/
+
+
+void
+Dic_search_Benj(const Dictionary dic, const char* word,
+                char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+{
+  int i,wordlistlen;
+  char wordtst[DIC_WORD_MAX];
+  Dawg_edge *edge0,*edge1,*edge2,*edgetst;
+
+  for(i=0; i < RES_BENJ_MAX; i++)
+    wordlist[i][0] = 0;
+
+  if (dic == NULL || word == NULL)
+    return;
+
+  wordlistlen = 0;
+
+  strcpy(wordtst+3,word);
+  edge0 = dic->dawg + (dic->dawg[dic->root].ptr);
+  do {
+    wordtst[0] = edge0->chr + 'a' - 1;
+    edge1 = dic->dawg + edge0->ptr;
+    do {
+      wordtst[1] = edge1->chr + 'a' - 1;
+      edge2  = dic->dawg + edge1->ptr;
+      do {
+       wordtst[2] = edge2->chr + 'a' - 1;
+       edgetst = Dic_seek_edgeptr(dic,word,edge2);
+       if (edgetst->term && wordlistlen < RES_BENJ_MAX)
+         strcpy(wordlist[wordlistlen++],wordtst);
+      } while (!(*edge2++).last);
+    } while (!(*edge1++).last);
+  } while (!(*edge0++).last);
+}
+
+
+/****************************************/
+/****************************************/
+
+struct params_cross_t {
+ Dictionary dic;
+ int wordlen;
+ int wordlistlen;
+ int wordlistlenmax;
+ char mask[DIC_WORD_MAX];
+};
+
+
+void
+Dic_search_cross_rec(struct params_cross_t *params, 
+                    char wordlist[RES_CROS_MAX][DIC_WORD_MAX], 
+                    Dawg_edge *edgeptr)
+{
+  Dawg_edge *current = params->dic->dawg + edgeptr->ptr;
+
+  if (params->mask[params->wordlen] == '\0' && edgeptr->term)
+    {
+      if (params->wordlistlen < params->wordlistlenmax)
+       strcpy(wordlist[params->wordlistlen++],params->mask);
+    }
+  else if (params->mask[params->wordlen] == '.')
+    {
+      do
+       {
+         params->mask[params->wordlen] = current->chr + 'a' - 1;
+         params->wordlen ++;
+         Dic_search_cross_rec(params,wordlist,current);
+         params->wordlen --;
+         params->mask[params->wordlen] = '.';
+       }
+      while (!(*current++).last);
+    }
+  else
+    {
+      do
+       {
+         if (current->chr == (unsigned int)(params->mask[params->wordlen] & 
DIC_CHAR_MASK))
+           {
+             params->wordlen ++;
+             Dic_search_cross_rec(params,wordlist,current);
+             params->wordlen --;
+             break;
+           }
+       }
+      while (!(*current++).last);
+    }
+}
+
+
+
+void
+Dic_search_Cros(const Dictionary dic, const char* mask,
+                char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+{
+  int  i;
+  struct params_cross_t params;
+
+  for(i=0; i < RES_CROS_MAX; i++)
+    wordlist[i][0] = 0;
+
+  if (dic == NULL || mask == NULL)
+    return;
+
+  for(i=0; i < DIC_WORD_MAX && mask[i]; i++)
+    {
+      if (isalpha(mask[i]))
+       params.mask[i] = (mask[i] & DIC_CHAR_MASK) + 'A' - 1;
+      else
+       params.mask[i] = '.';
+    }
+  params.mask[i] = '\0';
+
+  params.dic            = dic;
+  params.wordlen        = 0;
+  params.wordlistlen    = 0;
+  params.wordlistlenmax = RES_CROS_MAX;
+  Dic_search_cross_rec(&params, wordlist, dic->dawg + dic->root);
+}
+
+/****************************************/
+/****************************************/
+
+struct params_regexp_t {
+  Dictionary dic;
+  int minlength;
+  int maxlength;
+  automaton automaton;
+  struct search_RegE_list_t *charlist;  
+  char word[DIC_WORD_MAX];
+  int  wordlen;
+  int  wordlistlen;
+  int  wordlistlenmax;
+};
+
+void
+Dic_search_regexp_rec(struct params_regexp_t *params, 
+                     int state,
+                     Dawg_edge *edgeptr, 
+                     char wordlist[RES_REGE_MAX][DIC_WORD_MAX])
+{
+  int next_state;
+  Dawg_edge *current;
+  /* if we have a valid word we store it */
+  if (automaton_get_accept(params->automaton,state) && edgeptr->term)
+    {
+      int l = strlen(params->word);
+      if (params->wordlistlen < params->wordlistlenmax && 
+         params->minlength <= l                        &&
+         params->maxlength >= l)
+       {
+         strcpy(wordlist[params->wordlistlen++],params->word);
+       }
+    }  
+  /* we now drive the search by exploring the dictionary */
+  current = params->dic->dawg + edgeptr->ptr;
+  do {
+    /* the current letter is current->chr */
+    next_state = 
automaton_get_next_state(params->automaton,state,current->chr);
+    /* 1 : the letter appears in the automaton as is */
+    if (next_state)
+      {
+       params->word[params->wordlen] = current->chr + 'a' - 1;
+       params->wordlen ++;
+       Dic_search_regexp_rec(params,next_state,current,wordlist);
+       params->wordlen --;
+       params->word[params->wordlen] = '\0';
+      }
+  } while (!(*current++).last);
+}
+
+
+    /** 
+     * function prototype for parser generated by bison
+     */
+int  regexpparse(yyscan_t scanner, NODE** root, 
+                struct search_RegE_list_t *list,
+                struct regexp_error_report_t *err);
+
+void
+Dic_search_RegE(const Dictionary dic, const char* re,
+                char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+               struct search_RegE_list_t *list)
+{
+  int i,p,n,value;
+  int ptl[REGEXP_MAX+1]; 
+  int PS [REGEXP_MAX+1]; 
+  NODE* root;
+  yyscan_t scanner;
+  YY_BUFFER_STATE buf;
+  automaton a;
+  char stringbuf[250];
+  struct params_regexp_t params;
+  struct regexp_error_report_t report;
+
+  /* init */
+  for(i=0; i < RES_REGE_MAX; i++)
+    wordlist[i][0] = 0;
+
+  if (dic == NULL || re == NULL)
+    return;
+
+  /* (expr)# */
+  sprintf(stringbuf,"(%s)#",re);
+  for(i=0; i < REGEXP_MAX; i++)
+    {
+      PS[i] = 0;
+      ptl[i] = 0;
+    }
+
+  report.pos1 = 0;
+  report.pos2 = 0;
+  report.msg[0] = '\0';
+
+  /* parsing */
+  regexplex_init( &scanner );
+  buf   = regexp_scan_string( stringbuf, scanner );
+  root  = NULL;
+  value = regexpparse( scanner , &root, list, &report);
+  regexp_delete_buffer(buf,scanner);
+  regexplex_destroy( scanner );
+
+  if (value)
+    {
+#ifdef DEBUG_FLEX_IS_BROKEN
+      fprintf(stderr,"parser error at pos %d - %d : %s\n",
+             report.pos1, report.pos2, report.msg);
+#endif      
+      regexp_delete_tree(root);
+      return ;
+    }
+
+  n = 1;
+  p = 1;
+  regexp_parcours(root, &p, &n, ptl);
+  PS [0] = p - 1;
+  ptl[0] = p - 1;
+
+  regexp_possuivante(root,PS);
+
+  if ((a = automaton_build(root->PP,ptl,PS,list)) != NULL)
+    {
+      params.dic            = dic;
+      params.minlength      = list->minlength;
+      params.maxlength      = list->maxlength;
+      params.automaton      = a;
+      params.charlist       = list;
+      memset(params.word,'\0',sizeof(params.word));
+      params.wordlen        = 0;
+      params.wordlistlen    = 0;
+      params.wordlistlenmax = RES_REGE_MAX;
+      Dic_search_regexp_rec(&params, automaton_get_init(a), dic->dawg + 
dic->root, wordlist);
+      
+      automaton_delete(a);
+    }
+  regexp_delete_tree(root);
+}
+
+/****************************************/
+/****************************************/
+




reply via email to

[Prev in Thread] Current Thread [Next in Thread]