eliot-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Eliot-dev] eliot dic/dic_search.c dic/dic_search.h dic/reg... [multibyte]


From: eliot-dev
Subject: [Eliot-dev] eliot dic/dic_search.c dic/dic_search.h dic/reg... [multibyte]
Date: Wed, 28 Dec 2005 20:02:52 +0000

CVSROOT:        /sources/eliot
Module name:    eliot
Branch:         multibyte
Changes by:     Olivier Teulière <address@hidden>      05/12/28 20:02:52

Modified files:
        dic            : dic_search.c dic_search.h regexpmain.c 
        utils          : eliottxt.cpp 

Log message:
        Added wchar_t wrappers around Dic_search_* functions

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/dic_search.c.diff?only_with_tag=multibyte&tr1=1.14.2.1&tr2=1.14.2.2&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/dic_search.h.diff?only_with_tag=multibyte&tr1=1.10.2.1&tr2=1.10.2.2&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/regexpmain.c.diff?only_with_tag=multibyte&tr1=1.10&tr2=1.10.2.1&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/utils/eliottxt.cpp.diff?only_with_tag=multibyte&tr1=1.12.2.1&tr2=1.12.2.2&r1=text&r2=text

Patches:
Index: eliot/dic/dic_search.c
diff -u eliot/dic/dic_search.c:1.14.2.1 eliot/dic/dic_search.c:1.14.2.2
--- eliot/dic/dic_search.c:1.14.2.1     Wed Dec 28 16:47:35 2005
+++ eliot/dic/dic_search.c      Wed Dec 28 20:02:52 2005
@@ -27,6 +27,7 @@
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 
 #include "dic_internals.h"
 #include "dic.h"
@@ -84,25 +85,14 @@
 
 
 /**
- * This method is a wrapper around the Dic_search_word_inner function.
- * It simply converts the wchar_t* string into a char* one.
- * XXX: This is a temporary hack until the dictionaries can handle multibyte
- * characters properly... the Dic_search_word_inner function should disappear!
+ * Wrapper around Dic_search_word_inner, until we have multibyte support in
+ * the dictionary
  */
 int Dic_search_word(const Dictionary dic, const wchar_t* word)
 {
     int res;
-    char *tmp_word;
-    size_t len;
-
-    // Get the needed length (we _can't_ use wstring::size())
-    len = wcstombs(NULL, word, 0);
-    if (len == (size_t)-1)
-        tmp_word = "";
-
-    // Convert the string
-    tmp_word = malloc(len + 1);
-    len = wcstombs(tmp_word, word, len + 1);
+    char *tmp_word = malloc(wcslen(word) + 1);
+    sprintf(tmp_word, "%ls", word);
 
     // Do the actual work
     res = Dic_search_word_inner(dic, tmp_word);
@@ -187,10 +177,10 @@
   } while (! (*edgeptr++).last);
 }
 
-void
-Dic_search_7pl1(const Dictionary dic, const char* rack,
-                char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
-                int joker)
+static void
+Dic_search_7pl1_inner(const Dictionary dic, const char* rack,
+                      char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+                      int joker)
 {
   int i,j,wordlen;
   const char* r = rack;
@@ -263,12 +253,41 @@
     }
 }
 
+
+/**
+ * Wrapper around Dic_search_7pl1_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_7pl1(const Dictionary dic, const wchar_t* rack,
+                wchar_t buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+                int joker)
+{
+    int i, j, k;
+    char tmp_buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX];
+    char *tmp_rack = malloc(wcslen(rack) + 1);
+    sprintf(tmp_rack, "%ls", rack);
+    // Do the actual work
+    Dic_search_7pl1_inner(dic, tmp_rack, tmp_buff, joker);
+
+    for (i = 0; i < DIC_LETTERS; i++)
+    {
+        for (j = 0; j < RES_7PL1_MAX; j++)
+        {
+            for (k = 0; k < DIC_WORD_MAX; k++)
+            {
+                buff[i][j][k] = tmp_buff[i][j][k];
+            }
+        }
+    }
+}
+
 /****************************************/
 /****************************************/
 
-void
-Dic_search_Racc(const Dictionary dic, const char* word,
-                char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Racc_inner(const Dictionary dic, const char* word,
+                      char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
 {
   /* search_racc will try to add a letter in front and at the end of a word */
 
@@ -316,13 +335,37 @@
     }
 }
 
+/**
+ * Wrapper around Dic_search_Racc_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Racc(const Dictionary dic, const wchar_t* word,
+                wchar_t wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+{
+    int i, j;
+    char tmp_buff[RES_RACC_MAX][DIC_WORD_MAX];
+    char *tmp_word = malloc(wcslen(word) + 1);
+    sprintf(tmp_word, "%ls", word);
+    // Do the actual work
+    Dic_search_Racc_inner(dic, tmp_word, tmp_buff);
+
+    for (i = 0; i < RES_RACC_MAX; i++)
+    {
+        for (j = 0; j < DIC_WORD_MAX; j++)
+        {
+            wordlist[i][j] = tmp_buff[i][j];
+        }
+    }
+}
+
 /****************************************/
 /****************************************/
 
 
-void
-Dic_search_Benj(const Dictionary dic, const char* word,
-                char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Benj_inner(const Dictionary dic, const char* word,
+                      char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
 {
   int i,wordlistlen;
   char wordtst[DIC_WORD_MAX];
@@ -354,6 +397,30 @@
   } while (!(*edge0++).last);
 }
 
+/**
+ * Wrapper around Dic_search_Benj_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Benj(const Dictionary dic, const wchar_t* word,
+                wchar_t wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+{
+    int i, j;
+    char tmp_buff[RES_BENJ_MAX][DIC_WORD_MAX];
+    char *tmp_word = malloc(wcslen(word) + 1);
+    sprintf(tmp_word, "%ls", word);
+    // Do the actual work
+    Dic_search_Benj_inner(dic, tmp_word, tmp_buff);
+
+    for (i = 0; i < RES_BENJ_MAX; i++)
+    {
+        for (j = 0; j < DIC_WORD_MAX; j++)
+        {
+            wordlist[i][j] = tmp_buff[i][j];
+        }
+    }
+}
+
 
 /****************************************/
 /****************************************/
@@ -369,8 +436,8 @@
 
 void
 Dic_search_cross_rec(struct params_cross_t *params,
-                    char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
-                    Dawg_edge *edgeptr)
+                     char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
+                     Dawg_edge *edgeptr)
 {
   Dawg_edge *current = params->dic->dawg + edgeptr->ptr;
 
@@ -408,10 +475,9 @@
 }
 
 
-
-void
-Dic_search_Cros(const Dictionary dic, const char* mask,
-                char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Cros_inner(const Dictionary dic, const char* mask,
+                      char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
 {
   int  i;
   struct params_cross_t params;
@@ -438,6 +504,31 @@
   Dic_search_cross_rec(&params, wordlist, dic->dawg + dic->root);
 }
 
+
+/**
+ * Wrapper around Dic_search_Cros_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Cros(const Dictionary dic, const wchar_t* mask,
+                wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+{
+    int i, j;
+    char tmp_buff[RES_CROS_MAX][DIC_WORD_MAX];
+    char *tmp_mask = malloc(wcslen(mask) + 1);
+    sprintf(tmp_mask, "%ls", mask);
+    // Do the actual work
+    Dic_search_Cros_inner(dic, tmp_mask, tmp_buff);
+
+    for (i = 0; i < RES_CROS_MAX; i++)
+    {
+        for (j = 0; j < DIC_WORD_MAX; j++)
+        {
+            wordlist[i][j] = tmp_buff[i][j];
+        }
+    }
+}
+
 /****************************************/
 /****************************************/
 
@@ -494,13 +585,13 @@
      * function prototype for parser generated by bison
      */
 int  regexpparse(yyscan_t scanner, NODE** root,
-                struct search_RegE_list_t *list,
-                struct regexp_error_report_t *err);
+                 struct search_RegE_list_t *list,
+                 struct regexp_error_report_t *err);
 
 void
-Dic_search_RegE(const Dictionary dic, const char* re,
-                char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
-               struct search_RegE_list_t *list)
+Dic_search_RegE_inner(const Dictionary dic, const char* re,
+                      char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+                      struct search_RegE_list_t *list)
 {
   int i,p,n,value;
   int ptl[REGEXP_MAX+1];
@@ -576,6 +667,31 @@
   regexp_delete_tree(root);
 }
 
+/**
+ * Wrapper around Dic_search_RegE_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_RegE(const Dictionary dic, const wchar_t* re,
+                wchar_t wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+                struct search_RegE_list_t *list)
+{
+    int i, j;
+    char tmp_buff[RES_REGE_MAX][DIC_WORD_MAX];
+    char *tmp_re = malloc(wcslen(re) + 1);
+    sprintf(tmp_re, "%ls", re);
+    // Do the actual work
+    Dic_search_RegE_inner(dic, tmp_re, tmp_buff, list);
+
+    for (i = 0; i < RES_REGE_MAX; i++)
+    {
+        for (j = 0; j < DIC_WORD_MAX; j++)
+        {
+            wordlist[i][j] = tmp_buff[i][j];
+        }
+    }
+}
+
 /****************************************/
 /****************************************/
 
Index: eliot/dic/dic_search.h
diff -u eliot/dic/dic_search.h:1.10.2.1 eliot/dic/dic_search.h:1.10.2.2
--- eliot/dic/dic_search.h:1.10.2.1     Wed Dec 28 16:47:35 2005
+++ eliot/dic/dic_search.h      Wed Dec 28 20:02:52 2005
@@ -62,7 +62,8 @@
      * @param path : lookup word
      * @return 1 present, 0 error
      */
-int  Dic_search_word(Dictionary dic, const wchar_t* path);
+int  Dic_search_word(Dictionary dic,
+                     const wchar_t* path);
 
     /**
      * Search for all feasible word with "rack" plus one letter
@@ -70,7 +71,10 @@
      * @param rack : letters
      * @param wordlist : results
      */
-void Dic_search_7pl1(Dictionary dic, const char* rack, char wordlist[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX], int joker);
+void Dic_search_7pl1(Dictionary dic,
+                     const wchar_t* rack,
+                     wchar_t wordlist[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+                     int joker);
 
     /**
      * Search for all feasible word adding a letter in front or at the end
@@ -78,7 +82,9 @@
      * @param word : word
      * @param wordlist : results
      */
-void Dic_search_Racc(Dictionary dic, const char* word, char wordlist[RES_RACC_MAX][DIC_WORD_MAX]);
+void Dic_search_Racc(Dictionary dic,
+                     const wchar_t* word,
+                     wchar_t wordlist[RES_RACC_MAX][DIC_WORD_MAX]);
 
     /**
      * Search for benjamins
@@ -86,7 +92,9 @@
      * @param rack : letters
      * @param wordlist : results
      */
-void Dic_search_Benj(Dictionary dic, const char* word, char wordlist[RES_BENJ_MAX][DIC_WORD_MAX]);
+void Dic_search_Benj(Dictionary dic,
+                     const wchar_t* word,
+                     wchar_t wordlist[RES_BENJ_MAX][DIC_WORD_MAX]);
 
     /**
      * Search for crosswords
@@ -94,7 +102,9 @@
      * @param rack : letters
      * @param wordlist : results
      */
-void Dic_search_Cros(Dictionary dic, const char* mask, char wordlist[RES_CROS_MAX][DIC_WORD_MAX]);
+void Dic_search_Cros(Dictionary dic,
+                     const wchar_t* mask,
+                     wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX]);
 
     /**
      * Search for words matching a regular expression
@@ -102,7 +112,10 @@
      * @param re : regular expression
      * @param wordlist : results
      */
-void Dic_search_RegE(Dictionary dic, const char* re, char wordlist[RES_REGE_MAX][DIC_WORD_MAX], struct search_RegE_list_t *list);
+void Dic_search_RegE(Dictionary dic,
+                     const wchar_t* re,
+                     wchar_t wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+                     struct search_RegE_list_t *list);
 
 #if defined(__cplusplus)
   }
Index: eliot/dic/regexpmain.c
diff -u /dev/null eliot/dic/regexpmain.c:1.10.2.1
--- /dev/null   Wed Dec 28 20:02:52 2005
+++ eliot/dic/regexpmain.c      Wed Dec 28 20:02:52 2005
@@ -0,0 +1,138 @@
+/* Eliot                                                                     */
+/* Copyright (C) 1999  Antoine Fraboulet                                     */
+/*                                                                           */
+/* This file is part of Eliot.                                               */
+/*                                                                           */
+/* Eliot is free software; you can redistribute it and/or modify             */
+/* it under the terms of the GNU General Public License as published by      */
+/* the Free Software Foundation; either version 2 of the License, or         */
+/* (at your option) any later version.                                       */
+/*                                                                           */
+/* Elit is distributed in the hope that it will be useful,                   */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             */
+/* GNU General Public License for more details.                              */
+/*                                                                           */
+/* You should have received a copy of the GNU General Public License         */
+/* along with this program; if not, write to the Free Software               */
+/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
+
+/**
+ *  \file   regexpmain.c
+ *  \brief  Program used to test regexp
+ *  \author Antoine Fraboulet
+ *  \date   2005
+ */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dic.h"
+#include "regexp.h"
+#include "dic_search.h"
+
+/********************************************************/
+/********************************************************/
+/********************************************************/
+
+const unsigned int all_letter[DIC_LETTERS] =
+  {
+    /*                      1  1 1 1 1 1 1 1 1 1 2 2 2  2  2  2  2 */
+    /* 0 1 2 3 4  5 6 7 8 9 0  1 2 3 4 5 6 7 8 9 0 1 2  3  4  5  6 */
+    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
+       0,1,1,1,1, 1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1
+  };
+
+const unsigned int vowels[DIC_LETTERS] =
+  {
+    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
+       0,1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0
+  };
+
+const unsigned int consonants[DIC_LETTERS] =
+  {
+    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
+       0,0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1
+  };
+
+void init_letter_lists(struct search_RegE_list_t *list)
+{
+  int i;
+  memset (list,0,sizeof(*list));
+  list->minlength = 1;
+  list->maxlength = 15;
+  list->valid[0] = 1; // all letters
+  list->symbl[0] = RE_ALL_MATCH;
+  list->valid[1] = 1; // vowels
+  list->symbl[1] = RE_VOWL_MATCH;
+  list->valid[2] = 1; // consonants
+  list->symbl[2] = RE_CONS_MATCH;
+  for(i=0; i < DIC_LETTERS; i++)
+    {
+      list->letters[0][i] = all_letter[i];
+      list->letters[1][i] = vowels[i];
+      list->letters[2][i] = consonants[i];
+    }
+  list->valid[3] = 0; // user defined list 1
+  list->symbl[3] = RE_USR1_MATCH;
+  list->valid[4] = 0; // user defined list 2
+  list->symbl[4] = RE_USR2_MATCH;
+}
+
+/********************************************************/
+/********************************************************/
+/********************************************************/
+void
+usage(int argc, char* argv[])
+{
+  fprintf(stderr,"usage: %s dictionary\n",argv[0]);
+  fprintf(stderr,"   dictionary : path to dawg eliot dictionary\n");
+}
+
+int main(int argc, char* argv[])
+{
+  int i;
+  Dictionary dic;
+  char wordlist[RES_REGE_MAX][DIC_WORD_MAX];
+  char er[200];
+  strcpy(er,".");
+  struct search_RegE_list_t list;
+
+  if (argc < 2)
+    {
+      usage(argc,argv);
+    }
+
+  if (Dic_load(&dic,argv[1]))
+    {
+      fprintf(stdout,"impossible de lire le dictionnaire\n");
+      return 1;
+    }
+
+  while (strcmp(er,""))
+    {
+      fprintf(stdout,"**************************************************************\n");
+      fprintf(stdout,"**************************************************************\n");
+      fprintf(stdout,"entrer une ER:\n");
+      fgets(er,sizeof(er),stdin);
+      /* strip \n */
+      er[strlen(er) - 1] = '\0';
+      if (strcmp(er,"") == 0)
+       break;
+
+      /* automaton */
+      init_letter_lists(&list);
+      Dic_search_RegE_inner(dic,er,wordlist,&list);
+
+      fprintf(stdout,"résultat:\n");
+      for(i=0; i<RES_REGE_MAX && wordlist[i][0]; i++)
+       {
+         fprintf(stderr,"%s\n",wordlist[i]);
+       }
+    }
+
+  Dic_destroy(dic);
+  return 0;
+}
Index: eliot/utils/eliottxt.cpp
diff -u eliot/utils/eliottxt.cpp:1.12.2.1 eliot/utils/eliottxt.cpp:1.12.2.2
--- eliot/utils/eliottxt.cpp:1.12.2.1   Wed Dec 28 16:47:35 2005
+++ eliot/utils/eliottxt.cpp    Wed Dec 28 20:02:52 2005
@@ -170,15 +170,12 @@
 
 void eliottxt_get_cross(const Dictionary &iDic, wchar_t *cros)
 {
-    // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
-#if 0
     wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX];
     Dic_search_Cros(iDic, cros, wordlist);
     for (int i = 0; i < RES_CROS_MAX && wordlist[i][0]; i++)
     {
         printf("  %s\n", convertToMb(wordlist[i]).c_str());
     }
-#endif
 }
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]