gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r2620 - in Extractor: . src/plugins/printable


From: grothoff
Subject: [GNUnet-SVN] r2620 - in Extractor: . src/plugins/printable
Date: Sun, 16 Apr 2006 15:55:16 -0700 (PDT)

Author: grothoff
Date: 2006-04-16 15:55:07 -0700 (Sun, 16 Apr 2006)
New Revision: 2620

Added:
   Extractor/src/plugins/printable/makelang
Modified:
   Extractor/README
   Extractor/configure.ac
   Extractor/src/plugins/printable/Makefile.am
   Extractor/src/plugins/printable/bloomfilter-def.h
   Extractor/src/plugins/printable/bloomfilter.h
   Extractor/src/plugins/printable/dictionary-builder.c
   Extractor/src/plugins/printable/printableextractor.h
Log:
fixing memory utilization for compiling printable plugins

Modified: Extractor/README
===================================================================
--- Extractor/README    2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/README    2006-04-16 22:55:07 UTC (rev 2620)
@@ -74,11 +74,6 @@
 Notes
 =====
 
-libextractor contains some very large C files.  gcc can easily use
-over (!) 100 MB of memory to compile them.  If you have that much,
-libextractor will compile in about a minute.  If you don't have that
-much, you may want to consider using the binaries.
-
 On Mac OS X, libextractor will avoid using GCC 3.1, because of
 problems compiling one of the extractors.  GCC 3.3 and 2.95.2 are
 known to work well; as such, libextractor will first look for 3.3 (by

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/configure.ac      2006-04-16 22:55:07 UTC (rev 2620)
@@ -341,7 +341,7 @@
 then
  AC_MSG_NOTICE([NOTICE: printable plugins disabled])
 else 
- AC_MSG_NOTICE([NOTICE: printable plugins enabled (will need 150 MB memory to compile)])
+ AC_MSG_NOTICE([NOTICE: printable plugins enabled])
 fi
 
 if test "x$without_glib" = "xtrue"

Modified: Extractor/src/plugins/printable/Makefile.am
===================================================================
--- Extractor/src/plugins/printable/Makefile.am 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/Makefile.am 2006-04-16 22:55:07 UTC (rev 2620)
@@ -2,22 +2,30 @@
 
 noinst_PROGRAMS = dictionary-builder
 
-CLEANFILES = da.c de.c en.c es.c it.c no.c pt.c peda.c pede.c peen.c pees.c peit.c peno.c pept.c
+da_LANG=da_0.c da_1.c da_2.c da_3.c da_4.c da_5.c da_6.c da_7.c da_8.c da_9.c da_10.c da_11.c da_12.c da_13.c da_14.c da_15.c da_16.c da_17.c da_18.c da_19.c da_20.c da_21.c da_22.c da_23.c da_24.c da_25.c da_26.c da_27.c da_28.c da_29.c da_30.c da_31.c
+de_LANG=de_0.c de_1.c de_2.c de_3.c de_4.c de_5.c de_6.c de_7.c de_8.c de_9.c de_10.c de_11.c de_12.c de_13.c de_14.c de_15.c de_16.c de_17.c de_18.c de_19.c de_20.c de_21.c de_22.c de_23.c de_24.c de_25.c de_26.c de_27.c de_28.c de_29.c de_30.c de_31.c
+en_LANG=en_0.c en_1.c en_2.c en_3.c en_4.c en_5.c en_6.c en_7.c en_8.c en_9.c en_10.c en_11.c en_12.c en_13.c en_14.c en_15.c en_16.c en_17.c en_18.c en_19.c en_20.c en_21.c en_22.c en_23.c en_24.c en_25.c en_26.c en_27.c en_28.c en_29.c en_30.c en_31.c
+es_LANG=es_0.c es_1.c es_2.c es_3.c es_4.c es_5.c es_6.c es_7.c es_8.c es_9.c es_10.c es_11.c es_12.c es_13.c es_14.c es_15.c es_16.c es_17.c es_18.c es_19.c es_20.c es_21.c es_22.c es_23.c es_24.c es_25.c es_26.c es_27.c es_28.c es_29.c es_30.c es_31.c
+it_LANG=it_0.c it_1.c it_2.c it_3.c it_4.c it_5.c it_6.c it_7.c it_8.c it_9.c it_10.c it_11.c it_12.c it_13.c it_14.c it_15.c it_16.c it_17.c it_18.c it_19.c it_20.c it_21.c it_22.c it_23.c it_24.c it_25.c it_26.c it_27.c it_28.c it_29.c it_30.c it_31.c
+no_LANG=no_0.c no_1.c no_2.c no_3.c no_4.c no_5.c no_6.c no_7.c no_8.c no_9.c no_10.c no_11.c no_12.c no_13.c no_14.c no_15.c no_16.c no_17.c no_18.c no_19.c no_20.c no_21.c no_22.c no_23.c no_24.c no_25.c no_26.c no_27.c no_28.c no_29.c no_30.c no_31.c
+pt_LANG=pt_0.c pt_1.c pt_2.c pt_3.c pt_4.c pt_5.c pt_6.c pt_7.c pt_8.c pt_9.c pt_10.c pt_11.c pt_12.c pt_13.c pt_14.c pt_15.c pt_16.c pt_17.c pt_18.c pt_19.c pt_20.c pt_21.c pt_22.c pt_23.c pt_24.c pt_25.c pt_26.c pt_27.c pt_28.c pt_29.c pt_30.c pt_31.c
 
+CLEANFILES = da.c de.c en.c es.c it.c no.c pt.c peda.c pede.c peen.c pees.c peit.c peno.c pept.c $(da_LANG) $(de_LANG) $(es_LANG) $(en_LANG) $(it_LANG) $(no_LANG) $(pt_LANG)
+
 da.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/da > da.c
+       ./dictionary-builder $(srcdir)/da da > da.c
 de.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/de > de.c
+       ./dictionary-builder $(srcdir)/de de > de.c
 en.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/en > en.c
+       ./dictionary-builder $(srcdir)/en en > en.c
 es.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/es > es.c
+       ./dictionary-builder $(srcdir)/es es > es.c
 it.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/it > it.c
+       ./dictionary-builder $(srcdir)/it it > it.c
 no.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/no > no.c
+       ./dictionary-builder $(srcdir)/no no > no.c
 pt.c: dictionary-builder$(EXEEXT)
-       ./dictionary-builder $(srcdir)/pt > pt.c
+       ./dictionary-builder $(srcdir)/pt pt > pt.c
 
 peda.c:
        cat peXX.c | sed -e "s/XX/da/" > peda.c
@@ -66,36 +74,36 @@
 
 
 libextractor_printable_da_la_SOURCES = \
-  da.c peda.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  da.c $(da_LANG) peda.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_da_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_de_la_SOURCES = \
-  de.c pede.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  de.c $(de_LANG) pede.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_de_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_en_la_SOURCES = \
-  en.c peen.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  en.c $(en_LANG) peen.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_en_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_es_la_SOURCES = \
-  es.c pees.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  es.c $(es_LANG) pees.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_es_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_it_la_SOURCES = \
-  it.c peit.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  it.c $(it_LANG) peit.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_it_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_no_la_SOURCES = \
-  no.c peno.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  no.c $(no_LANG) peno.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_no_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)
 
 libextractor_printable_pt_la_SOURCES = \
-  pt.c pept.c bloomfilter.h printableextractor.h bloomfilter-def.h
+  pt.c $(pt_LANG) pept.c bloomfilter.h printableextractor.h bloomfilter-def.h
 libextractor_printable_pt_la_LDFLAGS = \
  $(PLUGINFLAGS) $(retaincommand)

Modified: Extractor/src/plugins/printable/bloomfilter-def.h
===================================================================
--- Extractor/src/plugins/printable/bloomfilter-def.h   2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/bloomfilter-def.h   2006-04-16 22:55:07 UTC (rev 2620)
@@ -28,11 +28,14 @@
 #include "platform.h"
 #include <string.h>
 
+#define SUBTABLES 32 /* number of sub-tables the bloomfilter bit array is split into */
+
 typedef struct {
   /** How many bits we set for each stored element */
   unsigned int addressesPerElement;
   /** The actual bloomfilter bit array */
   unsigned char * bitArray;
+  unsigned char ** sbitArray; /**< the bit array as SUBTABLES separate sub-tables; filters emitted by dictionary-builder set bitArray to NULL and fill this member instead */
   /** Size of bitArray in bytes */
   unsigned int bitArraySize;
 } Bloomfilter;

Modified: Extractor/src/plugins/printable/bloomfilter.h
===================================================================
--- Extractor/src/plugins/printable/bloomfilter.h       2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/bloomfilter.h       2006-04-16 22:55:07 UTC (rev 2620)
@@ -29,6 +29,7 @@
 #include <string.h>
 #include "bloomfilter-def.h"
 
+
 typedef struct {
   unsigned char data[20];
 } HashCode160;

Modified: Extractor/src/plugins/printable/dictionary-builder.c
===================================================================
--- Extractor/src/plugins/printable/dictionary-builder.c        2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/dictionary-builder.c        2006-04-16 22:55:07 UTC (rev 2620)
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
+     (C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -16,11 +16,6 @@
      along with libextractor; see the file COPYING.  If not, write to the
      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      Boston, MA 02111-1307, USA.
-
-     Portions of this code were adapted from libhtmlparse by
-     Mooneer Salem (address@hidden).  The main changes
-     to libhtmlparse were the removal of globals to make the
-     code reentrant.
  */
 /**
  * Tool to build a bloomfilter from a dictionary.
@@ -81,11 +76,13 @@
 
 #define ADDR_PER_ELEMENT 46
 
+
 int main(int argc,
         char ** argv) {
   Bloomfilter bf;
   HashCode160 hc;
   int i;
+  int j;
   int cnt;
   char * fn;
   char ** words;
@@ -95,11 +92,11 @@
   char * charset = NULL;
 #define ALLOCSIZE 1024*1024
 
-  if (argc<2) {
+  if (argc<3) {
     fprintf(stderr,
            _("Please provide the name of the language you are building\n"
              "a dictionary for.  For example:\n"));
-    fprintf(stderr, "$ ./dictionary-builder en > en.c\n");
+    fprintf(stderr, "$ ./dictionary-builder ./en en > en.c\n");
     exit(-1);
   }
 
@@ -139,7 +136,7 @@
   }
 
   bf.addressesPerElement = ADDR_PER_ELEMENT;
-  bf.bitArraySize = cnt*4;
+  bf.bitArraySize = cnt * 4 / SUBTABLES * SUBTABLES;
   bf.bitArray = malloc(bf.bitArraySize);
   memset(bf.bitArray, 0, bf.bitArraySize);
 
@@ -158,12 +155,34 @@
      gcc versions then output tons of warnings about "decimal constant
      is so large that it is unsigned" (even for unsigned long long[]
      that warning is generated and dramatically increases compile times). */
+  for (j=0;j<SUBTABLES;j++) {
+    char fn[64];
+    FILE * btfile;
+
+    snprintf(fn, 64, "%s_%d.c", argv[1], j);
+    btfile = fopen(fn, "w+");    
+    fprintf(btfile,
+           "int %s_bits_%d[] = { ", argv[2], j);
+    for (i= j    * bf.bitArraySize/sizeof(int)/SUBTABLES;
+        i<(j+1) * bf.bitArraySize/sizeof(int)/SUBTABLES;
+        i++)
+      fprintf(btfile,
+             "%dL,",
+             (((int*)bf.bitArray)[i]));
+    fprintf(btfile,
+           "};\n");
+    fclose(btfile);
+    fprintf(stdout,
+           "extern int %s_bits_%d[];\n", argv[2], j);
+  }
+
   fprintf(stdout,
-         "static int bits[] = { ");
-  for (i=0;i<bf.bitArraySize/sizeof(int);i++)
+         "static int * bits[] = { ");
+  for (i=0;i<SUBTABLES;i++)
     fprintf(stdout,
-           "%dL,",
-           (((int*)bf.bitArray)[i]));
+           "%s_bits_%d,",
+           argv[2],
+           i);
   fprintf(stdout,
          "};\n");
   bn = &argv[1][strlen(argv[1])];
@@ -175,7 +194,8 @@
   fprintf(stdout,
          "Bloomfilter libextractor_printable_%s_filter = {\n"
          "  %u,\n"
-         "  (unsigned char*)bits,\n"
+         "  NULL,\n" /* bitarray */
+         "  (unsigned char **)bits,\n" /* sbitArray */
          "  %u };\n",
          bn,
          ADDR_PER_ELEMENT,

Added: Extractor/src/plugins/printable/makelang
===================================================================
--- Extractor/src/plugins/printable/makelang    2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/makelang    2006-04-16 22:55:07 UTC (rev 2620)
@@ -0,0 +1,11 @@
+#!/bin/sh
+for l in da de en es it no pt   # prints one "<lang>_LANG=<lang>_0.c ... <lang>_31.c" line per language code
+do
+  r=""   # accumulates the space-separated file list for the current language
+  for n in `seq 0 31`   # indices 0..31, one per <lang>_<n>.c file name
+  do 
+    m=`expr $n - 1`   # NOTE(review): m is never read anywhere in this script
+    r="$r${l}_$n.c "   # append "<lang>_<n>.c " to the list
+  done
+  echo "${l}_LANG=$r"   # write the finished assignment line to stdout
+done


Property changes on: Extractor/src/plugins/printable/makelang
___________________________________________________________________
Name: svn:executable
   + *

Modified: Extractor/src/plugins/printable/printableextractor.h
===================================================================
--- Extractor/src/plugins/printable/printableextractor.h        2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/printableextractor.h        2006-04-16 22:55:07 UTC (rev 2620)
@@ -33,6 +33,8 @@
 #include <string.h>
 #include "bloomfilter.h"
 
+
+
 /**
  * Checks if a bit is active in the bitArray
  *
@@ -40,14 +42,17 @@
  * @param bitIdx which bit to test
  * @return 1 if the bit is set, 0 if not.
  */
-static int testBit(unsigned char * bitArray,
+static int testBit(unsigned char ** bitArray, /* array of sub-table pointers (the caller passes bf->sbitArray) */
+                  unsigned int size, /* combined size of all sub-tables in bytes (the caller passes bf->bitArraySize) */
                   unsigned int bitIdx) {
   unsigned int slot;
   unsigned int targetBit;
+  unsigned int msize; /* bytes per sub-table */
 
   slot = bitIdx / 8;
   targetBit = (1L << (bitIdx % 8));
-  return (bitArray[slot] & targetBit) != 0;
+  msize = size / SUBTABLES; /* NOTE(review): this is 0 when size < SUBTABLES, so the next line would divide by zero -- confirm no filter is that small */
+  return (bitArray[slot / msize][slot % msize] & targetBit) != 0; /* slot / msize selects the sub-table, slot % msize the byte inside it */
 }
 
 
@@ -62,7 +67,8 @@
                            unsigned int bit,
                            void * cls) {
   int * arg = cls;
-  if (! testBit(bf->bitArray,
+  if (! testBit(bf->sbitArray,
+               bf->bitArraySize,
                bit))
     *arg = 0;
 }





reply via email to

[Prev in Thread] Current Thread [Next in Thread]