gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r9800 - in Extractor/src: include main plugins


From: gnunet
Subject: [GNUnet-SVN] r9800 - in Extractor/src: include main plugins
Date: Fri, 18 Dec 2009 22:01:30 +0100

Author: grothoff
Date: 2009-12-18 22:01:30 +0100 (Fri, 18 Dec 2009)
New Revision: 9800

Added:
   Extractor/src/plugins/ps_extractor.c
Removed:
   Extractor/src/plugins/psextractor.c
Modified:
   Extractor/src/include/extractor.h
   Extractor/src/main/extractor_metatypes.c
   Extractor/src/plugins/Makefile.am
Log:
ps

Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h   2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/include/extractor.h   2009-12-18 21:01:30 UTC (rev 9800)
@@ -275,8 +275,8 @@
     EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
     EXTRACTOR_METATYPE_DISCLAIMER = 144,
     EXTRACTOR_METATYPE_WARNING = 145,
+    EXTRACTOR_METATYPE_PAGE_ORDER = 146,
 
-
     /* fixme: used up to here! */
 
     EXTRACTOR_METATYPE_LYRICS = 67,
@@ -295,7 +295,6 @@
 
     /* FIXME: transcribe & renumber those below */
     EXTRACTOR_METATYPE_USED_FONTS = 37,
-    EXTRACTOR_METATYPE_PAGE_ORDER = 38,
 
 
     /* numeric metrics */

Modified: Extractor/src/main/extractor_metatypes.c
===================================================================
--- Extractor/src/main/extractor_metatypes.c    2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/main/extractor_metatypes.c    2009-12-18 21:01:30 UTC (rev 9800)
@@ -358,14 +358,14 @@
   /* 145 */
   { gettext_noop ("warning"),
     gettext_noop ("warning about the nature of the content") }, 
+  { gettext_noop ("page order"),
+    gettext_noop ("order of the pages") }, 
   { gettext_noop (""),
     gettext_noop ("") }, 
   { gettext_noop (""),
     gettext_noop ("") }, 
   { gettext_noop (""),
     gettext_noop ("") }, 
-  { gettext_noop (""),
-    gettext_noop ("") }, 
 #if 0
   
   gettext_noop("author"),

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/plugins/Makefile.am   2009-12-18 21:01:30 UTC (rev 9800)
@@ -88,6 +88,7 @@
   $(ole2) \
   $(pdf) \
   libextractor_png.la \
+  libextractor_ps.la \
   libextractor_real.la \
   $(rpm) \
   libextractor_tar.la \
@@ -243,6 +244,11 @@
   $(top_builddir)/src/common/libextractor_common.la \
   -lz
 
+libextractor_ps_la_SOURCES = \
+  ps_extractor.c
+libextractor_ps_la_LDFLAGS = \
+  $(PLUGINFLAGS)
+
 libextractor_real_la_SOURCES = \
   real_extractor.c 
 libextractor_real_la_LDFLAGS = \
@@ -297,7 +303,6 @@
   $(extrampeg) \
   libextractor_nsf.la \
   libextractor_nsfe.la \
-  libextractor_ps.la \
   $(extraqt) \
   libextractor_riff.la \
   libextractor_s3m.la \
@@ -317,12 +322,6 @@
   -lz
 endif
 
-libextractor_ps_la_SOURCES = \
-  psextractor.c
-libextractor_ps_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_ps_la_LIBADD = \
-  $(top_builddir)/src/main/libextractor.la
 
 libextractor_id3v2_la_SOURCES = \
   id3v2extractor.c 

Copied: Extractor/src/plugins/ps_extractor.c (from rev 9791, Extractor/src/plugins/psextractor.c)
===================================================================
--- Extractor/src/plugins/ps_extractor.c                                (rev 0)
+++ Extractor/src/plugins/ps_extractor.c        2009-12-18 21:01:30 UTC (rev 9800)
@@ -0,0 +1,192 @@
+/*
+     This file is part of libextractor.
+     (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+
+
+/* Return a malloc'ed copy (caller frees) of the first line at or after pos,
+   skipping leading CR/LF bytes; NULL at end of data or if malloc fails. */
+static char *
+readline (const char *data, size_t size, size_t pos)
+{
+  size_t end;
+  char *res;
+  while ((pos < size) &&
+         ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
+    pos++;
+  if (pos >= size)
+    return NULL;                /* end of file */
+  for (end = pos; end < size; end++)
+    if ((data[end] == (char) 0x0d) || (data[end] == (char) 0x0a))
+      break;                    /* CR or LF ends the line */
+  res = malloc (end - pos + 1);
+  if (res == NULL)
+    return NULL;                /* out of memory: nothing to copy into */
+  memcpy (res, &data[pos], end - pos);
+  res[end - pos] = '\0';
+  return res;
+}
+
+
+/* Check whether line is the header comment that starts with match and,
+   if it carries a value, hand that value to proc as UTF-8 text of the
+   given type.  A value of the form "(...)" is passed on without the
+   parentheses; for that the ")" at the end of line is overwritten.
+   Returns 1 if proc was called and returned non-zero, else 0. */
+static int
+testmeta (char *line,
+          const char *match,
+          enum EXTRACTOR_MetaType type,
+          EXTRACTOR_MetaDataProcessor proc,
+          void *proc_cls)
+{
+  const size_t prefix_len = strlen (match);
+  const size_t line_len = strlen (line);
+  char *value;
+
+  if ( (line_len <= prefix_len) ||
+       (0 != strncmp (line, match, prefix_len)) )
+    return 0;                   /* different header, or no value */
+  value = &line[prefix_len];
+  if ( ('(' == *value) && (')' == line[line_len - 1]) )
+    {
+      line[line_len - 1] = '\0';        /* drop the closing ")" */
+      value++;                          /* start after the "(" */
+    }
+
+  /* the reported size includes the terminating NUL byte */
+  if (0 != proc (proc_cls, "ps", type, EXTRACTOR_METAFORMAT_UTF8,
+                 "text/plain", value, strlen (value) + 1))
+    return 1;
+  return 0;
+}
+
+typedef struct                  /* one header comment this plugin knows */
+{
+  const char *prefix;           /* text the header line starts with */
+  enum EXTRACTOR_MetaType type; /* meta type its value is reported as */
+} Matches;
+/* Recognized header comments; the entry with a NULL prefix ends the list. */
+static const Matches tests[] = {
+  {"%%Title: ", EXTRACTOR_METATYPE_TITLE},
+  {"%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME},
+  {"%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER},
+  {"%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
+  {"%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE},
+  {"%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT},
+  {"%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION},
+  {"%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE},
+  {"%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER},
+  {"%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION},
+  {"%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION},
+
+  /* Also widely used but not supported since they
+     probably make no sense:
+     "%%BoundingBox: ",
+     "%%DocumentNeededResources: ",
+     "%%DocumentSuppliedResources: ",
+     "%%DocumentProcSets: ",
+     "%%DocumentData: ", */
+
+  {NULL, 0}
+};
+
+#define PS_HEADER "%!PS-Adobe"
+
+/* Extract meta data from the header comments of a PostScript document
+   (mimetype = application/postscript).
+
+   data, size: content of the file and its length in bytes
+   proc, proc_cls: callback invoked for each item found, and its closure
+   options: not used by this plugin
+
+   Returns 1 if proc returned non-zero (extraction stops right there),
+   0 otherwise -- also when data does not start with PS_HEADER. */
+int 
+EXTRACTOR_ps_extract (const char *data,
+                      size_t size,
+                      EXTRACTOR_MetaDataProcessor proc,
+                      void *proc_cls,
+                      const char *options)
+{
+  size_t pos;
+  size_t len;
+  char *line;
+  int i;
+  int lastLine;
+  int ret;
+
+  pos = strlen (PS_HEADER);
+  if ( (size < pos) ||
+       (0 != strncmp (PS_HEADER, data, pos)) )
+    return 0;
+  if (0 != proc (proc_cls, "ps", EXTRACTOR_METATYPE_MIMETYPE,
+                 EXTRACTOR_METAFORMAT_UTF8, "text/plain",
+                 "application/postscript",
+                 strlen ("application/postscript") + 1))
+    return 1;
+  /* skip rest of first line */
+  while ((pos < size) && (data[pos] != '\r') && (data[pos] != '\n'))
+    pos++;
+
+  ret = 0;
+  lastLine = -1;
+  /* while Windows-PostScript does not seem to (always?) put
+     "%%EndComments", this should allow us to not read through most of
+     the file for all the sane applications... For Windows-generated
+     PS files, we will bail out at the end of the file. */
+  for (;;)
+    {
+      /* step over the line terminators ourselves, so that pos is the true
+         start of the line and the advance below works for CR, LF, CRLF */
+      while ((pos < size) && ((data[pos] == '\r') || (data[pos] == '\n')))
+        pos++;
+      line = readline (data, size, pos);
+      if (line == NULL)
+        break;                  /* end of file */
+      len = strlen (line);      /* taken before testmeta can cut a ")" */
+      /* find the header comment this line starts with, if any */
+      for (i = 0; tests[i].prefix != NULL; i++)
+        if (0 == strncmp (line, tests[i].prefix, strlen (tests[i].prefix)))
+          break;
+      if (tests[i].prefix != NULL)
+        {
+          lastLine = i;         /* "%%+ " lines after it add more values */
+          ret = testmeta (line, tests[i].prefix, tests[i].type,
+                          proc, proc_cls);
+        }
+      else if (0 != strncmp (line, "%%+ ", strlen ("%%+ ")))
+        lastLine = -1;          /* some other line: nothing to continue */
+      else if (lastLine != -1)
+        ret = testmeta (line, "%%+ ", tests[lastLine].type, proc, proc_cls);
+      /* stop when proc aborts, at the last line, or at %%EndComments */
+      if ( (ret != 0) ||
+           (len >= size - pos) ||       /* line ran to the end of data */
+           (0 == strncmp ("%%EndComments", line, strlen ("%%EndComments"))) )
+        break;
+      pos += len + 1;           /* skip one terminator byte, too */
+      free (line);
+    }
+  free (line);
+  return ret;
+}
+
+/* end of ps_extractor.c */

Deleted: Extractor/src/plugins/psextractor.c
===================================================================
--- Extractor/src/plugins/psextractor.c 2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/plugins/psextractor.c 2009-12-18 21:01:30 UTC (rev 9800)
@@ -1,228 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
- **/
-
-#include "platform.h"
-#include "extractor.h"
-
-static struct EXTRACTOR_Keywords *
-addKeyword (EXTRACTOR_KeywordType type,
-            char *keyword, struct EXTRACTOR_Keywords *next)
-{
-  EXTRACTOR_KeywordList *result;
-
-  if (keyword == NULL)
-    return next;
-  result = malloc (sizeof (EXTRACTOR_KeywordList));
-  result->next = next;
-  result->keyword = strdup (keyword);
-  result->keywordType = type;
-  return result;
-}
-
-static char *
-readline (char *data, size_t size, size_t pos)
-{
-  size_t end;
-  char *res;
-
-  while ((pos < size) &&
-         ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
-    pos++;
-
-  if (pos >= size)
-    return NULL;                /* end of file */
-  end = pos;
-  while ((end < size) &&
-         (data[end] != (char) 0x0d) && (data[end] != (char) 0x0a))
-    end++;
-  res = malloc (end - pos + 1);
-  memcpy (res, &data[pos], end - pos);
-  res[end - pos] = '\0';
-
-  return res;
-}
-
-static struct EXTRACTOR_Keywords *
-testmeta (char *line,
-          const char *match,
-          EXTRACTOR_KeywordType type, struct EXTRACTOR_Keywords *prev)
-{
-  if ((strncmp (line, match, strlen (match)) == 0) &&
-      (strlen (line) > strlen (match)))
-    {
-      char *key;
-
-      if ((line[strlen (line) - 1] == ')') && (line[strlen (match)] == '('))
-        {
-          key = &line[strlen (match) + 1];
-          key[strlen (key) - 1] = '\0'; /* remove ")" */
-        }
-      else
-        {
-          key = &line[strlen (match)];
-        }
-      prev = addKeyword (type, key, prev);
-    }
-  return prev;
-}
-
-typedef struct
-{
-  char *prefix;
-  EXTRACTOR_KeywordType type;
-} Matches;
-
-static Matches tests[] = {
-  {"%%Title: ", EXTRACTOR_TITLE},
-  {"%%Version: ", EXTRACTOR_VERSIONNUMBER},
-  {"%%Creator: ", EXTRACTOR_CREATOR},
-  {"%%CreationDate: ", EXTRACTOR_CREATION_DATE},
-  {"%%Pages: ", EXTRACTOR_PAGE_COUNT},
-  {"%%Orientation: ", EXTRACTOR_UNKNOWN},
-  {"%%DocumentPaperSizes: ", EXTRACTOR_UNKNOWN},
-  {"%%DocumentFonts: ", EXTRACTOR_UNKNOWN},
-  {"%%PageOrder: ", EXTRACTOR_UNKNOWN},
-  {"%%For: ", EXTRACTOR_UNKNOWN},
-  {"%%Magnification: ", EXTRACTOR_UNKNOWN},
-
-  /* Also widely used but not supported since they
-     probably make no sense:
-     "%%BoundingBox: ",
-     "%%DocumentNeededResources: ",
-     "%%DocumentSuppliedResources: ",
-     "%%DocumentProcSets: ",
-     "%%DocumentData: ", */
-
-  {NULL, 0},
-};
-
-/* which mime-types should not be subjected to
-   the PostScript extractor (no use trying) */
-static char *blacklist[] = {
-  "image/jpeg",
-  "image/gif",
-  "image/png",
-  "image/x-png",
-  "audio/real",
-  "audio/mpeg",
-  "application/x-gzip",
-  "application/x-dpkg",
-  "application/bz2",
-  "application/x-rpm",
-  "application/x-rar",
-  "application/x-zip",
-  "application/x-arj",
-  "application/x-compress",
-  "application/x-tar",
-  "application/x-lha",
-  "application/x-gtar",
-  "application/x-dpkg",
-  "application/ogg",
-  "video/real",
-  "video/asf",
-  "video/quicktime",
-  NULL,
-};
-
-/* mimetype = application/postscript */
-struct EXTRACTOR_Keywords *
-libextractor_ps_extract (const char *filename,
-                         char *data,
-                         size_t size, struct EXTRACTOR_Keywords *prev)
-{
-  size_t pos;
-  char *psheader = "%!PS-Adobe";
-  char *line;
-  int i;
-  int lastLine;
-  const char *mime;
-
-  /* if the mime-type of the file is blacklisted, don't
-     run the printable extactor! */
-  mime = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev);
-  if (mime != NULL)
-    {
-      int j;
-      j = 0;
-      while (blacklist[j] != NULL)
-        {
-          if (0 == strcmp (blacklist[j], mime))
-            return prev;
-          j++;
-        }
-    }
-
-
-  pos = 0;
-  while ((pos < size) &&
-         (pos < strlen (psheader)) && (data[pos] == psheader[pos]))
-    pos++;
-  if (pos != strlen (psheader))
-    {
-      return prev;              /* no ps */
-    }
-
-  prev = addKeyword (EXTRACTOR_MIMETYPE, "application/postscript", prev);
-
-  /* skip rest of first line */
-  while ((pos < size) && (data[pos] != '\n'))
-    pos++;
-
-  lastLine = -1;
-  line = strdup (psheader);
-
-  /* while Windows-PostScript does not seem to (always?) put
-     "%%EndComments", this should allow us to not read through most of
-     the file for all the sane applications... For Windows-generated
-     PS files, we will bail out at the end of the file. */
-  while (0 != strncmp ("%%EndComments", line, strlen ("%%EndComments")))
-    {
-      free (line);
-      line = readline (data, size, pos);
-      if (line == NULL)
-        break;
-      i = 0;
-      while (tests[i].prefix != NULL)
-        {
-          prev = testmeta (line, tests[i].prefix, tests[i].type, prev);
-          i++;
-        }
-
-      /* %%+ continues previous meta-data type... */
-      if ((lastLine != -1) && (0 == strncmp (line, "%%+ ", strlen ("%%+ "))))
-        {
-          prev = testmeta (line, "%%+ ", tests[lastLine].type, prev);
-        }
-      else
-        {
-          /* update "previous" type */
-          if (tests[i].prefix == NULL)
-            lastLine = -1;
-          else
-            lastLine = i;
-        }
-      pos += strlen (line) + 1; /* skip newline, too; guarantee progress! */
-    }
-  free (line);
-
-  return prev;
-}
-
-/* end of psextractor.c */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]