gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23086 - in Extractor: . src/main src/plugins test


From: gnunet
Subject: [GNUnet-SVN] r23086 - in Extractor: . src/main src/plugins test
Date: Sat, 4 Aug 2012 02:13:00 +0200

Author: grothoff
Date: 2012-08-04 02:13:00 +0200 (Sat, 04 Aug 2012)
New Revision: 23086

Added:
   Extractor/src/plugins/test_mime.c
Removed:
   Extractor/test/courseclear.ogg
Modified:
   Extractor/INSTALL
   Extractor/configure.ac
   Extractor/src/main/extract.c
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/mime_extractor.c
Log:
get mime plugin working again, this time using libmagic

Modified: Extractor/INSTALL
===================================================================
--- Extractor/INSTALL   2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/INSTALL   2012-08-04 00:13:00 UTC (rev 23086)
@@ -1,8 +1,8 @@
 Installation Instructions
 *************************
 
-Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
-Inc.
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006, 2007, 2008, 2009 Free Software Foundation, Inc.
 
    Copying and distribution of this file, with or without modification,
 are permitted in any medium without royalty provided the copyright
@@ -226,11 +226,6 @@
 
 and if that doesn't work, install pre-built binaries of GCC for HP-UX.
 
-   HP-UX `make' updates targets which have the same time stamps as
-their prerequisites, which makes it generally unusable when shipped
-generated files such as `configure' are involved.  Use GNU `make'
-instead.
-
    On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
 parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
 a workaround.  If GNU CC is not installed, it is therefore recommended

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/configure.ac      2012-08-04 00:13:00 UTC (rev 23086)
@@ -312,6 +312,16 @@
 LDFLAGS=$SAVED_LDFLAGS
 AC_LANG_POP(C++)
 
+
+
+AC_MSG_CHECKING(for magic_open -lmagic)
+SAVED_LDFLAGS=$LDFLAGS
+AC_CHECK_LIB(magic, magic_open,
+  [AC_CHECK_HEADERS([magic.h],
+   AM_CONDITIONAL(HAVE_MAGIC, true),
+   AM_CONDITIONAL(HAVE_MAGIC, false))],
+  AM_CONDITIONAL(HAVE_MAGIC, false))
+
 # restore LIBS
 LIBS=$LIBSOLD
 

Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c        2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/main/extract.c        2012-08-04 00:13:00 UTC (rev 23086)
@@ -857,6 +857,7 @@
     printf ("\n");
   free (print);
   EXTRACTOR_plugin_remove_all (plugins);
+  plugins = NULL;
   cleanup_bibtex (); /* actually free's stuff */
   return ret;
 }

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/plugins/Makefile.am   2012-08-04 00:13:00 UTC (rev 23086)
@@ -19,15 +19,22 @@
 TEST_OGG=test_ogg
 endif
 
+if HAVE_MAGIC
+PLUGIN_MIME=libextractor_mime.la
+TEST_MIME=test_mime
+endif
+
 plugin_LTLIBRARIES = \
-  $(PLUGIN_OGG) 
+  $(PLUGIN_OGG) \
+  $(PLUGIN_MIME)
 
 if HAVE_ZZUF
   fuzz_tests=fuzz_default.sh 
 endif
 
 check_PROGRAMS = \
-  $(TEST_OGG)
+  $(TEST_OGG) \
+  $(TEST_MIME)
 
 TESTS = \
   $(fuzz_tests) \
@@ -58,4 +65,19 @@
   $(top_builddir)/src/plugins/libtest.la
 
 
+libextractor_mime_la_SOURCES = \
+  mime_extractor.c
+libextractor_mime_la_LDFLAGS = \
+  $(PLUGINFLAGS)
+libextractor_mime_la_LIBADD = \
+  $(top_builddir)/src/main/libextractor.la \
+  $(top_builddir)/src/common/libextractor_common.la \
+  -lmagic
 
+test_mime_SOURCES = \
+  test_mime.c
+test_mime_LDADD = \
+  $(top_builddir)/src/plugins/libtest.la
+
+
+

Modified: Extractor/src/plugins/mime_extractor.c
===================================================================
--- Extractor/src/plugins/mime_extractor.c      2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/plugins/mime_extractor.c      2012-08-04 00:13:00 UTC (rev 23086)
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff
+     (C) 2012 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -20,301 +20,67 @@
 
 #include "platform.h"
 #include "extractor.h"
+#include <magic.h>
 
 
 /**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @param arg closure...
- * @return 0 if the file does not match, 1 if it does
- **/
-typedef int (*Detector) (const char *data, size_t len, void *arg);
+ * Global handle to MAGIC data.
+ */
+static magic_t magic;
 
+ 
 /**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @return always 1
- **/
-static int
-defaultDetector (const char *data, size_t len, void *arg)
+ * Main entry method for the MIME type detection plugin (uses libmagic).
+ *
+ * @param ec extraction context provided to the plugin
+ */
+void
+EXTRACTOR_mime_extract_method (struct EXTRACTOR_ExtractContext *ec)
 {
-  return 1;
-}
+  void *buf;
+  ssize_t ret;
+  const char *mime;
 
-/**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @return always 0
- **/
-static int
-disableDetector (const char *data, size_t len, void *arg)
-{
-  return 0;
-}
-
-typedef struct ExtraPattern
-{
-  int pos;
-  int len;
-  const char *pattern;
-} ExtraPattern;
-
-/**
- * Define special matching rules for complicated formats...
- **/
-static ExtraPattern xpatterns[] = {
-#define AVI_XPATTERN 0
-  {8, 4, "AVI "},
-  {0, 0, NULL},
-#define WAVE_XPATTERN 2
-  {8, 4, "WAVE"},
-  {0, 0, NULL},
-#define ACE_XPATTERN 4
-  {4, 10, "\x00\x00\x90**ACE**"},
-  {0, 0, NULL},
-#define TAR_XPATTERN 6
-  {257, 6, "ustar\x00"},
-  {0, 0, NULL},
-#define GTAR_XPATTERN 8
-  {257, 8, "ustar\040\040\0"},
-  {0, 0, NULL},
-#define RMID_XPATTERN 10
-  {8, 4, "RMID"},
-  {0, 0, NULL},
-#define ACON_XPATTERN 12
-  {8, 4, "ACON"},
-  {0, 0, NULL},
-#define CR2_PATTERN 14
-  {8, 3, "CR\x02"},
-  {0, 0, NULL},
-};
-
-/**
- * Detect AVI. A pattern matches if all XPatterns until the next {0,
- * 0, NULL} slot match. OR-ing patterns can be achieved using multiple
- * entries in the main table, so this "AND" (all match) semantics are
- * the only reasonable answer.
- **/
-static int
-xPatternMatcher (const char *data, size_t len, void *cls)
-{
-  ExtraPattern *arg = cls;
-
-  while (arg->pattern != NULL)
+  ret = ec->read (ec->cls,
+                 &buf,
+                 16 * 1024);
+  if (-1 == ret)
+    return;
+  mime = magic_buffer (magic, buf, ret);
+  if (NULL == mime)
     {
-      if (arg->pos + arg->len > len)
-        return 0;
-      if (0 != memcmp (&data[arg->pos], arg->pattern, arg->len))
-        return 0;
-      arg++;
+      magic_close (magic);
+      return;
     }
-  return 1;
+  ec->proc (ec->cls,
+           "mime",
+           EXTRACTOR_METATYPE_MIMETYPE,
+           EXTRACTOR_METAFORMAT_UTF8,
+           "text/plain",
+           mime,
+           strlen (mime) + 1);
 }
 
+
 /**
- * Detect SVG
+ * Constructor for the library.  Loads the magic file.
  */
-static int
-svgMatcher (const char *data, size_t len, void *cls)
+void __attribute__ ((constructor)) 
+mime_ltdl_init () 
 {
-  enum
-  { XMLSTART, XMLCLOSE, SVGSTART } state;
-  size_t i;
-
-  i = 0;
-  state = XMLSTART;
-
-  while (i < len)
-    {
-      if (!isprint ( (unsigned char) data[i]))
-        return 0;
-      switch (state)
-        {
-        case XMLSTART:
-          if (i + 6 >= len)
-            return 0;
-          else if (memcmp (data + i, "<?xml", 5) == 0
-                   && isspace ( (unsigned char) *(data + i + 5)))
-            state = XMLCLOSE;
-          break;
-        case XMLCLOSE:
-          if (i + 2 >= len)
-            return 0;
-          else if (memcmp (data + i, "?>", 2) == 0)
-            state = SVGSTART;
-          break;
-        case SVGSTART:
-          if (i + 5 >= len)
-            return 0;
-          else if (memcmp (data + i, "<svg", 4) == 0
-                   && isspace ( (unsigned char) *(data + i + 4)))
-            return 1;
-          break;
-        default:
-          /* do nothing */
-          break;
-        }
-      i++;
-    }
-  return 0;
+  magic = magic_open (MAGIC_MIME_TYPE);
+  magic_load (magic, "/usr/share/misc/magic");
 }
 
-/**
- * Use this detector, if the simple header-prefix matching is
- * sufficient.
- **/
-#define DEFAULT &defaultDetector, NULL
 
 /**
- * Use this detector, to disable the mime-type (effectively comment it
- * out).
- **/
-#define DISABLED &disableDetector, NULL
-
-/**
- * Select an entry in xpatterns for matching
- **/
-#define XPATTERN(a) &xPatternMatcher, &xpatterns[(a)]
-
-typedef struct Pattern
+ * Destructor for the library, cleans up.
+ */
+void __attribute__ ((destructor)) 
+mime_ltdl_fini () 
 {
-  const char *pattern;
-  int size;
-  const char *mimetype;
-  Detector detector;
-  void *arg;
-} Pattern;
-
-static Pattern patterns[] = {
-  {"\xFF\xD8", 2, "image/jpeg", DEFAULT},
-  {"\211PNG\r\n\032\n", 8, "image/png", DEFAULT},
-  {"/* XPM */", 9, "image/x-xpm", DEFAULT},
-  {"GIF8", 4, "image/gif", DEFAULT},
-  {"P1", 2, "image/x-portable-bitmap", DEFAULT},
-  {"P2", 2, "image/x-portable-graymap", DEFAULT},
-  {"P3", 2, "image/x-portable-pixmap", DEFAULT},
-  {"P4", 2, "image/x-portable-bitmap", DEFAULT},
-  {"P5", 2, "image/x-portable-graymap", DEFAULT},
-  {"P6", 2, "image/x-portable-pixmap", DEFAULT},
-  {"P7", 2, "image/x-portable-anymap", DEFAULT},
-  {"BM", 2, "image/x-bmp", DEFAULT},
-  {"fLaC", 4, "audio/flac", DEFAULT},
-  {"\x89PNG", 4, "image/x-png", DEFAULT},
-  {"id=ImageMagick", 14, "application/x-imagemagick-image", DEFAULT},
-  {"hsi1", 4, "image/x-jpeg-proprietary", DEFAULT},
-  {"FLV", 3, "video/x-flv", DEFAULT},
-  {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
-  {"CWS", 3, "application/x-shockwave-flash", DEFAULT},
-  {"\x2E\x52\x4d\x46", 4, "video/real", DEFAULT},
-  {"\x2e\x72\x61\xfd", 4, "audio/real", DEFAULT},
-  {"\x00\x05\x16\x00", 4, "application/applefile", DEFAULT},
-  {"\x00\x05\x16\x07", 4, "application/applefile", DEFAULT},
-  {"\177ELF", 4, "application/x-executable", DEFAULT},
-  /* FIXME: correct MIME-type for an ELF!? */
-  {"\xca\xfe\xba\xbe", 4, "application/java", DEFAULT},
-  /* FIXME: correct MIME for a class-file? */
-  {"gimp xcf", 8, "image/xcf", DEFAULT},
-  {"II\x2a\x00\x10", 5, "image/x-canon-cr2", XPATTERN (CR2_PATTERN)},
-  {"IIN1", 4, "image/tiff", DEFAULT},
-  {"MM\x00\x2a", 4, "image/tiff", DEFAULT},     /* big-endian */
-  {"II\x2a\x00", 4, "image/tiff", DEFAULT},     /* little-endian */
-  {"%PDF", 4, "application/pdf", DEFAULT},
-  {"%!PS-Adobe-", 11, "application/postscript", DEFAULT},
-  {"\004%!PS-Adobe-", 12, "application/postscript", DEFAULT},
-  {"RIFF", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
-  {"RIFF", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
-  {"RIFX", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
-  {"RIFX", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
-  {"RIFF", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
-  {"RIFX", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
-  {"RIFF", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
-  {"RIFX", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
-  {"\211GND\r\n\032\n", 8, "application/gnunet-directory", DEFAULT},
-  {"{\\rtf", 5, "application/rtf", DEFAULT},
-  {"\xf7\x02", 2, "application/x-dvi", DEFAULT},
-  {"\x1F\x8B\x08\x00", 4, "application/x-gzip", DEFAULT},
-  {"BZh91AY&SY", 10, "application/bz2", DEFAULT},
-  {"\xED\xAB\xEE\xDB", 4, "application/x-rpm", DEFAULT},        /* binary */
-  {"!<arch>\ndebian", 14, "application/x-dpkg", DEFAULT},       /* .deb */
-  {"PK\x03\x04", 4, "application/x-zip", DEFAULT},
-  {"\xea\x60", 2, "application/x-arj", DEFAULT},
-  {"\037\235", 2, "application/x-compress", DEFAULT},
-  {"Rar!", 4, "application/x-rar", DEFAULT},
-  {"", 0, "application/x-ace", XPATTERN (ACE_XPATTERN)},
-  {"", 0, "application/x-tar", XPATTERN (TAR_XPATTERN)},
-  {"", 0, "application/x-gtar", XPATTERN (GTAR_XPATTERN)},
-  {"-lh0-", 5, "application/x-lha", DEFAULT},
-  {"-lh1-", 5, "application/x-lha", DEFAULT},
-  {"-lh2-", 5, "application/x-lha", DEFAULT},
-  {"-lh3-", 5, "application/x-lha", DEFAULT},
-  {"-lh4-", 5, "application/x-lha", DEFAULT},
-  {"-lh5-", 5, "application/x-lha", DEFAULT},
-  {"-lh6-", 5, "application/x-lha", DEFAULT},
-  {"-lh7-", 5, "application/x-lha", DEFAULT},
-  {"-lhd-", 5, "application/x-lha", DEFAULT},
-  {"-lh\40-", 5, "application/x-lha", DEFAULT},
-  {"-lz4-", 5, "application/x-lha", DEFAULT},
-  {"-lz5-", 5, "application/x-lha", DEFAULT},
-  {"-lzs-", 5, "application/x-lha", DEFAULT},
-  {"\xFD\x76", 2, "application/x-lzh", DEFAULT},
-  {"\x00\x00\x01\xb3", 4, "video/mpeg", DEFAULT},
-  {"\x00\x00\x01\xba", 4, "video/mpeg", DEFAULT},
-  {"moov", 4, "video/quicktime", DEFAULT},
-  {"mdat", 4, "video/quicktime", DEFAULT},
-  {"\x8aMNG", 4, "video/x-mng", DEFAULT},
-  {"\x30\x26\xb2\x75\x8e\x66", 6, "video/x-ms-asf", DEFAULT},        /* same as .wmv ? */
-  {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
-  {"MThd", 4, "audio/midi", DEFAULT},
-  {"ID3", 3, "audio/mpeg", DEFAULT},
-  {"\xFF\xFA", 2, "audio/mpeg", DEFAULT},
-  {"\xFF\xFB", 2, "audio/mpeg", DEFAULT},
-  {"\xFF\xFC", 2, "audio/mpeg", DEFAULT},
-  {"\xFF\xFD", 2, "audio/mpeg", DEFAULT},
-  {"\xFF\xFE", 2, "audio/mpeg", DEFAULT},
-  {"\xFF\xFF", 2, "audio/mpeg", DEFAULT},
-  {"OggS", 4, "application/ogg", DEFAULT},
-  {"#!/bin/sh", 9, "application/x-shellscript", DEFAULT},
-  {"#!/bin/bash", 11, "application/x-shellscript", DEFAULT},
-  {"#!/bin/csh", 10, "application/x-shellscript", DEFAULT},
-  {"#!/bin/tcsh", 11, "application/x-shellscript", DEFAULT},
-  {"#!/bin/perl", 11, "application/x-perl", DEFAULT},
-  {"<?xml", 5, "image/svg+xml", svgMatcher, NULL},
-  {NULL, 0, NULL, DISABLED}
-};
-
-
-int 
-EXTRACTOR_mime_extract (const char *data,
-                       size_t size,
-                       EXTRACTOR_MetaDataProcessor proc,
-                       void *proc_cls,
-                       const char *options)
-{
-  int i;
-
-  i = 0;
-  while (patterns[i].pattern != NULL)
-    {
-      if (size < patterns[i].size)
-        {
-          i++;
-          continue;
-        }
-      if (0 == memcmp (patterns[i].pattern, data, patterns[i].size))
-        {
-          if (patterns[i].detector (data, size, patterns[i].arg))
-            return proc (proc_cls,
-                        "mime",
-                        EXTRACTOR_METATYPE_MIMETYPE,
-                        EXTRACTOR_METAFORMAT_UTF8,
-                        "text/plain",
-                        patterns[i].mimetype,
-                        strlen(patterns[i].mimetype)+1);
-        }
-      i++;
-    }
-  return 0;
+  magic_close (magic);
+  magic = NULL;
 }
+
+/* end of mime_extractor.c */

Added: Extractor/src/plugins/test_mime.c
===================================================================
--- Extractor/src/plugins/test_mime.c                           (rev 0)
+++ Extractor/src/plugins/test_mime.c   2012-08-04 00:13:00 UTC (rev 23086)
@@ -0,0 +1,61 @@
+/*
+     This file is part of libextractor.
+     (C) 2012 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file plugins/test_mime.c
+ * @brief testcase for mime plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+
+/**
+ * Main function for the MIME testcase.
+ *
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+  struct SolutionData courseclear_sol[] =
+    {
+      { 
+       EXTRACTOR_METATYPE_MIMETYPE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "application/ogg",
+       strlen ("application/ogg") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 }
+    };
+  struct ProblemSet ps[] =
+    {
+      { "testdata/ogg_courseclear.ogg",
+       courseclear_sol },
+      { NULL, NULL }
+    };
+  return ET_main ("mime", ps);
+}
+
+/* end of test_mime.c */

Deleted: Extractor/test/courseclear.ogg
===================================================================
(Binary files differ)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]