[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r23086 - in Extractor: . src/main src/plugins test
From: |
gnunet |
Subject: |
[GNUnet-SVN] r23086 - in Extractor: . src/main src/plugins test |
Date: |
Sat, 4 Aug 2012 02:13:00 +0200 |
Author: grothoff
Date: 2012-08-04 02:13:00 +0200 (Sat, 04 Aug 2012)
New Revision: 23086
Added:
Extractor/src/plugins/test_mime.c
Removed:
Extractor/test/courseclear.ogg
Modified:
Extractor/INSTALL
Extractor/configure.ac
Extractor/src/main/extract.c
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/mime_extractor.c
Log:
get mime plugin working again, this time using libmagic
Modified: Extractor/INSTALL
===================================================================
--- Extractor/INSTALL 2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/INSTALL 2012-08-04 00:13:00 UTC (rev 23086)
@@ -1,8 +1,8 @@
Installation Instructions
*************************
-Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
-Inc.
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006, 2007, 2008, 2009 Free Software Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
@@ -226,11 +226,6 @@
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
- HP-UX `make' updates targets which have the same time stamps as
-their prerequisites, which makes it generally unusable when shipped
-generated files such as `configure' are involved. Use GNU `make'
-instead.
-
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
a workaround. If GNU CC is not installed, it is therefore recommended
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/configure.ac 2012-08-04 00:13:00 UTC (rev 23086)
@@ -312,6 +312,16 @@
LDFLAGS=$SAVED_LDFLAGS
AC_LANG_POP(C++)
+
+
+AC_MSG_CHECKING(for magic_open -lmagic)
+SAVED_LDFLAGS=$LDFLAGS
+AC_CHECK_LIB(magic, magic_open,
+ [AC_CHECK_HEADERS([magic.h],
+ AM_CONDITIONAL(HAVE_MAGIC, true),
+ AM_CONDITIONAL(HAVE_MAGIC, false))],
+ AM_CONDITIONAL(HAVE_MAGIC, false))
+
# restore LIBS
LIBS=$LIBSOLD
Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c 2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/main/extract.c 2012-08-04 00:13:00 UTC (rev 23086)
@@ -857,6 +857,7 @@
printf ("\n");
free (print);
EXTRACTOR_plugin_remove_all (plugins);
+ plugins = NULL;
cleanup_bibtex (); /* actually free's stuff */
return ret;
}
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/plugins/Makefile.am 2012-08-04 00:13:00 UTC (rev 23086)
@@ -19,15 +19,22 @@
TEST_OGG=test_ogg
endif
+if HAVE_MAGIC
+PLUGIN_MIME=libextractor_mime.la
+TEST_MIME=test_mime
+endif
+
plugin_LTLIBRARIES = \
- $(PLUGIN_OGG)
+ $(PLUGIN_OGG) \
+ $(PLUGIN_MIME)
if HAVE_ZZUF
fuzz_tests=fuzz_default.sh
endif
check_PROGRAMS = \
- $(TEST_OGG)
+ $(TEST_OGG) \
+ $(TEST_MIME)
TESTS = \
$(fuzz_tests) \
@@ -58,4 +65,19 @@
$(top_builddir)/src/plugins/libtest.la
+libextractor_mime_la_SOURCES = \
+ mime_extractor.c
+libextractor_mime_la_LDFLAGS = \
+ $(PLUGINFLAGS)
+libextractor_mime_la_LIBADD = \
+ $(top_builddir)/src/main/libextractor.la \
+ $(top_builddir)/src/common/libextractor_common.la \
+ -lmagic
+test_mime_SOURCES = \
+ test_mime.c
+test_mime_LDADD = \
+ $(top_builddir)/src/plugins/libtest.la
+
+
+
Modified: Extractor/src/plugins/mime_extractor.c
===================================================================
--- Extractor/src/plugins/mime_extractor.c 2012-08-03 23:19:06 UTC (rev 23085)
+++ Extractor/src/plugins/mime_extractor.c 2012-08-04 00:13:00 UTC (rev 23086)
@@ -1,6 +1,6 @@
/*
This file is part of libextractor.
- (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff
+ (C) 2012 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -20,301 +20,67 @@
#include "platform.h"
#include "extractor.h"
+#include <magic.h>
/**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @param arg closure...
- * @return 0 if the file does not match, 1 if it does
- **/
-typedef int (*Detector) (const char *data, size_t len, void *arg);
+ * Global handle to MAGIC data.
+ */
+static magic_t magic;
+
/**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @return always 1
- **/
-static int
-defaultDetector (const char *data, size_t len, void *arg)
+ * Main entry method for the MIME type detection plugin (uses libmagic).
+ *
+ * @param ec extraction context provided to the plugin
+ */
+void
+EXTRACTOR_mime_extract_method (struct EXTRACTOR_ExtractContext *ec)
{
- return 1;
-}
+ void *buf;
+ ssize_t ret;
+ const char *mime;
-/**
- * Detect a file-type.
- * @param data the contents of the file
- * @param len the length of the file
- * @return always 0
- **/
-static int
-disableDetector (const char *data, size_t len, void *arg)
-{
- return 0;
-}
-
-typedef struct ExtraPattern
-{
- int pos;
- int len;
- const char *pattern;
-} ExtraPattern;
-
-/**
- * Define special matching rules for complicated formats...
- **/
-static ExtraPattern xpatterns[] = {
-#define AVI_XPATTERN 0
- {8, 4, "AVI "},
- {0, 0, NULL},
-#define WAVE_XPATTERN 2
- {8, 4, "WAVE"},
- {0, 0, NULL},
-#define ACE_XPATTERN 4
- {4, 10, "\x00\x00\x90**ACE**"},
- {0, 0, NULL},
-#define TAR_XPATTERN 6
- {257, 6, "ustar\x00"},
- {0, 0, NULL},
-#define GTAR_XPATTERN 8
- {257, 8, "ustar\040\040\0"},
- {0, 0, NULL},
-#define RMID_XPATTERN 10
- {8, 4, "RMID"},
- {0, 0, NULL},
-#define ACON_XPATTERN 12
- {8, 4, "ACON"},
- {0, 0, NULL},
-#define CR2_PATTERN 14
- {8, 3, "CR\x02"},
- {0, 0, NULL},
-};
-
-/**
- * Detect AVI. A pattern matches if all XPatterns until the next {0,
- * 0, NULL} slot match. OR-ing patterns can be achieved using multiple
- * entries in the main table, so this "AND" (all match) semantics are
- * the only reasonable answer.
- **/
-static int
-xPatternMatcher (const char *data, size_t len, void *cls)
-{
- ExtraPattern *arg = cls;
-
- while (arg->pattern != NULL)
+ ret = ec->read (ec->cls,
+ &buf,
+ 16 * 1024);
+ if (-1 == ret)
+ return;
+ mime = magic_buffer (magic, buf, ret);
+ if (NULL == mime)
{
- if (arg->pos + arg->len > len)
- return 0;
- if (0 != memcmp (&data[arg->pos], arg->pattern, arg->len))
- return 0;
- arg++;
+ magic_close (magic);
+ return;
}
- return 1;
+ ec->proc (ec->cls,
+ "mime",
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ mime,
+ strlen (mime) + 1);
}
+
/**
- * Detect SVG
+ * Constructor for the library. Loads the magic file.
*/
-static int
-svgMatcher (const char *data, size_t len, void *cls)
+void __attribute__ ((constructor))
+mime_ltdl_init ()
{
- enum
- { XMLSTART, XMLCLOSE, SVGSTART } state;
- size_t i;
-
- i = 0;
- state = XMLSTART;
-
- while (i < len)
- {
- if (!isprint ( (unsigned char) data[i]))
- return 0;
- switch (state)
- {
- case XMLSTART:
- if (i + 6 >= len)
- return 0;
- else if (memcmp (data + i, "<?xml", 5) == 0
- && isspace ( (unsigned char) *(data + i + 5)))
- state = XMLCLOSE;
- break;
- case XMLCLOSE:
- if (i + 2 >= len)
- return 0;
- else if (memcmp (data + i, "?>", 2) == 0)
- state = SVGSTART;
- break;
- case SVGSTART:
- if (i + 5 >= len)
- return 0;
- else if (memcmp (data + i, "<svg", 4) == 0
- && isspace ( (unsigned char) *(data + i + 4)))
- return 1;
- break;
- default:
- /* do nothing */
- break;
- }
- i++;
- }
- return 0;
+ magic = magic_open (MAGIC_MIME_TYPE);
+ magic_load (magic, "/usr/share/misc/magic");
}
-/**
- * Use this detector, if the simple header-prefix matching is
- * sufficient.
- **/
-#define DEFAULT &defaultDetector, NULL
/**
- * Use this detector, to disable the mime-type (effectively comment it
- * out).
- **/
-#define DISABLED &disableDetector, NULL
-
-/**
- * Select an entry in xpatterns for matching
- **/
-#define XPATTERN(a) &xPatternMatcher, &xpatterns[(a)]
-
-typedef struct Pattern
+ * Destructor for the library, cleans up.
+ */
+void __attribute__ ((destructor))
+mime_ltdl_fini ()
{
- const char *pattern;
- int size;
- const char *mimetype;
- Detector detector;
- void *arg;
-} Pattern;
-
-static Pattern patterns[] = {
- {"\xFF\xD8", 2, "image/jpeg", DEFAULT},
- {"\211PNG\r\n\032\n", 8, "image/png", DEFAULT},
- {"/* XPM */", 9, "image/x-xpm", DEFAULT},
- {"GIF8", 4, "image/gif", DEFAULT},
- {"P1", 2, "image/x-portable-bitmap", DEFAULT},
- {"P2", 2, "image/x-portable-graymap", DEFAULT},
- {"P3", 2, "image/x-portable-pixmap", DEFAULT},
- {"P4", 2, "image/x-portable-bitmap", DEFAULT},
- {"P5", 2, "image/x-portable-graymap", DEFAULT},
- {"P6", 2, "image/x-portable-pixmap", DEFAULT},
- {"P7", 2, "image/x-portable-anymap", DEFAULT},
- {"BM", 2, "image/x-bmp", DEFAULT},
- {"fLaC", 4, "audio/flac", DEFAULT},
- {"\x89PNG", 4, "image/x-png", DEFAULT},
- {"id=ImageMagick", 14, "application/x-imagemagick-image", DEFAULT},
- {"hsi1", 4, "image/x-jpeg-proprietary", DEFAULT},
- {"FLV", 3, "video/x-flv", DEFAULT},
- {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
- {"CWS", 3, "application/x-shockwave-flash", DEFAULT},
- {"\x2E\x52\x4d\x46", 4, "video/real", DEFAULT},
- {"\x2e\x72\x61\xfd", 4, "audio/real", DEFAULT},
- {"\x00\x05\x16\x00", 4, "application/applefile", DEFAULT},
- {"\x00\x05\x16\x07", 4, "application/applefile", DEFAULT},
- {"\177ELF", 4, "application/x-executable", DEFAULT},
- /* FIXME: correct MIME-type for an ELF!? */
- {"\xca\xfe\xba\xbe", 4, "application/java", DEFAULT},
- /* FIXME: correct MIME for a class-file? */
- {"gimp xcf", 8, "image/xcf", DEFAULT},
- {"II\x2a\x00\x10", 5, "image/x-canon-cr2", XPATTERN (CR2_PATTERN)},
- {"IIN1", 4, "image/tiff", DEFAULT},
- {"MM\x00\x2a", 4, "image/tiff", DEFAULT}, /* big-endian */
- {"II\x2a\x00", 4, "image/tiff", DEFAULT}, /* little-endian */
- {"%PDF", 4, "application/pdf", DEFAULT},
- {"%!PS-Adobe-", 11, "application/postscript", DEFAULT},
- {"\004%!PS-Adobe-", 12, "application/postscript", DEFAULT},
- {"RIFF", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
- {"RIFF", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
- {"RIFX", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
- {"RIFX", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
- {"RIFF", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
- {"RIFX", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
- {"RIFF", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
- {"RIFX", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
- {"\211GND\r\n\032\n", 8, "application/gnunet-directory", DEFAULT},
- {"{\\rtf", 5, "application/rtf", DEFAULT},
- {"\xf7\x02", 2, "application/x-dvi", DEFAULT},
- {"\x1F\x8B\x08\x00", 4, "application/x-gzip", DEFAULT},
- {"BZh91AY&SY", 10, "application/bz2", DEFAULT},
- {"\xED\xAB\xEE\xDB", 4, "application/x-rpm", DEFAULT}, /* binary */
- {"!<arch>\ndebian", 14, "application/x-dpkg", DEFAULT}, /* .deb */
- {"PK\x03\x04", 4, "application/x-zip", DEFAULT},
- {"\xea\x60", 2, "application/x-arj", DEFAULT},
- {"\037\235", 2, "application/x-compress", DEFAULT},
- {"Rar!", 4, "application/x-rar", DEFAULT},
- {"", 0, "application/x-ace", XPATTERN (ACE_XPATTERN)},
- {"", 0, "application/x-tar", XPATTERN (TAR_XPATTERN)},
- {"", 0, "application/x-gtar", XPATTERN (GTAR_XPATTERN)},
- {"-lh0-", 5, "application/x-lha", DEFAULT},
- {"-lh1-", 5, "application/x-lha", DEFAULT},
- {"-lh2-", 5, "application/x-lha", DEFAULT},
- {"-lh3-", 5, "application/x-lha", DEFAULT},
- {"-lh4-", 5, "application/x-lha", DEFAULT},
- {"-lh5-", 5, "application/x-lha", DEFAULT},
- {"-lh6-", 5, "application/x-lha", DEFAULT},
- {"-lh7-", 5, "application/x-lha", DEFAULT},
- {"-lhd-", 5, "application/x-lha", DEFAULT},
- {"-lh\40-", 5, "application/x-lha", DEFAULT},
- {"-lz4-", 5, "application/x-lha", DEFAULT},
- {"-lz5-", 5, "application/x-lha", DEFAULT},
- {"-lzs-", 5, "application/x-lha", DEFAULT},
- {"\xFD\x76", 2, "application/x-lzh", DEFAULT},
- {"\x00\x00\x01\xb3", 4, "video/mpeg", DEFAULT},
- {"\x00\x00\x01\xba", 4, "video/mpeg", DEFAULT},
- {"moov", 4, "video/quicktime", DEFAULT},
- {"mdat", 4, "video/quicktime", DEFAULT},
- {"\x8aMNG", 4, "video/x-mng", DEFAULT},
- {"\x30\x26\xb2\x75\x8e\x66", 6, "video/x-ms-asf", DEFAULT}, /* same as .wmv ? */
- {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
- {"MThd", 4, "audio/midi", DEFAULT},
- {"ID3", 3, "audio/mpeg", DEFAULT},
- {"\xFF\xFA", 2, "audio/mpeg", DEFAULT},
- {"\xFF\xFB", 2, "audio/mpeg", DEFAULT},
- {"\xFF\xFC", 2, "audio/mpeg", DEFAULT},
- {"\xFF\xFD", 2, "audio/mpeg", DEFAULT},
- {"\xFF\xFE", 2, "audio/mpeg", DEFAULT},
- {"\xFF\xFF", 2, "audio/mpeg", DEFAULT},
- {"OggS", 4, "application/ogg", DEFAULT},
- {"#!/bin/sh", 9, "application/x-shellscript", DEFAULT},
- {"#!/bin/bash", 11, "application/x-shellscript", DEFAULT},
- {"#!/bin/csh", 10, "application/x-shellscript", DEFAULT},
- {"#!/bin/tcsh", 11, "application/x-shellscript", DEFAULT},
- {"#!/bin/perl", 11, "application/x-perl", DEFAULT},
- {"<?xml", 5, "image/svg+xml", svgMatcher, NULL},
- {NULL, 0, NULL, DISABLED}
-};
-
-
-int
-EXTRACTOR_mime_extract (const char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
-{
- int i;
-
- i = 0;
- while (patterns[i].pattern != NULL)
- {
- if (size < patterns[i].size)
- {
- i++;
- continue;
- }
- if (0 == memcmp (patterns[i].pattern, data, patterns[i].size))
- {
- if (patterns[i].detector (data, size, patterns[i].arg))
- return proc (proc_cls,
- "mime",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- patterns[i].mimetype,
- strlen(patterns[i].mimetype)+1);
- }
- i++;
- }
- return 0;
+ magic_close (magic);
+ magic = NULL;
}
+
+/* end of mime_extractor.c */
Added: Extractor/src/plugins/test_mime.c
===================================================================
--- Extractor/src/plugins/test_mime.c (rev 0)
+++ Extractor/src/plugins/test_mime.c 2012-08-04 00:13:00 UTC (rev 23086)
@@ -0,0 +1,61 @@
+/*
+ This file is part of libextractor.
+ (C) 2012 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file plugins/test_mime.c
+ * @brief testcase for mime plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+
+/**
+ * Main function for the MIME testcase.
+ *
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+ struct SolutionData courseclear_sol[] =
+ {
+ {
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "application/ogg",
+ strlen ("application/ogg") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 }
+ };
+ struct ProblemSet ps[] =
+ {
+ { "testdata/ogg_courseclear.ogg",
+ courseclear_sol },
+ { NULL, NULL }
+ };
+ return ET_main ("mime", ps);
+}
+
+/* end of test_mime.c */
Deleted: Extractor/test/courseclear.ogg
===================================================================
(Binary files differ)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r23086 - in Extractor: . src/main src/plugins test,
gnunet <=