gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r20783 - in Extractor: . src/include src/main src/plugins


From: gnunet
Subject: [GNUnet-SVN] r20783 - in Extractor: . src/include src/main src/plugins
Date: Tue, 27 Mar 2012 15:05:17 +0200

Author: grothoff
Date: 2012-03-27 15:05:17 +0200 (Tue, 27 Mar 2012)
New Revision: 20783

Removed:
   Extractor/src/plugins/id3v23_extractor.c
   Extractor/src/plugins/id3v24_extractor.c
Modified:
   Extractor/AUTHORS
   Extractor/ChangeLog
   Extractor/configure.ac
   Extractor/src/include/extractor.h
   Extractor/src/main/extractor.c
   Extractor/src/main/extractor_plugins.c
   Extractor/src/main/extractor_plugins.h
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/id3_extractor.c
   Extractor/src/plugins/id3v2_extractor.c
   Extractor/src/plugins/mp3_extractor.c
   Extractor/src/plugins/template_extractor.c
Log:
LRN is refactoring the plugin API, and hell breaks loose

Modified: Extractor/AUTHORS
===================================================================
--- Extractor/AUTHORS   2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/AUTHORS   2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,6 +1,7 @@
 Core Team:
 Christian Grothoff <address@hidden>
 Nils Durner <address@hidden>
+LRN <address@hidden>
 
 Formats:
 html          - core team with code from libhtmlparse 0.1.13, http://msalem.translator.cx/libhtmlparse.html

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/ChangeLog 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,3 +1,8 @@
+Tue Mar 27 15:04:00 CEST 2012
+       Refactoring plugin API to allow seeks to arbitrary positions in the
+       file (breaks existing plugins, so the current version will not
+       work). -LRN
+
 Sun Jan 29 17:27:08 CET 2012
        Documented recently discovered issues with pthreads and 
        out-of-process plugin executions in the manual. -CG

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/configure.ac      2012-03-27 13:05:17 UTC (rev 20783)
@@ -101,6 +101,8 @@
      if test "x$mingw32_ws2" = "xno" -a "x$mingw64_ws2" = "xno"; then
        AC_MSG_ERROR([libextractor requires Winsock2])
      fi
+     # Sufficiently new Windows XP
+     CFLAGS="-D__MSVCRT_VERSION__=0x0601 $CFLAGS"
 
     AC_MSG_CHECKING(for PlibC)
     plibc=0
@@ -136,6 +138,8 @@
      if test $plibc -ne 1;
      then
         AC_MSG_ERROR([libextractor requires PlibC])
+     else
+        LIBS="$LIBS -lplibc"
      fi
 
      LDFLAGS="$LDFLAGS -Wl,-no-undefined -Wl,--export-all-symbols"
@@ -336,7 +340,7 @@
 AC_FUNC_ERROR_AT_LINE
 AC_SEARCH_LIBS(dlopen, dl)
 AC_SEARCH_LIBS(shm_open, rt)
-AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup ftruncate shm_open shm_unlink])
+AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup ftruncate shm_open shm_unlink lseek64])
 LE_LIB_LIBS=$LIBS
 LIBS=$LIBSOLD
 

Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h   2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/include/extractor.h   2012-03-27 13:05:17 UTC (rev 20783)
@@ -392,13 +392,7 @@
  * @param options options for this plugin; can be NULL
  * @return 0 if all calls to proc returned 0, otherwise 1
  */
-typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
-                                      size_t datasize,
-                                      EXTRACTOR_MetaDataProcessor proc,
-                                      void *proc_cls,
-                                      const char *options);
 
-
 /**
  * Linked list of extractor plugins.  An application builds this list
  * by telling libextractor to load various keyword-extraction
@@ -407,7 +401,14 @@
  */
 struct EXTRACTOR_PluginList;
 
+typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin,
+  EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
 
+typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList *plugin);
+typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList *plugin);
+
+
+
 /**
  * Load the default set of plugins.  The default can be changed
  * by setting the LIBEXTRACTOR_LIBRARIES environment variable;

Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c      2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/main/extractor.c      2012-03-27 13:05:17 UTC (rev 20783)
@@ -23,7 +23,7 @@
 #include "extractor.h"
 #include <dirent.h>
 #include <sys/types.h>
-#ifndef WINDOWS
+#if !WINDOWS
 #include <sys/wait.h>
 #include <sys/shm.h>
 #endif
@@ -59,118 +59,54 @@
  */
 #define MAX_MIME_LEN 256
 
+#define MAX_SHM_NAME 255
+
 /**
  * Set to 1 to get failure info,
  * 2 for actual debug info.
  */ 
 #define DEBUG 1
 
+#define MESSAGE_INIT_STATE 0x01
+#define MESSAGE_UPDATED_SHM 0x02
+#define MESSAGE_DONE 0x03
+#define MESSAGE_SEEK 0x04
+#define MESSAGE_META 0x05
+#define MESSAGE_DISCARD_STATE 0x06
 
 /**
- * Stop the child process of this plugin.
+ * Header used for our IPC replies.  A header
+ * with all fields being zero is used to indicate
+ * the end of the stream.
  */
-static void
-stop_process (struct EXTRACTOR_PluginList *plugin)
+struct IpcHeader
 {
-  int status;
-#ifdef WINDOWS
-  HANDLE process;
-#endif
+  enum EXTRACTOR_MetaType meta_type;
+  enum EXTRACTOR_MetaFormat meta_format;
+  size_t data_len;
+  size_t mime_len;
+};
 
-#if DEBUG
-#ifndef WINDOWS
-  if (plugin->cpid == -1)
-#else
-  if (plugin->hProcess == INVALID_HANDLE_VALUE)
-#endif
-    fprintf (stderr,
-            "Plugin `%s' choked on this input\n",
-            plugin->short_libname);
-#endif
-#ifndef WINDOWS
-  if ( (plugin->cpid == -1) ||
-       (plugin->cpid == 0) )
-    return;
-  kill (plugin->cpid, SIGKILL);
-  waitpid (plugin->cpid, &status, 0);
-  plugin->cpid = -1;
-  close (plugin->cpipe_out);
-  fclose (plugin->cpipe_in);
-#else
-  if (plugin->hProcess == INVALID_HANDLE_VALUE ||
-      plugin->hProcess == NULL)
-  return;
-  TerminateProcess (plugin->hProcess, 0);
-  CloseHandle (plugin->hProcess);
-  plugin->hProcess = INVALID_HANDLE_VALUE;
-  close (plugin->cpipe_out);
-  fclose (plugin->cpipe_in);
-#endif
-  plugin->cpipe_out = -1;
-  plugin->cpipe_in = NULL;
-}
-
-
-/**
- * Remove a plugin from a list.
- *
- * @param prev the current list of plugins
- * @param library the name of the plugin to remove
- * @return the reduced list, unchanged if the plugin was not loaded
- */
-struct EXTRACTOR_PluginList *
-EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
-                       const char * library)
+#if !WINDOWS
+int
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
 {
-  struct EXTRACTOR_PluginList *pos;
-  struct EXTRACTOR_PluginList *first;
-
-  pos = prev;
-  first = prev;
-  while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
-    {
-      prev = pos;
-      pos = pos->next;
-    }
-  if (pos != NULL)
-    {
-      /* found, close library */
-      if (first == pos)
-       first = pos->next;
-      else
-       prev->next = pos->next;
-      /* found */
-      stop_process (pos);
-      free (pos->short_libname);
-      free (pos->libname);
-      free (pos->plugin_options);
-      if (NULL != pos->libraryHandle) 
-       lt_dlclose (pos->libraryHandle);      
-      free (pos);
-    }
-#if DEBUG
-  else
-    fprintf(stderr,
-           "Unloading plugin `%s' failed!\n",
-           library);
-#endif
-  return first;
+  if (plugin->shm_id != -1)
+    close (plugin->shm_id);
+  plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
+  return plugin->shm_id;
 }
-
-
-/**
- * Remove all plugins from the given list (destroys the list).
- *
- * @param plugin the list of plugins
- */
-void 
-EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
+#else
+HANDLE
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
 {
-  while (plugins != NULL)
-    plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname);
+  if (plugin->map_handle != 0)
+    CloseHandle (plugin->map_handle);
+  plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
+  return plugin->map_handle;
 }
+#endif
 
-
 static int
 write_all (int fd,
           const void *buf,
@@ -187,45 +123,10 @@
        return -1;
       off += ret;
     }
-  return 0;
+  return size;
 }
 
-
-static int
-read_all (int fd,
-         void *buf,
-         size_t size)
-{
-  char *data = buf;
-  size_t off = 0;
-  ssize_t ret;
-  
-  while (off < size)
-    {
-      ret = read (fd, &data[off], size - off);
-      if (ret <= 0)
-       return -1;
-      off += ret;
-    }
-  return 0;
-}
-
-
 /**
- * Header used for our IPC replies.  A header
- * with all fields being zero is used to indicate
- * the end of the stream.
- */
-struct IpcHeader
-{
-  enum EXTRACTOR_MetaType type;
-  enum EXTRACTOR_MetaFormat format;
-  size_t data_len;
-  size_t mime_len;
-};
-
-
-/**
  * Function called by a plugin in a child process.  Transmits
  * the meta data back to the parent process.
  *
@@ -254,6 +155,8 @@
   int *cpipe_out = cls;
   struct IpcHeader hdr;
   size_t mime_len;
+  unsigned char meta_byte = MESSAGE_META;
+  unsigned char zero_byte = 0;
 
   if (data_mime_type == NULL)
     mime_len = 0;
@@ -261,23 +164,19 @@
     mime_len = strlen (data_mime_type) + 1;
   if (mime_len > MAX_MIME_LEN)
     mime_len = MAX_MIME_LEN;
-  hdr.type = type;
-  hdr.format = format;
+  hdr.meta_type = type;
+  hdr.meta_format = format;
   hdr.data_len = data_len;
   hdr.mime_len = mime_len;
-  if ( (hdr.type == 0) &&
-       (hdr.format == 0) &&
-       (hdr.data_len == 0) &&
-       (hdr.mime_len == 0) )
-    return 0; /* better skip this one, would signal termination... */    
-  if ( (0 != write_all (*cpipe_out, &hdr, sizeof(hdr))) ||
-       (0 != write_all (*cpipe_out, data_mime_type, mime_len)) ||
-       (0 != write_all (*cpipe_out, data, data_len)) )
-    return 1;  
+  if ((1 != write_all (*cpipe_out, &meta_byte, 1)) ||
+      (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) ||
+      (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) ||
+      (1 != write_all (*cpipe_out, &zero_byte, 1)) ||
+      (data_len != write_all (*cpipe_out, data, data_len)))
+    return 1;
   return 0;
 }
 
-
 /**
  * 'main' function of the child process.  Reads shm-filenames from
  * 'in' (line-by-line) and writes meta data blocks to 'out'.  The meta
@@ -288,23 +187,20 @@
  * @param out stream to write to
  */
 static void
-process_requests (struct EXTRACTOR_PluginList *plugin,
-                 int in,
-                 int out)
+process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out)
 {
-  char hfn[256];
-  char tfn[256];
-  char sze[256];
-  size_t hfn_len;
-  size_t tfn_len;
-  size_t sze_len;
-  char *fn;
-  FILE *fin;
-  void *ptr;
-  int shmid;
+  int read_result1, read_result2, read_result3;
+  unsigned char code;
+  int64_t fsize = -1;
+  int64_t position = 0;
+  void *shm_ptr = NULL;
+  size_t shm_size = 0;
+  char *shm_name = NULL;
+  size_t shm_name_len;
+
+  int extract_reply;
+
   struct IpcHeader hdr;
-  size_t size;
-  int want_tail;
   int do_break;
 #ifdef WINDOWS
   HANDLE map;
@@ -312,167 +208,556 @@
 #endif
 
   if (plugin == NULL)
-    {
-      close (in);
-      close (out);
-      return;
-    }
+  {
+    close (in);
+    close (out);
+    return;
+  }
   if (0 != plugin_load (plugin))
-    {
-      close (in);
-      close (out);
+  {
+    close (in);
+    close (out);
 #if DEBUG
-      fprintf (stderr,
-              "Plugin `%s' failed to load!\n",
-              plugin->short_libname);
+    fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
 #endif
-      return;
-    }  
-  want_tail = 0;
-  if ( (plugin->specials != NULL) &&
-       (NULL != strstr (plugin->specials,
-                       "want-tail")) )
-    {
-      want_tail = 1;
-    }
-  if ( (plugin->specials != NULL) &&
-       (NULL != strstr (plugin->specials,
-                       "close-stderr")) )
-    {
-      close (2);
-    }
-  if ( (plugin->specials != NULL) &&
-       (NULL != strstr (plugin->specials,
-                       "close-stdout")) )
-    {
-      close (1);
-    }
+    return;
+  }  
+  if ((plugin->specials != NULL) &&
+      (NULL != strstr (plugin->specials, "close-stderr")))
+    close (2);
+  if ((plugin->specials != NULL) &&
+      (NULL != strstr (plugin->specials, "close-stdout")))
+    close (1);
 
   memset (&hdr, 0, sizeof (hdr));
-  fin = fdopen (in, "r");
-  if (fin == NULL)
+  do_break = 0;
+  while (!do_break)
+  {
+    read_result1 = read (in, &code, 1);
+    if (read_result1 <= 0)
+      break;
+    switch (code)
     {
-      close (in);
-      close (out);
-      return;
-    }
-  while (NULL != fgets (hfn, sizeof(hfn), fin))
-    {
-      hfn_len = strlen (hfn);
-      if (hfn_len <= 1)
-       break;
-      ptr = NULL;
-      hfn[--hfn_len] = '\0'; /* kill newline */
-      if (NULL == fgets (tfn, sizeof(tfn), fin))
-       break;
-      if ('!' != tfn[0])
-       break;
-      tfn_len = strlen (tfn);
-      tfn[--tfn_len] = '\0'; /* kill newline */
-      if ( (want_tail) &&
-          (tfn_len > 1) )
-       {
-         fn = &tfn[1];
-       }
-      else
-       {
-         fn = hfn;     
-       }
-      if (NULL == fgets (sze, sizeof(sze), fin))
-       break;
-      if ('s' != sze[0])
-       break;
-      sze_len = strlen (sze);
-      sze[--sze_len] = '\0'; /* kill newline */
-      size = strtol (&sze[1], NULL, 10);
-      if (size == LONG_MIN || size == LONG_MAX || size == 0)
+    case MESSAGE_INIT_STATE:
+      read_result2 = read (in, &fsize, sizeof (int64_t));
+      read_result3 = read (in, &shm_name_len, sizeof (size_t));
+      if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
+          shm_name_len > MAX_SHM_NAME || fsize <= 0)
+      {
+        do_break = 1;
         break;
-      do_break = 0;
-#ifndef WINDOWS
-      if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) &&
-          (SIZE_MAX != (size = lseek (shmid, 0, SEEK_END))) &&
-          (NULL != (ptr = mmap (NULL, size, PROT_READ, MAP_SHARED, shmid, 0))) &&
-          (ptr != (void*) -1) )
+      }
+      if (shm_name != NULL)
+        free (shm_name);
+      shm_name = malloc (shm_name_len);
+      if (shm_name == NULL)
+      {
+        do_break = 1;
+        break;
+      }
+      read_result2 = read (in, shm_name, shm_name_len);
+      if (read_result2 < shm_name_len)
+      {
+        do_break = 1;
+        break;
+      }
+      shm_name[shm_name_len - 1] = '\0';
+#if !WINDOWS
+      if (shm_ptr != NULL)
+        munmap (shm_ptr, shm_size);
+      if (-1 == plugin_open_shm (plugin, shm_name))
+      {
+        do_break = 1;
+        break;
+      }
 #else
-      /* Despite the obvious, this must be READWRITE, not READONLY */
-      map = OpenFileMapping (PAGE_READWRITE, FALSE, fn);
-      ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0);
-      if (ptr != NULL)
+      if (shm_ptr != NULL)
+        UnmapViewOfFile (shm_ptr);
+      if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
       {
-        if (0 == VirtualQuery (ptr, &mi, sizeof (mi)) || mi.RegionSize < size)
-        {
-          UnmapViewOfFile (ptr);
-          ptr = NULL;
-        }
+        do_break = 1;
+        break;
       }
-      if (ptr != NULL)
 #endif
-       {
-         if ( ( (plugin->extractMethod != NULL) &&
-                (0 != plugin->extractMethod (ptr,
-                                             size,
-                                             &transmit_reply,
-                                             &out,
-                                             plugin->plugin_options)) ) ||
-              (0 != write_all (out, &hdr, sizeof(hdr))) )
-           do_break = 1;
-       }
-#ifndef WINDOWS
-      if ( (ptr != NULL) &&
-          (ptr != (void*) -1) )
-       munmap (ptr, size);
-      if (-1 != shmid)
-       close (shmid);
+      plugin->fsize = fsize;
+      plugin->init_state_method (plugin);
+      break;
+    case MESSAGE_DISCARD_STATE:
+      plugin->discard_state_method (plugin);
+#if !WINDOWS
+      if (shm_ptr != NULL && shm_size > 0)
+        munmap (shm_ptr, shm_size);
+      if (plugin->shm_id != -1)
+        close (plugin->shm_id);
+      plugin->shm_id = -1;
+      shm_size = 0;
 #else
-      if (ptr != NULL && ptr != (void*) -1)
-        UnmapViewOfFile (ptr);
-      if (map != NULL)
-        CloseHandle (map);
+      if (shm_ptr != NULL)
+        UnmapViewOfFile (shm_ptr);
+      if (plugin->map_handle != 0)
+        CloseHandle (plugin->map_handle);
+      plugin->map_handle = 0;
 #endif
-      if (do_break)
-       break;
-      if ( (plugin->specials != NULL) &&
-          (NULL != strstr (plugin->specials,
-                           "force-kill")) )
-       {
-         /* we're required to die after each file since this
-            plugin only supports a single file at a time */
-         _exit (0);
-       }
+      shm_ptr = NULL;
+      break;
+    case MESSAGE_UPDATED_SHM:
+      read_result2 = read (in, &position, sizeof (int64_t));
+      read_result3 = read (in, &shm_size, sizeof (size_t));
+      if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
+          position < 0 || fsize <= 0 || position >= fsize)
+      {
+        do_break = 1;
+        break;
+      }
+      /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
+#if !WINDOWS
+      if ((-1 == plugin->shm_id) ||
+          (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
+          (shm_ptr == (void *) -1))
+      {
+        do_break = 1;
+        break;
+      }
+#else
+      if ((plugin->map_handle == 0) ||
+         (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0))))
+      {
+        do_break = 1;
+        break;
+      }
+#endif
+      plugin->position = position;
+      plugin->shm_ptr = shm_ptr;
+      plugin->map_size = shm_size;
+      /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object
+       * completely transparently, and the underlying code would return bytes from
+       * the memory map, or would block and wait for a seek to happen.
+       * That, however, requires somewhat different architecture, and even more wrapping
+       * and hand-helding. It's easier to make plugins aware of the fact that they work
+       * with discrete in-memory buffers with expensive seeking, not continuous files.
+       */
+      extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
+#if !WINDOWS
+      if ((shm_ptr != NULL) &&
+          (shm_ptr != (void*) -1) )
+        munmap (shm_ptr, shm_size);
+#else
+      if (shm_ptr != NULL)
+        UnmapViewOfFile (shm_ptr);
+#endif
+      if (extract_reply == 1)
+      {
+        unsigned char done_byte = MESSAGE_DONE;
+        if (write (out, &done_byte, 1) != 1)
+        {
+          do_break = 1;
+          break;
+        }
+        if ((plugin->specials != NULL) &&
+            (NULL != strstr (plugin->specials, "force-kill")))
+        {
+          /* we're required to die after each file since this
+             plugin only supports a single file at a time */
+#if !WINDOWS
+          fsync (out);
+#else
+          _commit (out);
+#endif
+          _exit (0);
+        }
+      }
+      else
+      {
+        unsigned char seek_byte = MESSAGE_SEEK;
+        if (write (out, &seek_byte, 1) != 1)
+        {
+          do_break = 1;
+          break;
+        }
+        if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t))
+        {
+          do_break = 1;
+          break;
+        }
+      }
+      break;
     }
-  fclose (fin);
+  }
+  close (in);
   close (out);
 }
 
+#if !WINDOWS
 
-#ifdef WINDOWS
+/**
+ * Start the process for the given plugin.
+ */ 
 static void
-write_plugin_data (int fd, const struct EXTRACTOR_PluginList *plugin)
+start_process (struct EXTRACTOR_PluginList *plugin)
 {
-  size_t i;
-  DWORD len;
-  char *str;
+  int p1[2];
+  int p2[2];
+  pid_t pid;
+  int status;
 
-  i = strlen (plugin->libname) + 1;
-  write (fd, &i, sizeof (size_t));
-  write (fd, plugin->libname, i);
-  i = strlen (plugin->short_libname) + 1;
-  write (fd, &i, sizeof (size_t));
-  write (fd, plugin->short_libname, i);
-  if (plugin->plugin_options != NULL)
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    if (-1 != plugin->cpid && 0 != plugin->cpid)
+      return;
+    break;
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    if (0 != plugin->cpid)
+      return;
+    break;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    return;
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return;
+    break;
+  }
+
+  plugin->cpid = -1;
+  if (0 != pipe (p1))
+  {
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
+  if (0 != pipe (p2))
+  {
+    close (p1[0]);
+    close (p1[1]);
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
+  pid = fork ();
+  plugin->cpid = pid;
+  if (pid == -1)
+  {
+    close (p1[0]);
+    close (p1[1]);
+    close (p2[0]);
+    close (p2[1]);
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
+  if (pid == 0)
+  {
+    close (p1[1]);
+    close (p2[0]);
+    process_requests (plugin, p1[0], p2[1]);
+    _exit (0);
+  }
+  close (p1[0]);
+  close (p2[1]);
+  plugin->cpipe_in = fdopen (p1[1], "w");
+  if (plugin->cpipe_in == NULL)
+  {
+    perror ("fdopen");
+    (void) kill (plugin->cpid, SIGKILL);
+    waitpid (plugin->cpid, &status, 0);
+    close (p1[1]);
+    close (p2[0]);
+    plugin->cpid = -1;
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
+  plugin->cpipe_out = p2[0];
+}
+
+/**
+ * Stop the child process of this plugin.
+ */
+static void
+stop_process (struct EXTRACTOR_PluginList *plugin)
+{
+  int status;
+
+#if DEBUG
+  if (plugin->cpid == -1)
+    fprintf (stderr,
+            "Plugin `%s' choked on this input\n",
+            plugin->short_libname);
+#endif
+  if ( (plugin->cpid == -1) ||
+       (plugin->cpid == 0) )
+    return;
+  kill (plugin->cpid, SIGKILL);
+  waitpid (plugin->cpid, &status, 0);
+  plugin->cpid = -1;
+  close (plugin->cpipe_out);
+  fclose (plugin->cpipe_in);
+  plugin->cpipe_out = -1;
+  plugin->cpipe_in = NULL;
+
+  if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+
+  plugin->seek_request = -1;
+}
+
+static int
+write_plugin_data (const struct EXTRACTOR_PluginList *plugin)
+{
+  /* only does anything on Windows */
+  return 0;
+}
+
+#define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...)
+#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size)
+
+#else /* WINDOWS */
+
+#ifndef PIPE_BUF
+#define PIPE_BUF 512
+#endif
+
+/* Copyright Bob Byrnes  <byrnes <at> curl.com>
+   http://permalink.gmane.org/gmane.os.cygwin.patches/2121
+*/
+/* Create a pipe, and return handles to the read and write ends,
+   just like CreatePipe, but ensure that the write end permits
+   FILE_READ_ATTRIBUTES access, on later versions of win32 where
+   this is supported.  This access is needed by NtQueryInformationFile,
+   which is used to implement select and nonblocking writes.
+   Note that the return value is either NO_ERROR or GetLastError,
+   unlike CreatePipe, which returns a bool for success or failure.  */
+static int
+create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr,
+                        LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize,
+                        DWORD dwReadMode, DWORD dwWriteMode)
+{
+  /* Default to error. */
+  *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE;
+
+  HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE;
+
+  /* Ensure that there is enough pipe buffer space for atomic writes.  */
+  if (psize < PIPE_BUF)
+    psize = PIPE_BUF;
+
+  char pipename[MAX_PATH];
+
+  /* Retry CreateNamedPipe as long as the pipe name is in use.
+   * Retrying will probably never be necessary, but we want
+   * to be as robust as possible.  */
+  while (1)
+  {
+    static volatile LONG pipe_unique_id;
+
+    snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld",
+              getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id));
+    /* Use CreateNamedPipe instead of CreatePipe, because the latter
+     * returns a write handle that does not permit FILE_READ_ATTRIBUTES
+     * access, on versions of win32 earlier than WinXP SP2.
+     * CreatePipe also stupidly creates a full duplex pipe, which is
+     * a waste, since only a single direction is actually used.
+     * It's important to only allow a single instance, to ensure that
+     * the pipe was not created earlier by some other process, even if
+     * the pid has been reused.  We avoid FILE_FLAG_FIRST_PIPE_INSTANCE
+     * because that is only available for Win2k SP2 and WinXP.  */
+    read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1,   /* max instances */
+                                  psize,        /* output buffer size */
+                                  psize,        /* input buffer size */
+                                  NMPWAIT_USE_DEFAULT_WAIT, sa_ptr);
+
+    if (read_pipe != INVALID_HANDLE_VALUE)
     {
-      i = strlen (plugin->plugin_options) + 1;
-      str = plugin->plugin_options;
+      break;
     }
-  else
+
+    DWORD err = GetLastError ();
+
+    switch (err)
     {
-      i = 0;
+    case ERROR_PIPE_BUSY:
+      /* The pipe is already open with compatible parameters.
+       * Pick a new name and retry.  */
+      continue;
+    case ERROR_ACCESS_DENIED:
+      /* The pipe is already open with incompatible parameters.
+       * Pick a new name and retry.  */
+      continue;
+    case ERROR_CALL_NOT_IMPLEMENTED:
+      /* We are on an older Win9x platform without named pipes.
+       * Return an anonymous pipe as the best approximation.  */
+      if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize))
+      {
+        return 0;
+      }
+      err = GetLastError ();
+      return err;
+    default:
+      return err;
     }
-  write (fd, &i, sizeof (size_t));
-  if (i > 0)
-    write (fd, str, i);
+    /* NOTREACHED */
+  }
+
+  /* Open the named pipe for writing.
+   * Be sure to permit FILE_READ_ATTRIBUTES access.  */
+  write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0,  /* share mode */
+                            sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */
+                            0); /* handle to template file */
+
+  if (write_pipe == INVALID_HANDLE_VALUE)
+  {
+    /* Failure. */
+    DWORD err = GetLastError ();
+
+    CloseHandle (read_pipe);
+    return err;
+  }
+
+  /* Success. */
+  *read_pipe_ptr = read_pipe;
+  *write_pipe_ptr = write_pipe;
+  return 0;
 }
 
+static int
+write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf)
+{
+  DWORD written;
+  BOOL bresult;
+  DWORD err;
+
+  if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE))
+    return -1;
+  
+  ResetEvent (ov->hEvent);
+
+  if (*old_buf != NULL)
+    free (*old_buf);
+
+  *old_buf = malloc (size);
+  if (*old_buf == NULL)
+    return -1;
+  memcpy (*old_buf, buf, size);
+  written = 0;
+  ov->Offset = 0;
+  ov->OffsetHigh = 0;
+  ov->Pointer = 0;
+  ov->Internal = 0;
+  ov->InternalHigh = 0;
+  bresult = WriteFile (h, *old_buf, size, &written, ov);
+
+  if (bresult == TRUE)
+  {
+    SetEvent (ov->hEvent);
+    free (*old_buf);
+    *old_buf = NULL;
+    return written;
+  }
+
+  err = GetLastError ();
+  if (err == ERROR_IO_PENDING)
+    return size;
+  SetEvent (ov->hEvent);
+  *old_buf = NULL;
+  SetLastError (err);
+  return -1;
+}
+
+static int
+print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...)
+{
+  va_list va;
+  va_list vacp;
+  size_t size;
+  char *print_buf;
+  int result;
+
+  va_start (va, fmt);
+  va_copy (vacp, va);
+  size = VSNPRINTF (NULL, 0, fmt, vacp) + 1;
+  va_end (vacp);
+  if (size <= 0)
+  {
+    va_end (va);
+    return size;
+  }
+
+  print_buf = malloc (size);
+  if (print_buf == NULL)
+    return -1;
+  VSNPRINTF (print_buf, size, fmt, va);
+  va_end (va);
+  
+  result = write_to_pipe (h, ov, print_buf, size, buf);
+  free (buf);
+  return result;
+}
+
+#define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in, &plug->ov_write, &plug->ov_write_buffer, fmt, ...)
+#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer)
+
+static int
+write_plugin_data (struct EXTRACTOR_PluginList *plugin)
+{
+  size_t libname_len, shortname_len, opts_len;
+  DWORD len;
+  char *str;
+  size_t total_len = 0;
+  unsigned char *buf, *ptr;
+
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    break;
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    break;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    return 0;
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return 0;
+    break;
+  }
+
+  libname_len = strlen (plugin->libname) + 1;
+  total_len += sizeof (size_t) + libname_len;
+  shortname_len = strlen (plugin->short_libname) + 1;
+  total_len += sizeof (size_t) + shortname_len;
+  if (plugin->plugin_options != NULL)
+  {
+    opts_len = strlen (plugin->plugin_options) + 1;
+    total_len += opts_len;
+  }
+  else
+  {
+    opts_len = 0;
+  }
+  total_len += sizeof (size_t);
+
+  buf = malloc (total_len);
+  if (buf == NULL)
+    return -1;
+  ptr = buf;
+  memcpy (ptr, &libname_len, sizeof (size_t));
+  ptr += sizeof (size_t);
+  memcpy (ptr, plugin->libname, libname_len);
+  ptr += libname_len;
+  memcpy (ptr, &shortname_len, sizeof (size_t));
+  ptr += sizeof (size_t);
+  memcpy (ptr, plugin->short_libname, shortname_len);
+  ptr += shortname_len;
+  memcpy (ptr, &opts_len, sizeof (size_t));
+  ptr += sizeof (size_t);
+  if (opts_len > 0)
+  {
+    memcpy (ptr, plugin->plugin_options, opts_len);
+    ptr += opts_len;
+  }
+  if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer))
+  {
+    free (buf);
+    return -1;
+  }
+  free (buf);
+  return 0;
+}
+
 static struct EXTRACTOR_PluginList *
 read_plugin_data (int fd)
 {
@@ -485,183 +770,112 @@
   read (fd, &i, sizeof (size_t));
   ret->libname = malloc (i);
   if (ret->libname == NULL)
-    {
-      free (ret);
-      return NULL;
-    }
+  {
+    free (ret);
+    return NULL;
+  }
   read (fd, ret->libname, i);
+  ret->libname[i - 1] = '\0';
 
   read (fd, &i, sizeof (size_t));
   ret->short_libname = malloc (i);
   if (ret->short_libname == NULL)
-    {
-      free (ret->libname);
-      free (ret);
-      return NULL;
-    }
+  {
+    free (ret->libname);
+    free (ret);
+    return NULL;
+  }
   read (fd, ret->short_libname, i);
+  ret->short_libname[i - 1] = '\0';
 
   read (fd, &i, sizeof (size_t));
   if (i == 0)
-    {
-      ret->plugin_options = NULL;
-    }
+  {
+    ret->plugin_options = NULL;
+  }
   else
+  {
+    ret->plugin_options = malloc (i);
+    if (ret->plugin_options == NULL)
     {
-      ret->plugin_options = malloc (i);
-      if (ret->plugin_options == NULL)
-       {
-         free (ret->short_libname);
-         free (ret->libname);
-         free (ret);
-         return NULL;
-       }
-      read (fd, ret->plugin_options, i);
+      free (ret->short_libname);
+      free (ret->libname);
+      free (ret);
+      return NULL;
     }
+    read (fd, ret->plugin_options, i);
+    ret->plugin_options[i - 1] = '\0';
+  }
   return ret;
 }
 
-
-void CALLBACK 
-RundllEntryPoint (HWND hwnd, 
-                 HINSTANCE hinst, 
-                 LPSTR lpszCmdLine, 
-                 int nCmdShow)
-{
-  intptr_t in_h;
-  intptr_t out_h;
-  int in, out;
-
-  sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h);
-  in = _open_osfhandle (in_h, _O_RDONLY);
-  out = _open_osfhandle (out_h, 0);
-  setmode (in, _O_BINARY);
-  setmode (out, _O_BINARY);
-  process_requests (read_plugin_data (in),
-                   in, out);
-}
-
-void CALLBACK 
-RundllEntryPointA (HWND hwnd, 
-                 HINSTANCE hinst, 
-                 LPSTR lpszCmdLine, 
-                 int nCmdShow)
-{
-  return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow);
-}
-#endif
-
-
 /**
  * Start the process for the given plugin.
  */ 
 static void
 start_process (struct EXTRACTOR_PluginList *plugin)
 {
-#if !WINDOWS
-  int p1[2];
-  int p2[2];
-  pid_t pid;
-  int status;
-
-  plugin->cpid = -1;
-  if (0 != pipe (p1))
-    {
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
-    }
-  if (0 != pipe (p2))
-    {
-      close (p1[0]);
-      close (p1[1]);
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
-    }
-  pid = fork ();
-  plugin->cpid = pid;
-  if (pid == -1)
-    {
-      close (p1[0]);
-      close (p1[1]);
-      close (p2[0]);
-      close (p2[1]);
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
-    }
-  if (pid == 0)
-    {
-      close (p1[1]);
-      close (p2[0]);
-      process_requests (plugin, p1[0], p2[1]);
-      _exit (0);
-    }
-  close (p1[0]);
-  close (p2[1]);
-  plugin->cpipe_in = fdopen (p1[1], "w");
-  if (plugin->cpipe_in == NULL)
-    {
-      perror ("fdopen");
-      (void) kill (plugin->cpid, SIGKILL);
-      waitpid (plugin->cpid, &status, 0);
-      close (p1[1]);
-      close (p2[0]);
-      plugin->cpid = -1;
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
-    }
-  plugin->cpipe_out = p2[0];
-#else
-  int p1[2];
-  int p2[2];
+  HANDLE p1[2];
+  HANDLE p2[2];
   STARTUPINFO startup;
   PROCESS_INFORMATION proc;
   char cmd[MAX_PATH + 1];
   char arg1[10], arg2[10];
-  HANDLE p10_os = INVALID_HANDLE_VALUE, p21_os = INVALID_HANDLE_VALUE;
   HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE;
+  SECURITY_ATTRIBUTES sa;
 
-  plugin->hProcess = NULL;
-  if (0 != _pipe (p1, 0, _O_BINARY | _O_NOINHERIT))
-    {
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0)
       return;
-    }
-  if (0 != _pipe (p2, 0, _O_BINARY | _O_NOINHERIT))
-    {
-      close (p1[0]);
-      close (p1[1]);
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    break;
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    if (plugin->hProcess != 0)
       return;
-    }
+    break;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    return;
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return;
+    break;
+  }
 
-  memset (&startup, 0, sizeof (STARTUPINFO));
+  sa.nLength = sizeof (sa);
+  sa.lpSecurityDescriptor = NULL;
+  sa.bInheritHandle = FALSE;
 
-  p10_os = (HANDLE) _get_osfhandle (p1[0]);
-  p21_os = (HANDLE) _get_osfhandle (p2[1]);
+  plugin->hProcess = NULL;
 
-  if (p10_os == INVALID_HANDLE_VALUE || p21_os == INVALID_HANDLE_VALUE)
+  if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
   {
-    close (p1[0]);
-    close (p1[1]);
-    close (p2[0]);
-    close (p2[1]);
     plugin->flags = EXTRACTOR_OPTION_DISABLED;
     return;
   }
+  if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
+  {
+    CloseHandle (p1[0]);
+    CloseHandle (p1[1]);
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
 
-  if (!DuplicateHandle (GetCurrentProcess (), p10_os, GetCurrentProcess (),
+  memset (&startup, 0, sizeof (STARTUPINFO));
+
+  if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (),
       &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)
-      || !DuplicateHandle (GetCurrentProcess (), p21_os, GetCurrentProcess (),
+      || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (),
       &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS))
   {
     if (p10_os_inh != INVALID_HANDLE_VALUE)
       CloseHandle (p10_os_inh);
     if (p21_os_inh != INVALID_HANDLE_VALUE)
       CloseHandle (p21_os_inh);
-    close (p1[0]);
-    close (p1[1]);
-    close (p2[0]);
-    close (p2[1]);
+    CloseHandle (p1[0]);
+    CloseHandle (p1[1]);
+    CloseHandle (p2[0]);
+    CloseHandle (p2[1]);
     plugin->flags = EXTRACTOR_OPTION_DISABLED;
     return;
   }
@@ -675,190 +889,588 @@
     CloseHandle (proc.hThread);
   }
   else
-    {
-      close (p1[0]);
-      close (p1[1]);
-      close (p2[0]);
-      close (p2[1]);
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
-    }
-  close (p1[0]);
-  close (p2[1]);
+  {
+    CloseHandle (p1[0]);
+    CloseHandle (p1[1]);
+    CloseHandle (p2[0]);
+    CloseHandle (p2[1]);
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+    return;
+  }
+  CloseHandle (p1[0]);
+  CloseHandle (p2[1]);
   CloseHandle (p10_os_inh);
   CloseHandle (p21_os_inh);
 
-  write_plugin_data (p1[1], plugin);
+  plugin->cpipe_in = p1[1];
+  plugin->cpipe_out = p2[0];
 
-  plugin->cpipe_in = fdopen (p1[1], "w");
-  if (plugin->cpipe_in == NULL)
+  memset (&plugin->ov_read, 0, sizeof (OVERLAPPED));
+  memset (&plugin->ov_write, 0, sizeof (OVERLAPPED));
+
+  plugin->ov_write_buffer = NULL;
+
+  plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
+  plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
+}
+
+/**
+ * Stop the child process of this plugin.
+ */
+static void
+stop_process (struct EXTRACTOR_PluginList *plugin)
+{
+  int status;
+  HANDLE process;
+
+#if DEBUG
+  if (plugin->hProcess == INVALID_HANDLE_VALUE)
+    fprintf (stderr,
+            "Plugin `%s' choked on this input\n",
+            plugin->short_libname);
+#endif
+  if (plugin->hProcess == INVALID_HANDLE_VALUE ||
+      plugin->hProcess == NULL)
+    return;
+  TerminateProcess (plugin->hProcess, 0);
+  CloseHandle (plugin->hProcess);
+  plugin->hProcess = INVALID_HANDLE_VALUE;
+  CloseHandle (plugin->cpipe_out);
+  CloseHandle (plugin->cpipe_in);
+  plugin->cpipe_out = INVALID_HANDLE_VALUE;
+  plugin->cpipe_in = INVALID_HANDLE_VALUE;
+  CloseHandle (plugin->ov_read.hEvent);
+  CloseHandle (plugin->ov_write.hEvent);
+  if (plugin->ov_write_buffer != NULL)
+  {
+    free (plugin->ov_write_buffer);
+    plugin->ov_write_buffer = NULL;
+  }
+
+  if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
+    plugin->flags = EXTRACTOR_OPTION_DISABLED;
+
+  plugin->seek_request = -1;
+}
+
+#endif /* WINDOWS */
+
+/**
+ * Remove a plugin from a list.
+ *
+ * @param prev the current list of plugins
+ * @param library the name of the plugin to remove
+ * @return the reduced list, unchanged if the plugin was not loaded
+ */
+struct EXTRACTOR_PluginList *
+EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
+                       const char * library)
+{
+  struct EXTRACTOR_PluginList *pos;
+  struct EXTRACTOR_PluginList *first;
+
+  pos = prev;
+  first = prev;
+  while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
     {
-      perror ("fdopen");
-      TerminateProcess (plugin->hProcess, 0);
-      WaitForSingleObject (plugin->hProcess, INFINITE);
-      CloseHandle (plugin->hProcess);
-      close (p1[1]);
-      close (p2[0]);
-      plugin->hProcess = INVALID_HANDLE_VALUE;
-      plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return;
+      prev = pos;
+      pos = pos->next;
     }
-  plugin->cpipe_out = p2[0];
+  if (pos != NULL)
+    {
+      /* found, close library */
+      if (first == pos)
+       first = pos->next;
+      else
+       prev->next = pos->next;
+      /* found */
+      stop_process (pos);
+      free (pos->short_libname);
+      free (pos->libname);
+      free (pos->plugin_options);
+      if (NULL != pos->libraryHandle) 
+       lt_dlclose (pos->libraryHandle);      
+      free (pos);
+    }
+#if DEBUG
+  else
+    fprintf(stderr,
+           "Unloading plugin `%s' failed!\n",
+           library);
 #endif
+  return first;
 }
 
 
 /**
- * Extract meta data using the given plugin, running the
- * actual code of the plugin out-of-process.
+ * Remove all plugins from the given list (destroys the list).
  *
- * @param plugin which plugin to call
- * @param size size of the file mapped by shmfn or tshmfn
- * @param shmfn file name of the shared memory segment
- * @param tshmfn file name of the shared memory segment for the end of the data
- * @param proc function to call on the meta data
+ * @param plugin the list of plugins
+ */
+void 
+EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
+{
+  while (plugins != NULL)
+    plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname);
+}
+
+
+
+/**
+ * Open a file
+ */
+static int file_open(const char *filename, int oflag, ...)
+{
+  int mode;
+  const char *fn;
+#ifdef MINGW
+  char szFile[_MAX_PATH + 1];
+  long lRet;
+
+  if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
+  {
+    errno = ENOENT;
+    SetLastError(lRet);
+    return -1;
+  }
+  fn = szFile;
+#else
+  fn = filename;
+#endif
+  mode = 0;
+#ifdef MINGW
+  /* Set binary mode */
+  mode |= O_BINARY;
+#endif
+  return OPEN(fn, oflag, mode);
+}
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+
+#if HAVE_ZLIB
+#define MIN_ZLIB_HEADER 12
+#endif
+#if HAVE_LIBBZ2
+#define MIN_BZ2_HEADER 4
+#endif
+#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB
+#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
+#endif
+#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2
+#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
+#endif
+#if !defined (MIN_COMPRESSED_HEADER)
+#define MIN_COMPRESSED_HEADER -1
+#endif
+
+#define COMPRESSED_DATA_PROBE_SIZE 3
+
+/**
+ * Try to decompress compressed data
+ *
+ * @param data data to decompress, or NULL (if fd is not -1)
+ * @param fd file to read data from, or -1 (if data is not NULL)
+ * @param fsize size of data (if data is not NULL) or size of fd file (if fd is not -1)
+ * @param compression_type type of compression, as returned by get_compression_type ()
+ * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it.
+ * @param buffer_size a pointer to buffer size
+ * @param proc callback for metadata
  * @param proc_cls cls for proc
- * @return 0 if proc did not return non-zero
+ * @return 0 on success, anything else on error
  */
 static int
-extract_oop (struct EXTRACTOR_PluginList *plugin,
-             size_t size,
-            const char *shmfn,
-            const char *tshmfn,
-            EXTRACTOR_MetaDataProcessor proc,
-            void *proc_cls)
+try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int compression_type, void **buffer, size_t *buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  struct IpcHeader hdr;
-  char mimetype[MAX_MIME_LEN + 1];
-  char *data;
+  unsigned char *new_buffer;
+  ssize_t read_result;
 
-#ifndef WINDOWS
-  if (plugin->cpid == -1)
-#else
-  if (plugin->hProcess == INVALID_HANDLE_VALUE)
+  unsigned char *buf;
+  unsigned char *rbuf;
+  size_t dsize;
+#if HAVE_ZLIB
+  z_stream strm;
+  int ret;
+  size_t pos;
 #endif
-    return 0;
-  if (0 >= fprintf (plugin->cpipe_in, 
-                   "%s\n",
-                   shmfn))
+#if HAVE_LIBBZ2
+  bz_stream bstrm;
+  int bret;
+  size_t bpos;
+#endif
+
+  if (fd != -1)
+  {
+    if (fsize > *buffer_size)
     {
-      stop_process (plugin);
-#ifndef WINDOWS
-      plugin->cpid = -1;
-#else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
-#endif
-      if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-       plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return 0;
+      /* Read the rest of the file. Can't de-compress it partially anyway */
+      /* Memory mapping is not useful here, because memory mapping ALSO takes up
+       * memory (even more than a buffer, since it might be aligned), and
+       * because we need to read every byte anyway (lazy on-demand reads into
+       * memory provided by memory mapping won't help).
+       */
+      new_buffer = realloc (*buffer, fsize);
+      if (new_buffer == NULL)
+      {
+        free (*buffer);
+        return -1;
+      }
+      read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size);
+      if (read_result != fsize - *buffer_size)
+      {
+        free (*buffer);
+        return -1;
+      }
+      *buffer = new_buffer;
+      *buffer_size = fsize;
     }
-  if (0 >= fprintf (plugin->cpipe_in, 
-                   "!%s\n",
-                   (tshmfn != NULL) ? tshmfn : ""))
+    data = (const unsigned char *) new_buffer;
+  }
+
+#if HAVE_ZLIB
+  if (compression_type == 1) 
+  {
+    /* Process gzip header */
+    unsigned int gzip_header_length = 10;
+
+    if (data[3] & 0x4) /* FEXTRA  set */
+      gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
+        (((unsigned) (data[11] & 0xff)) * 256);
+
+    if (data[3] & 0x8) /* FNAME set */
     {
-      stop_process (plugin);
-#ifndef WINDOWS
-      plugin->cpid = -1;
-#else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
-#endif
-      if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-       plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return 0;
+      const unsigned char *cptr = data + gzip_header_length;
+
+      /* stored file name is here */
+      while ((cptr - data) < fsize)
+      {
+        if ('\0' == *cptr)
+        break;
+        cptr++;
+      }
+
+      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+          EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+          (const char *) (data + gzip_header_length),
+          cptr - (data + gzip_header_length)))
+        return 0; /* done */
+
+      gzip_header_length = (cptr - data) + 1;
     }
-  if (0 >= fprintf (plugin->cpipe_in, 
-                   "s%lu\n",
-                   size))
+
+    if (data[3] & 0x16) /* FCOMMENT set */
     {
-      stop_process (plugin);
-#ifndef WINDOWS
-      plugin->cpid = -1;
-#else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
+      const unsigned char * cptr = data + gzip_header_length;
+
+      /* stored comment is here */
+      while (cptr < data + fsize)
+      {
+        if ('\0' == *cptr)
+          break;
+        cptr ++;
+      }  
+
+      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+          EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+          (const char *) (data + gzip_header_length),
+          cptr - (data + gzip_header_length)))
+        return 0; /* done */
+
+      gzip_header_length = (cptr - data) + 1;
+    }
+
+    if (data[3] & 0x2) /* FCHRC set */
+      gzip_header_length += 2;
+
+    memset (&strm, 0, sizeof (z_stream));
+
+#ifdef ZLIB_VERNUM
+    gzip_header_length = 0;
 #endif
-      if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-       plugin->flags = EXTRACTOR_OPTION_DISABLED;
-      return 0;
+
+    if (fsize > gzip_header_length)
+    {
+      strm.next_in = (Bytef *) data + gzip_header_length;
+      strm.avail_in = fsize - gzip_header_length;
     }
-  fflush (plugin->cpipe_in);
-  while (1)
+    else
     {
-      if (0 != read_all (plugin->cpipe_out,
-                        &hdr,
-                        sizeof(hdr)))
-       {
-         stop_process (plugin);
-#ifndef WINDOWS
-      plugin->cpid = -1;
+      strm.next_in = (Bytef *) data;
+      strm.avail_in = 0;
+    }
+    strm.total_in = 0;
+    strm.zalloc = NULL;
+    strm.zfree = NULL;
+    strm.opaque = NULL;
+
+    /*
+     * note: maybe plain inflateInit(&strm) is adequate,
+     * it looks more backward-compatible also ;
+     *
+     * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
+     * there might be a better check.
+     */
+    if (Z_OK == inflateInit2 (&strm,
+#ifdef ZLIB_VERNUM
+        15 + 32
 #else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
+        -MAX_WBITS
 #endif
-         if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-           plugin->flags = EXTRACTOR_OPTION_DISABLED;
-         return 0;
-       }
-      if  ( (hdr.type == 0) &&
-           (hdr.format == 0) &&
-           (hdr.data_len == 0) &&
-           (hdr.mime_len == 0) )
-       break;
-      if (hdr.mime_len > MAX_MIME_LEN)
-       {
-         stop_process (plugin);          
-#ifndef WINDOWS
-      plugin->cpid = -1;
-#else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
+        ))
+    {
+      pos = 0;
+      dsize = 2 * fsize;
+      if ( (dsize > MAX_DECOMPRESS) ||
+          (dsize < fsize) )
+        dsize = MAX_DECOMPRESS;
+      buf = malloc (dsize);
+
+      if (buf != NULL)
+      {
+        strm.next_out = (Bytef *) buf;
+        strm.avail_out = dsize;
+
+        do
+        {
+          ret = inflate (&strm, Z_SYNC_FLUSH);
+          if (ret == Z_OK)
+          {
+            if (dsize == MAX_DECOMPRESS)
+              break;
+
+            pos += strm.total_out;
+            strm.total_out = 0;
+            dsize *= 2;
+
+            if (dsize > MAX_DECOMPRESS)
+              dsize = MAX_DECOMPRESS;
+
+            rbuf = realloc (buf, dsize);
+            if (rbuf == NULL)
+            {
+              free (buf);
+              buf = NULL;
+              break;
+            }
+
+            buf = rbuf;
+            strm.next_out = (Bytef *) &buf[pos];
+            strm.avail_out = dsize - pos;
+          }
+          else if (ret != Z_STREAM_END) 
+          {
+            /* error */
+            free (buf);
+            buf = NULL;
+          }
+        } while ((buf != NULL) && (ret != Z_STREAM_END));
+
+        dsize = pos + strm.total_out;
+        if ((dsize == 0) && (buf != NULL))
+        {
+          free (buf);
+          buf = NULL;
+        }
+      }
+
+      inflateEnd (&strm);
+
+      if (fd != -1)
+        if (*buffer != NULL)
+          free (*buffer);
+
+      if (buf == NULL)
+      {
+        return -1;
+      }
+      else
+      {
+        *buffer = buf;
+        *buffer_size = dsize;
+        return 0;
+      }
+    }
+  }
 #endif
-         if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-           plugin->flags = EXTRACTOR_OPTION_DISABLED;
-         return 0;
-       }
-      data = malloc (hdr.data_len);
-      if (data == NULL)
-       {
-         stop_process (plugin);
-         return 1;
-       }
-      if ( (0 != (read_all (plugin->cpipe_out,
-                           mimetype,
-                           hdr.mime_len))) ||
-          (0 != (read_all (plugin->cpipe_out,
-                           data,
-                           hdr.data_len))) )
-       {
-         stop_process (plugin);
-#ifndef WINDOWS
-      plugin->cpid = -1;
-#else
-      plugin->hProcess = INVALID_HANDLE_VALUE;
+  
+#if HAVE_LIBBZ2
+  if (compression_type == 2) 
+  {
+    memset(&bstrm, 0, sizeof (bz_stream));
+    bstrm.next_in = (char *) data;
+    bstrm.avail_in = fsize;
+    bstrm.total_in_lo32 = 0;
+    bstrm.total_in_hi32 = 0;
+    bstrm.bzalloc = NULL;
+    bstrm.bzfree = NULL;
+    bstrm.opaque = NULL;
+    if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0)) 
+    {
+      bpos = 0;
+      dsize = 2 * fsize;
+      if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) )
+        dsize = MAX_DECOMPRESS;
+      buf = malloc (dsize);
+
+      if (buf != NULL) 
+      {
+        bstrm.next_out = (char *) buf;
+        bstrm.avail_out = dsize;
+
+        do
+        {
+          bret = BZ2_bzDecompress (&bstrm);
+          if (bret == Z_OK) 
+          {
+            if (dsize == MAX_DECOMPRESS)
+              break;
+            bpos += bstrm.total_out_lo32;
+            bstrm.total_out_lo32 = 0;
+
+            dsize *= 2;
+            if (dsize > MAX_DECOMPRESS)
+              dsize = MAX_DECOMPRESS;
+
+            rbuf = realloc(buf, dsize);
+            if (rbuf == NULL)
+            {
+              free (buf);
+              buf = NULL;
+              break;
+            }
+
+            buf = rbuf;
+            bstrm.next_out = (char*) &buf[bpos];
+            bstrm.avail_out = dsize - bpos;
+          } 
+          else if (bret != BZ_STREAM_END) 
+          {
+            /* error */
+            free (buf);
+            buf = NULL;
+          }
+        } while ((buf != NULL) && (bret != BZ_STREAM_END));
+
+        dsize = bpos + bstrm.total_out_lo32;
+        if ((dsize == 0) && (buf != NULL))
+        {
+          free (buf);
+          buf = NULL;
+        }
+      }
+
+      BZ2_bzDecompressEnd (&bstrm);
+
+      if (fd != -1)
+        if (*buffer != NULL)
+          free (*buffer);
+
+      if (buf == NULL)
+      {
+        return -1;
+      }
+      else
+      {
+        *buffer = buf;
+       *buffer_size = dsize;
+        return 0;
+      }
+    }
+  }
 #endif
-         free (data);
-         if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
-           plugin->flags = EXTRACTOR_OPTION_DISABLED;
-         return 0;
-       }          
-      mimetype[hdr.mime_len] = '\0';
-      if ( (proc != NULL) &&
-          (0 != proc (proc_cls, 
-                      plugin->short_libname,
-                      hdr.type,
-                      hdr.format,
-                      mimetype,
-                      data,
-                      hdr.data_len)) )
-       proc = NULL;    
-      free (data);
+  return -1;
+}
+
+/**
+ * Detect if we have compressed data on our hands.
+ *
+ * @param data pointer to a data buffer or NULL (in case fd is not -1)
+ * @param fd a file to read data from, or -1 (if data is not NULL)
+ * @param fsize size of data (if data is not NULL) or of file (if fd is not -1)
+ * @param buffer will receive a pointer to the data that this function read
+ * @param buffer_size will receive size of the buffer
+ * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression
+ */
+static int
+get_compression_type (const unsigned char *data, int fd, int64_t fsize, void **buffer, size_t *buffer_size)
+{
+  void *read_data = NULL;
+  size_t read_data_size = 0;
+  ssize_t read_result;
+
+  if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
+  {
+    *buffer = NULL;
+    return 0;
+  }
+  if (data == NULL)
+  {
+    read_data_size = COMPRESSED_DATA_PROBE_SIZE;
+    read_data = malloc (read_data_size);
+    if (read_data == NULL)
+      return -1;
+    read_result = READ (fd, read_data, read_data_size);
+    if (read_result != read_data_size)
+    {
+      free (read_data);
+      return -1;
     }
-  if (NULL == proc)
+    *buffer = read_data;
+    *buffer_size = read_data_size;
+    data = (const void *) read_data;
+  }
+#if HAVE_ZLIB
+  if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08))
     return 1;
+#endif
+#if HAVE_LIBBZ2
+  if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h'))
+    return 2;
+#endif
   return 0;
-}           
+}
 
+#if WINDOWS
 
 /**
  * Setup a shared memory segment.
  *
+ * @param ptr set to the location of the map segment
+ * @param map where to store the map handle
+ * @param fn name of the mapping
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocated for the mapping
+ * @return 0 on success
+ */
+static int
+make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
+{
+  const char *tpath = "Local\\";
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+      (unsigned int) RANDOM());
+  *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn);
+  *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
+  if (*ptr == NULL)
+  {
+    CloseHandle (*map);
+    return 1;
+  }
+  return 0;
+}
+
+static void
+destroy_shm_w32 (void *ptr, HANDLE map)
+{
+  UnmapViewOfFile (ptr);
+  CloseHandle (map);
+}
+
+#else
+
+/**
+ * Setup a shared memory segment.
+ *
  * @param ptr set to the location of the shm segment
  * @param shmid where to store the shm ID
  * @param fn name of the shared segment
@@ -867,22 +1479,10 @@
  * @return 0 on success
  */
 static int
-make_shm (int is_tail,
-         void **ptr,
-#ifndef WINDOWS
-         int *shmid,
-#else
-         HANDLE *map,
-#endif   
-         char *fn,
-         size_t fn_size,
-         size_t size)
+make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
 {
   const char *tpath;
-#ifdef WINDOWS
-  tpath = "Local\\";
-#elif SOMEBSD
-  const char *tpath;
+#if SOMEBSD
   /* this works on FreeBSD, not sure about others... */
   tpath = getenv ("TMPDIR");
   if (tpath == NULL)
@@ -890,578 +1490,606 @@
 #else
   tpath = "/"; /* Linux */
 #endif 
-  snprintf (fn,
-           fn_size,
-           "%slibextractor-%sshm-%u-%u",
-           tpath,
-           (is_tail) ? "t" : "",
-           getpid(),
-           (unsigned int) RANDOM());
-#ifndef WINDOWS
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+      (unsigned int) RANDOM());
   *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   *ptr = NULL;
-  if (-1 == (*shmid))
-    return 1;    
-  if ( (0 != ftruncate (*shmid, size)) ||
-       (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) ||
-       (*ptr == (void*) -1) )
+  if (-1 == *shmid)
+    return 1;
+  if ((0 != ftruncate (*shmid, size)) ||
+      (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) ||
+      (*ptr == (void*) -1) )
+  {
+    close (*shmid);
+    *shmid = -1;
+    shm_unlink (fn);
+    return 1;
+  }
+  return 0;
+}
+
+static void
+destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
+{
+  if (NULL != ptr)
+    munmap (ptr, size);
+  if (shm_id != -1)
+    close (shm_id);
+  shm_unlink (shm_name);
+}
+#endif
+
+
+static void
+init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name, int64_t fsize)
+{
+  int write_result;
+  int init_state_size;
+  unsigned char *init_state;
+  int t;
+  size_t shm_name_len = strlen (shm_name) + 1;
+  init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t);
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    init_state = malloc (init_state_size);
+    if (init_state == NULL)
     {
-      close (*shmid);
-      *shmid = -1;
-      shm_unlink (fn);
-      return 1;
+      stop_process (plugin);
+      return;
     }
-  return 0;
-#else
-  *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn);
-  *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
-  if (*ptr == NULL)
+    t = 0;
+    init_state[t] = MESSAGE_INIT_STATE;
+    t += 1;
+    memcpy (&init_state[t], &fsize, sizeof (int64_t));
+    t += sizeof (int64_t);
+    memcpy (&init_state[t], &shm_name_len, sizeof (size_t));
+    t += sizeof (size_t);
+    memcpy (&init_state[t], shm_name, shm_name_len);
+    t += shm_name_len;
+    write_result = plugin_write (plugin, init_state, init_state_size);
+    free (init_state);
+    if (write_result < init_state_size)
     {
-      CloseHandle (*map);
-      return 1;
+      stop_process (plugin);
+      return;
     }
-  return 0;
-#endif
+    plugin->seek_request = 0;
+    break;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    plugin_open_shm (plugin, shm_name);
+    plugin->fsize = fsize;
+    plugin->init_state_method (plugin);
+    plugin->seek_request = 0;
+    return;
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return;
+    break;
+  }
 }
 
+static void
+discard_plugin_state (struct EXTRACTOR_PluginList *plugin)
+{
+  int write_result;
+  unsigned char discard_state = MESSAGE_DISCARD_STATE;
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    /* This is somewhat clumsy, but it's the only stop-indicating
+     * non-W32/POSIX-specific field i could think of...
+     */
+    if (plugin->cpipe_out != -1)
+    {
+      write_result = plugin_write (plugin, &discard_state, 1);
+      if (write_result < 1)
+      {
+        stop_process (plugin);
+        return;
+      }
+    }
+    break;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    plugin->discard_state_method (plugin);
+    return;
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return;
+    break;
+  }
+}
 
-/**
- * Extract keywords using the given set of plugins.
- *
- * @param plugins the list of plugins to use
- * @param data data to process, never NULL
- * @param size number of bytes in data, ignored if data is NULL
- * @param tdata end of file data, or NULL
- * @param tsize number of bytes in tdata
- * @param proc function to call for each meta data item found
- * @param proc_cls cls argument to proc
- */
+static int
+give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size)
+{
+  int write_result;
+  int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t);
+  unsigned char updated_shm[updated_shm_size];
+  int t = 0;
+  updated_shm[t] = MESSAGE_UPDATED_SHM;
+  t += 1;
+  memcpy (&updated_shm[t], &position, sizeof (int64_t));
+  t += sizeof (int64_t);
+  memcpy (&updated_shm[t], &map_size, sizeof (size_t));
+  t += sizeof (size_t);
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    if (plugin->seek_request < 0)
+      return 0;
+    write_result = plugin_write (plugin, updated_shm, updated_shm_size);
+    if (write_result < updated_shm_size)
+    {
+      stop_process (plugin);
+      return 0;
+    }
+    return 1;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    plugin->position = position;
+    plugin->map_size = map_size;
+    return 0;
+  case EXTRACTOR_OPTION_DISABLED:
+    return 0;
+  default:
+    return 1;
+  }
+}
+
 static void
-extract (struct EXTRACTOR_PluginList *plugins,
-        const char * data,
-        size_t size,
-        const char * tdata,
-        size_t tsize,
-        EXTRACTOR_MetaDataProcessor proc,
-        void *proc_cls) 
+ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  struct EXTRACTOR_PluginList *ppos;
-  enum EXTRACTOR_Options flags;
-  void *ptr;
-  void *tptr;
-  char fn[255];
-  char tfn[255];
-  int want_shm;
-  int want_tail;
-#ifndef WINDOWS
-  int shmid;
-  int tshmid;
+  int extract_reply;
+  switch (plugin->flags)
+  {
+  case EXTRACTOR_OPTION_DEFAULT_POLICY:
+  case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    return;
+  case EXTRACTOR_OPTION_IN_PROCESS:
+    if (plugin->seek_request >= 0)
+    {
+      plugin->shm_ptr = shm_ptr;
+      extract_reply = plugin->extract_method (plugin, proc, proc_cls);
+      if (extract_reply == 1)
+        plugin->seek_request = -1;
+    }
+    break;
+  case EXTRACTOR_OPTION_DISABLED:
+    return;
+    break;
+  }
+}
+
+#if !WINDOWS
+/**
+ * Read exactly 'size' bytes from a plugin's output pipe (POSIX version).
+ *
+ * Short reads are continued until the whole request is satisfied.  A
+ * read that is interrupted by a signal (EINTR) is retried instead of
+ * being reported as a failure.
+ *
+ * @param plugin plugin whose 'cpipe_out' descriptor is read
+ * @param buf where to store the bytes
+ * @param size number of bytes wanted
+ * @return 'size' on success, otherwise the failing read() result
+ *         (0 on end-of-file, -1 on error)
+ */
+int
+plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf,
+             size_t size)
+{
+  ssize_t read_result;
+  size_t read_count = 0;
+  while (read_count < size)
+  {
+    read_result = read (plugin->cpipe_out, &buf[read_count], size - read_count);
+    if ((read_result < 0) && (EINTR == errno))
+      continue; /* interrupted before any byte was transferred: try again */
+    if (read_result <= 0)
+      return read_result;
+    read_count += read_result;
+  }
+  return read_count;
+}
 #else
-  HANDLE map;
-  HANDLE tmap;
+/**
+ * Read exactly 'size' bytes from a plugin's output pipe (Windows version).
+ *
+ * @param plugin plugin whose 'cpipe_out' handle is read
+ * @param buf where to store the bytes
+ * @param size number of bytes wanted
+ * @return 'size' on success, -1 as soon as a ReadFile() call fails
+ */
+int
+plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf,
+             size_t size)
+{
+  size_t total = 0;
+
+  for (;;)
+  {
+    DWORD got;
+
+    if (total >= size)
+      break;
+    if (! ReadFile (plugin->cpipe_out, &buf[total], size - total, &got, NULL))
+      return -1;
+    total += got;
+  }
+  return total;
+}
 #endif
 
-  want_shm = 0;
-  ppos = plugins;
-  while (NULL != ppos)
-    {      
-      switch (ppos->flags)
-       {
-       case EXTRACTOR_OPTION_DEFAULT_POLICY:
-#ifndef WINDOWS
-         if ( (0 == ppos->cpid) ||
-              (-1 == ppos->cpid) )
-#else
-         if (ppos->hProcess == NULL || ppos->hProcess == INVALID_HANDLE_VALUE)
-#endif
-           start_process (ppos);
-         want_shm = 1;
-         break;
-       case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
-#ifndef WINDOWS
-         if (0 == ppos->cpid)
-#else
-         if (ppos->hProcess == NULL)
-#endif
-           start_process (ppos);
-         want_shm = 1;
-         break;
-       case EXTRACTOR_OPTION_IN_PROCESS:
-         break;
-       case EXTRACTOR_OPTION_DISABLED:
-         break;
-       }      
-      ppos = ppos->next;
+/**
+ * Read messages from one out-of-process plugin until it either reports
+ * that it is done or asks for a seek.  Meta data messages received in
+ * between are passed on to 'proc'.
+ *
+ * @param plugin plugin to read from; its 'seek_request' is set to -1 on
+ *        MESSAGE_DONE and to the requested offset on MESSAGE_SEEK
+ * @param proc function to call for each meta data item found
+ * @param proc_cls cls argument to proc
+ * @return 0 after MESSAGE_DONE or MESSAGE_SEEK,
+ *         1 if 'proc' returned non-zero,
+ *         -1 on a failed or short read, a failed allocation or an
+ *         unknown message code
+ */
+static int
+receive_reply (struct EXTRACTOR_PluginList *plugin,
+               EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int read_result;
+  unsigned char code;
+  int must_read = 1;
+
+  int64_t seek_position;
+  struct IpcHeader hdr;
+  char *mime_type;
+  char *data;
+
+  while (must_read)
+  {
+    read_result = plugin_read (plugin, &code, 1);
+    if (read_result < 1)
+      return -1;
+    switch (code)
+    {
+    case MESSAGE_DONE: /* Done */
+      plugin->seek_request = -1;
+      must_read = 0;
+      break;
+    case MESSAGE_SEEK: /* Seek */
+      read_result = plugin_read (plugin, (unsigned char *) &seek_position, sizeof (int64_t));
+      /* Test the sign before comparing with a size_t: a plain
+       * 'read_result < sizeof (...)' converts -1 to a huge unsigned
+       * value and lets read errors slip through. */
+      if ((read_result < 0) || ((size_t) read_result < sizeof (int64_t)))
+        return -1;
+      plugin->seek_request = seek_position;
+      must_read = 0;
+      break;
+    case MESSAGE_META: /* Meta */
+      read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr));
+      /* FIXME: check hdr for sanity */
+      if ((read_result < 0) || ((size_t) read_result < sizeof (hdr)))
+        return -1;
+      mime_type = malloc (hdr.mime_len + 1);
+      if (mime_type == NULL)
+        return -1;
+      read_result = plugin_read (plugin, (unsigned char *) mime_type, hdr.mime_len);
+      if ((read_result < 0) || ((size_t) read_result < hdr.mime_len))
+      {
+        /* do not leak the mime type buffer on a failed read */
+        free (mime_type);
+        return -1;
+      }
+      mime_type[hdr.mime_len] = '\0';
+      data = malloc (hdr.data_len);
+      if (data == NULL)
+      {
+        free (mime_type);
+        return -1;
+      }
+      read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len);
+      if ((read_result < 0) || ((size_t) read_result < hdr.data_len))
+      {
+        free (mime_type);
+        free (data);
+        return -1;
+      }
+      read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type, hdr.meta_format, mime_type, data, hdr.data_len);
+      free (mime_type);
+      free (data);
+      if (read_result != 0)
+        return 1;
+      break;
+    default:
+      return -1;
     }
-  ptr = NULL;
-  tptr = NULL;
-  if (want_shm)
+  }
+  return 0;
+}
+
+#if !WINDOWS
+static int
+wait_for_reply (struct EXTRACTOR_PluginList *plugins, 
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int ready;
+  int result;
+  struct timeval tv;
+  fd_set to_check;
+  int highest = 0;
+  int read_result;
+  struct EXTRACTOR_PluginList *ppos;
+
+  FD_ZERO (&to_check);
+
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    switch (ppos->flags)
     {
-      if (size > MAX_READ)
-       size = MAX_READ;
-      if (0 == make_shm (0, 
-                        &ptr,
-#ifndef WINDOWS
-                        &shmid,
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+      if (ppos->seek_request == -1)
+        continue;
+      FD_SET (ppos->cpipe_out, &to_check);
+      if (highest < ppos->cpipe_out)
+        highest = ppos->cpipe_out;
+      break;
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
+    }
+  }
+
+  tv.tv_sec = 10;
+  tv.tv_usec = 0;
+  ready = select (highest + 1, &to_check, NULL, NULL, &tv);
+  if (ready <= 0)
+    /* an error or timeout -> something's wrong or all plugins hung up */
+    return -1;
+
+  result = 0;
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    switch (ppos->flags)
+    {
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+      if (ppos->seek_request == -1)
+        continue;
+      if (FD_ISSET (ppos->cpipe_out, &to_check))
+      {
+        read_result = receive_reply (ppos, proc, proc_cls);
+        if (read_result < 0)
+        {
+          stop_process (ppos);
+        }
+        result += 1;
+      }
+      break;
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
+    }
+  }
+  return result;
+}
 #else
-                        &map,
-#endif
-                        fn, sizeof(fn), size))
-       {
-         memcpy (ptr, data, size);      
-         if ( (tdata != NULL) &&
-              (0 == make_shm (1,
-                              &tptr,
-#ifndef WINDOWS
-                              &tshmid,
-#else
-                              &tmap,
-#endif
-                              tfn, sizeof(tfn), tsize)) )
-           {
-             memcpy (tptr, tdata, tsize);      
-           }
-         else
-           {
-             tptr = NULL;
-           }
-       }
-      else
-       {
-         want_shm = 0;
-       }           
+static int
+wait_for_reply (struct EXTRACTOR_PluginList *plugins, 
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int result;
+  DWORD ms;
+  DWORD first_ready;
+  DWORD dwresult;
+  DWORD bytes_read;
+  BOOL bresult;
+  int i;
+  HANDLE events[MAXIMUM_WAIT_OBJECTS];
+  
+
+  struct EXTRACTOR_PluginList *ppos;
+
+  i = 0;
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    if (i == MAXIMUM_WAIT_OBJECTS)
+      return -1;
+    if (ppos->seek_request == -1)
+      continue;
+    switch (ppos->flags)
+    {
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+      if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0)
+      {
+        ResetEvent (ppos->ov_read.hEvent);
+        bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, 
&ppos->ov_read);
+        if (bresult == TRUE)
+        {
+          SetEvent (ppos->ov_read.hEvent);
+        }
+        else
+        {
+          DWORD err = GetLastError ();
+          if (err != ERROR_IO_PENDING)
+            SetEvent (ppos->ov_read.hEvent);
+        }
+      }
+      events[i] = ppos->ov_read.hEvent;
+      i++;
+      break;
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
     }
-  ppos = plugins;
-  while (NULL != ppos)
+  }
+
+  ms = 10000;
+  first_ready = WaitForMultipleObjects (i, events, FALSE, ms);
+  if (first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED)
+    /* an error or timeout -> something's wrong or all plugins hung up */
+    return -1;
+
+  i = 0;
+  result = 0;
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    int read_result;
+    switch (ppos->flags)
     {
-      flags = ppos->flags;
-      if (! want_shm)
-       flags = EXTRACTOR_OPTION_IN_PROCESS;
-      switch (flags)
-       {
-       case EXTRACTOR_OPTION_DEFAULT_POLICY:
-         if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 
-                               (tptr != NULL) ? tfn : NULL,
-                               proc, proc_cls))
-           {
-             ppos = NULL;
-             break;
-           }
-#ifndef WINDOWS
-         if (ppos->cpid == -1)
-#else
-      if (ppos->hProcess == INVALID_HANDLE_VALUE)
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+      if (ppos->seek_request == -1)
+        continue;
+      if (i < first_ready)
+      {
+        i += 1;
+        continue;
+      }
+      dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0);
+      read_result = 0;
+      if (dwresult == WAIT_OBJECT_0)
+      {
+        read_result = receive_reply (ppos, proc, proc_cls);
+        result += 1;
+      }
+      if (dwresult == WAIT_FAILED || read_result < 0)
+      {
+        stop_process (ppos);
+        if (dwresult == WAIT_FAILED)
+          result += 1;
+      }
+      i++;
+      break;
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
+    }
+  }
+  return result;
+}
+
 #endif
-           {
-             start_process (ppos);
-             if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 
-                                   (tptr != NULL) ? tfn : NULL,
-                                   proc, proc_cls))
-               {
-                 ppos = NULL;
-                 break;
-               }
-           }
-         break;
-       case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
-         if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 
-                               (tptr != NULL) ? tfn : NULL,
-                               proc, proc_cls))
-           {
-             ppos = NULL;
-             break;
-           }
-         break;
-       case EXTRACTOR_OPTION_IN_PROCESS:                 
-         want_tail = ( (ppos->specials != NULL) &&
-                       (NULL != strstr (ppos->specials,
-                                        "want-tail")));
-         if (NULL == ppos->extractMethod) 
-           plugin_load (ppos);     
-         if ( ( (ppos->specials == NULL) ||
-                (NULL == strstr (ppos->specials,
-                                 "oop-only")) ) )
-           {
-             if (want_tail)
-               {
-                 if ( (NULL != ppos->extractMethod) &&
-                      (tdata != NULL) &&
-                      (0 != ppos->extractMethod (tdata, 
-                                                 tsize, 
-                                                 proc, 
-                                                 proc_cls,
-                                                 ppos->plugin_options)) )
-                   {
-                     ppos = NULL;
-                     break;
-                   }
-               }
-             else
-               {
-                 if ( (NULL != ppos->extractMethod) &&
-                      (0 != ppos->extractMethod (data, 
-                                                 size, 
-                                                 proc, 
-                                                 proc_cls,
-                                                 ppos->plugin_options)) )
-                   {
-                     ppos = NULL;
-                     break;
-                   }
-               }
-           }
-         break;
-       case EXTRACTOR_OPTION_DISABLED:
-         break;
-       }      
-      if (ppos == NULL)
-       break;
-      ppos = ppos->next;
+
+/**
+ * Choose the next file position to read from and move 'fd' there.
+ *
+ * The position chosen is the smallest seek request (greater than zero)
+ * of any non-disabled plugin that is not before 'current_position'.
+ *
+ * @param plugins the list of plugins whose seek requests are inspected
+ * @param fd file descriptor to reposition
+ * @param fsize size of the file; a result at or beyond it is rejected
+ * @param current_position position reached so far
+ * @return the new position, or -1 if no plugin requested a usable
+ *         position or the seek itself failed
+ */
+static int64_t
+seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd,
+                      int64_t fsize, int64_t current_position)
+{
+  int64_t min_pos = fsize;
+  struct EXTRACTOR_PluginList *ppos;
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    switch (ppos->flags)
+    {
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      if (ppos->seek_request > 0 && ppos->seek_request >= current_position &&
+          ppos->seek_request <= min_pos)
+        min_pos = ppos->seek_request;
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
     }
-  if (want_shm)
-    {
-#ifndef WINDOWS
-      if (NULL != ptr)
-       munmap (ptr, size);
-      if (shmid != -1)
-       close (shmid);
-      shm_unlink (fn);
-      if (NULL != tptr)
-       {
-         munmap (tptr, tsize);
-         shm_unlink (tfn);
-         if (tshmid != -1)
-           close (tshmid);
-       }
+  }
+  if (min_pos >= fsize)
+    return -1;
+  /* Use the 64-bit seek where one is available; the plain lseek()
+   * fallback below is limited to offsets below INT_MAX.
+   * NOTE(review): assumes HAVE_SEEK64 is the configure macro announcing
+   * lseek64() -- confirm against configure.ac. */
+#if WINDOWS
+  if (_lseeki64 (fd, min_pos, SEEK_SET) < 0)
+    return -1;
+#elif HAVE_SEEK64
+  if (lseek64 (fd, min_pos, SEEK_SET) < 0)
+    return -1;
 #else
-      UnmapViewOfFile (ptr);
-      CloseHandle (map);
-      if (tptr != NULL)
-       {
-         UnmapViewOfFile (tptr);
-         CloseHandle (tmap);
-       }
+  if (min_pos >= INT_MAX)
+    return -1;
+  if (lseek (fd, (off_t) min_pos, SEEK_SET) < 0)
+    return -1;
 #endif
-    }
+  return min_pos;
 }
 
+/**
+ * Call plugin_load() for a plugin that is configured to run in-process;
+ * plugins in any other mode are left untouched.
+ *
+ * @param plugin plugin list entry to inspect
+ */
+static void
+load_in_process_plugin (struct EXTRACTOR_PluginList *plugin)
+{
+  if (EXTRACTOR_OPTION_IN_PROCESS == plugin->flags)
+    plugin_load (plugin);
+}
 
 /**
- * If the given data is compressed using gzip or bzip2, decompress
- * it.  Run 'extract' on the decompressed contents (or the original
- * contents if they were not compressed).
+ * Extract keywords using the given set of plugins.
  *
  * @param plugins the list of plugins to use
- * @param data data to process, never NULL
- * @param size number of bytes in data
- * @param tdata end of file data, or NULL
- * @param tsize number of bytes in tdata
+ * @param data data to process, or NULL if fd is not -1
+ * @param fd file to read data from, or -1 if data is not NULL
+ * @param fsize size of data or size of file
+ * @param buffer a buffer with data already read from the file (if fd != -1)
+ * @param buffer_size size of buffer
  * @param proc function to call for each meta data item found
  * @param proc_cls cls argument to proc
  */
 static void
-decompress_and_extract (struct EXTRACTOR_PluginList *plugins,
-                       const unsigned char * data,
-                       size_t size,
-                       const char * tdata,
-                       size_t tsize,
-                       EXTRACTOR_MetaDataProcessor proc,
-                       void *proc_cls) {
-  unsigned char * buf;
-  unsigned char * rbuf;
-  size_t dsize;
-#if HAVE_ZLIB
-  z_stream strm;
-  int ret;
-  size_t pos;
+do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, 
int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor 
proc, void *proc_cls)
+{
+  int shm_result;
+  unsigned char *shm_ptr;
+#if !WINDOWS
+  int shm_id;
+#else
+  HANDLE map_handle;
 #endif
-#if HAVE_LIBBZ2
-  bz_stream bstrm;
-  int bret;
-  size_t bpos;
+  char shm_name[MAX_SHM_NAME + 1];
+
+  struct EXTRACTOR_PluginList *ppos;
+
+  int64_t position = 0;
+  size_t map_size;
+  ssize_t read_result;
+  int kill_plugins = 0;
+
+  map_size = (fd == -1) ? fsize : MAX_READ;
+
+  /* Make a shared memory object. Even if we're running in-process. Simpler 
that way */
+#if !WINDOWS
+  shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, 
MAX_SHM_NAME,
+      map_size);
+#else  
+  shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, 
MAX_SHM_NAME,
+      map_size);
 #endif
+  if (shm_result != 0)
+    return;
 
-  buf = NULL;
-  dsize = 0;
-#if HAVE_ZLIB
-  /* try gzip decompression first */
-  if ( (size >= 12) &&
-       (data[0] == 0x1f) &&
-       (data[1] == 0x8b) &&
-       (data[2] == 0x08) ) 
+  /* This three-loops-instead-of-one construction is intended to increase 
parallelism */
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+    start_process (ppos);
+
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+    load_in_process_plugin (ppos);
+
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+    write_plugin_data (ppos);
+
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+    init_plugin_state (ppos, shm_name, fsize);
+
+  while (1)
+  {
+    int plugins_not_ready = 0;
+    if (fd != -1)
     {
-      /* Process gzip header */
-      unsigned int gzip_header_length = 10;
-      
-      if (data[3] & 0x4) /* FEXTRA  set */
-       gzip_header_length += 2 + (unsigned) (data[10] & 0xff)
-         + (((unsigned) (data[11] & 0xff)) * 256);
-      
-      if (data[3] & 0x8) /* FNAME set */
-       {
-         const unsigned char * cptr = data + gzip_header_length;
-         /* stored file name is here */
-         while (cptr < data + size)
-           {
-             if ('\0' == *cptr)
-               break;        
-             cptr++;
-           }
-         if (0 != proc (proc_cls,
-                        "<zlib>",
-                        EXTRACTOR_METATYPE_FILENAME,
-                        EXTRACTOR_METAFORMAT_C_STRING,
-                        "text/plain",
-                        (const char*) (data + gzip_header_length),
-                        cptr - (data + gzip_header_length)))
-           return; /* done */    
-         gzip_header_length = (cptr - data) + 1;
-       }
-      if (data[3] & 0x16) /* FCOMMENT set */
-       {
-         const unsigned char * cptr = data + gzip_header_length;
-         /* stored comment is here */    
-         while (cptr < data + size)
-           {
-             if('\0' == *cptr)
-               break;
-             cptr ++;
-           }   
-         if (0 != proc (proc_cls,
-                        "<zlib>",
-                        EXTRACTOR_METATYPE_COMMENT,
-                        EXTRACTOR_METAFORMAT_C_STRING,
-                        "text/plain",
-                        (const char*) (data + gzip_header_length),
-                        cptr - (data + gzip_header_length)))
-           return; /* done */
-         gzip_header_length = (cptr - data) + 1;
-       }
-      if(data[3] & 0x2) /* FCHRC set */
-       gzip_header_length += 2;
-      memset(&strm,
-            0,
-            sizeof(z_stream));
-#ifdef ZLIB_VERNUM
-      gzip_header_length = 0;
-#endif
-      if (size > gzip_header_length) 
-       {
-         strm.next_in = (Bytef*) data + gzip_header_length;
-         strm.avail_in = size - gzip_header_length;
-       }
+      /* fill the shared buffer with data from the file */
+      if (buffer_size > 0)
+        memcpy (shm_ptr, buffer, buffer_size);
+      read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size);
+      if (read_result <= 0)
+        break;
       else
-       {
-         strm.next_in = (Bytef*) data;
-         strm.avail_in = 0;
-       }
-      strm.total_in = 0;
-      strm.zalloc = NULL;
-      strm.zfree = NULL;
-      strm.opaque = NULL;
-      
-      /*
-       * note: maybe plain inflateInit(&strm) is adequate,
-       * it looks more backward-compatible also ;
-       *
-       * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
-       * there might be a better check.
-       */
-      if (Z_OK == inflateInit2(&strm,
-#ifdef ZLIB_VERNUM
-                              15 + 32
-#else
-                              -MAX_WBITS
-#endif
-                              )) {
-       dsize = 2 * size;
-       if (dsize > MAX_DECOMPRESS)
-         dsize = MAX_DECOMPRESS;
-       buf = malloc(dsize);
-       pos = 0;
-       if (buf == NULL) 
-         {
-           inflateEnd(&strm);
-         } 
-       else 
-         {
-           strm.next_out = (Bytef*) buf;
-           strm.avail_out = dsize;
-           do
-             {
-               ret = inflate(&strm,
-                             Z_SYNC_FLUSH);
-               if (ret == Z_OK) 
-                 {
-                   if (dsize == MAX_DECOMPRESS)
-                     break;
-                   pos += strm.total_out;
-                   strm.total_out = 0;
-                   dsize *= 2;
-                   if (dsize > MAX_DECOMPRESS)
-                     dsize = MAX_DECOMPRESS;
-                   rbuf = realloc(buf, dsize);
-                   if (rbuf == NULL)
-                     {
-                       free (buf);
-                       buf = NULL;
-                       break;
-                     }
-                   buf = rbuf;
-                   strm.next_out = (Bytef*) &buf[pos];
-                   strm.avail_out = dsize - pos;
-                 }
-               else if (ret != Z_STREAM_END) 
-                 {
-                   /* error */
-                   free(buf);
-                   buf = NULL;
-                 }
-             } while ( (buf != NULL) &&                
-                       (ret != Z_STREAM_END) );
-           dsize = pos + strm.total_out;
-           inflateEnd(&strm);
-           if ( (dsize == 0) &&
-                (buf != NULL) )
-             {
-               free(buf);
-               buf = NULL;
-             }
-         }
-      }
+        map_size = read_result + buffer_size;
+      if (buffer_size > 0)
+         buffer_size = 0;
     }
-#endif
-  
-#if HAVE_LIBBZ2
-  if ( (size >= 4) &&
-       (data[0] == 'B') &&
-       (data[1] == 'Z') &&
-       (data[2] == 'h') ) 
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+      plugins_not_ready += give_shm_to_plugin (ppos, position, map_size);
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+      ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls);
+    while (plugins_not_ready > 0 && !kill_plugins)
     {
-      /* now try bz2 decompression */
-      memset(&bstrm,
-            0,
-            sizeof(bz_stream));
-      bstrm.next_in = (char*) data;
-      bstrm.avail_in = size;
-      bstrm.total_in_lo32 = 0;
-      bstrm.total_in_hi32 = 0;
-      bstrm.bzalloc = NULL;
-      bstrm.bzfree = NULL;
-      bstrm.opaque = NULL;
-      if ( (buf == NULL) &&
-          (BZ_OK == BZ2_bzDecompressInit(&bstrm,
-                                         0,
-                                         0)) ) 
-       {
-         dsize = 2 * size;
-         if (dsize > MAX_DECOMPRESS)
-           dsize = MAX_DECOMPRESS;
-         buf = malloc(dsize);
-         bpos = 0;
-         if (buf == NULL) 
-           {
-             BZ2_bzDecompressEnd(&bstrm);
-           }
-         else 
-           {
-             bstrm.next_out = (char*) buf;
-             bstrm.avail_out = dsize;
-             do {
-               bret = BZ2_bzDecompress(&bstrm);
-               if (bret == Z_OK) 
-                 {
-                   if (dsize == MAX_DECOMPRESS)
-                     break;
-                   bpos += bstrm.total_out_lo32;
-                   bstrm.total_out_lo32 = 0;
-                   dsize *= 2;
-                   if (dsize > MAX_DECOMPRESS)
-                     dsize = MAX_DECOMPRESS;
-                   rbuf = realloc(buf, dsize);
-                   if (rbuf == NULL)
-                     {
-                       free (buf);
-                       buf = NULL;
-                       break;
-                     }
-                   buf = rbuf;
-                   bstrm.next_out = (char*) &buf[bpos];
-                   bstrm.avail_out = dsize - bpos;
-                 } 
-               else if (bret != BZ_STREAM_END) 
-                 {
-                   /* error */
-                   free(buf);
-                   buf = NULL;
-                 }
-             } while ( (buf != NULL) &&
-                       (bret != BZ_STREAM_END) );
-             dsize = bpos + bstrm.total_out_lo32;
-             BZ2_bzDecompressEnd(&bstrm);
-             if ( (dsize == 0) &&
-                  (buf != NULL) )
-               {
-                 free(buf);
-                 buf = NULL;
-               }
-           }
-       }
+      int ready = wait_for_reply (plugins, proc, proc_cls);
+      if (ready <= 0)
+        kill_plugins = 1;
+      plugins_not_ready -= ready;
     }
-#endif  
-  if (buf != NULL) 
+    if (kill_plugins)
+      break;
+    if (fd != -1)
     {
-      data = buf;
-      size = dsize;
+      position += map_size;
+      position = seek_to_new_position (plugins, fd, fsize, position);
+      if (position < 0)
+        break;
     }
-  extract (plugins,
-          (const char*) data,
-          size,
-          tdata, 
-          tsize,
-          proc,
-          proc_cls);
-  if (buf != NULL)
-    free(buf);
-  errno = 0; /* kill transient errors */
-}
+    else
+      break;
+  }
 
+  if (kill_plugins)
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+      stop_process (ppos);
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+    discard_plugin_state (ppos);
 
-/**
- * Open a file
- */
-static int file_open(const char *filename, int oflag, ...)
-{
-  int mode;
-  const char *fn;
-#ifdef MINGW
-  char szFile[_MAX_PATH + 1];
-  long lRet;
-
-  if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
-  {
-    errno = ENOENT;
-    SetLastError(lRet);
-    return -1;
-  }
-  fn = szFile;
+#if WINDOWS
+  destroy_shm_w32 (shm_ptr, map_handle);
 #else
-  fn = filename;
+  destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
 #endif
-  mode = 0;
-#ifdef MINGW
-  /* Set binary mode */
-  mode |= O_BINARY;
-#endif
-  return OPEN(fn, oflag, mode);
 }
 
 
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 0
-#endif
-
-
 /**
  * Extract keywords from a file using the given set of plugins.
  * If needed, opens the file and loads its data (via mmap).  Then
@@ -1478,93 +2106,152 @@
  */
 void
 EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
-                  const char *filename,
-                  const void *data,
-                  size_t size,
-                  EXTRACTOR_MetaDataProcessor proc,
-                  void *proc_cls)
+       const char *filename,
+       const void *data,
+       size_t size,
+       EXTRACTOR_MetaDataProcessor proc,
+       void *proc_cls)
 {
-  int fd;
-  void * buffer;
-  void * tbuffer;
-  struct stat fstatbuf;
-  size_t fsize;
-  size_t tsize;
-  int eno;
-  off_t offset;
-  long pg;
-#ifdef WINDOWS
-  SYSTEM_INFO sys;
-#endif
+  int fd = -1;
+  struct stat64 fstatbuf;
+  int64_t fsize = 0;
+  int memory_only = 1;
+  int compression_type = -1;
+  void *buffer = NULL;
+  size_t buffer_size;
+  int decompression_result;
 
-  fd = -1;
-  buffer = NULL;
-  if ( (data == NULL) &&
-       (filename != NULL) &&
-       (0 == STAT(filename, &fstatbuf)) &&
-       (!S_ISDIR(fstatbuf.st_mode)) &&
-       (-1 != (fd = file_open (filename,
-                              O_RDONLY | O_LARGEFILE))) )
-    {      
-      fsize = (fstatbuf.st_size > 0xFFFFFFFF) ? 0xFFFFFFFF : fstatbuf.st_size;
-      if (fsize == 0) 
-       {
-         close(fd);
-         return;
-       }
-      if (fsize > MAX_READ)
-       fsize = MAX_READ;
-      buffer = MMAP(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0);
-      if ( (buffer == NULL) || (buffer == (void *) -1) ) 
-       {
-         eno = errno;
-         close(fd);
-         errno = eno;
-         return;
-       }
+  /* If data is not given, then we need to read it from the file. Try opening 
it */
+  if ((data == NULL) &&
+      (filename != NULL) &&
+      (0 == STAT64(filename, &fstatbuf)) &&
+      (!S_ISDIR(fstatbuf.st_mode)) &&
+      (-1 != (fd = file_open (filename,
+             O_RDONLY | O_LARGEFILE))))
+  {
+    /* Empty files are of no interest */
+    fsize = fstatbuf.st_size;
+    if (fsize == 0) 
+    {
+       close(fd);
+       return;
     }
-  if ( (buffer == NULL) &&
-       (data == NULL) )
+    /* File is too big -> can't read it into memory */
+    if (fsize > MAX_READ)
+      memory_only = 0;
+  }
+
+  /* Data is not given, and we've failed to open the file with data -> exit */
+  if ((fsize == 0) && (data == NULL))
     return;
-  /* for footer extraction */
-  tsize = 0;
-  tbuffer = NULL;
-  if ( (data == NULL) &&
-       (fstatbuf.st_size > fsize) &&
-       (fstatbuf.st_size > MAX_READ) )
+  /* fsize is now size of the data OR size of the file */
+  if (data != NULL)
+    fsize = size;
+
+  errno = 0;
+  /* Peek at first few bytes of the file (or of the data), and see if it's 
compressed.
+   * If data is NULL, buffer is allocated by the function and holds the first 
few bytes
+   * of the file, buffer_size is set too.
+   */
+  compression_type = get_compression_type (data, fd, fsize, &buffer, 
&buffer_size);
+  if (compression_type < 0)
+  {
+    /* errno is set by get_compression_type () */
+    if (fd != -1)
+      close (fd);
+    return;
+  }
+  if (compression_type > 0)
+  {
+    /* Don't assume that MAX_DECOMPRESS < MAX_READ */
+    if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ))
     {
-      pg = SYSCONF (_SC_PAGE_SIZE);
-      if ( (pg > 0) &&
-          (pg < MAX_READ) )
-       {
-         offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg;
-         if (offset < fstatbuf.st_size)
-           {
-             tsize = fstatbuf.st_size - offset;
-             tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset);
-             if ( (tbuffer == NULL) || (tbuffer == (void *) -1) ) 
-               {
-                 tsize = 0;
-                 tbuffer = NULL;
-               }
-           }
-       }
+      /* File or data is too big to be decompressed in-memory (the only kind of decompression we do) */
+      errno = EFBIG;
+      if (fd != -1)
+        close (fd);
+      if (buffer != NULL)
+        free (buffer);
+      return;
     }
-  decompress_and_extract (plugins,
-                         buffer != NULL ? buffer : data,
-                         buffer != NULL ? fsize : size,
-                         tbuffer,
-                         tsize,
-                         proc,
-                         proc_cls);
+    /* Decompress data (or file contents + what we've read so far). Either way
+     * it writes a new pointer to buffer, sets buffer_size, and frees the old
+     * buffer (if it wasn't NULL).
+     * In case of failure it cleans up the buffer after itself.
+     * Will also report compression-related metadata to the caller.
+     */
+    decompression_result = try_to_decompress (data, fd, fsize, 
compression_type, &buffer, &buffer_size, proc, proc_cls);
+    if (decompression_result != 0)
+    {
+      /* Buffer is taken care of already */
+      close (fd);
+      errno = EILSEQ;
+      return;
+    }
+    else
+    {
+      close (fd);
+      fd = -1;
+    }
+  }
+
+  /* Now we either have a non-NULL data of fsize bytes
+   * OR a valid fd to read from and a small buffer of buffer_size bytes
+   * OR an invalid fd and a big buffer of buffer_size bytes
+   * Simplify this situation a bit:
+   */
+  if ((data == NULL) && (fd == -1) && (buffer_size > 0))
+  {
+    data = (const void *) buffer;
+    fsize = buffer_size;
+  }
+
+  /* Now we either have a non-NULL data of fsize bytes
+   * OR a valid fd to read from and a small buffer of buffer_size bytes
+   * and we might need to free the buffer later in either case
+   */
+
+  /* do_extract () might set errno itself, but from our point of view 
everything is OK */
+  errno = 0;
+
+  do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls);
+
   if (buffer != NULL)
-    MUNMAP (buffer, fsize);
-  if (tbuffer != NULL)
-    MUNMAP (tbuffer, tsize);
+    free (buffer);
   if (-1 != fd)
-    close(fd);  
+    close(fd);
 }
 
+
+#if WINDOWS
+/**
+ * Entry point used when the plugin host is started through rundll32.
+ * The command line carries two numbers, the OS handles of the input
+ * and the output pipe, which are turned into C file descriptors and
+ * passed to process_requests().
+ *
+ * @param hwnd unused
+ * @param hinst unused
+ * @param lpszCmdLine command line with the two handle values
+ * @param nCmdShow unused
+ */
+void CALLBACK 
+RundllEntryPoint (HWND hwnd, 
+                 HINSTANCE hinst, 
+                 LPSTR lpszCmdLine, 
+                 int nCmdShow)
+{
+  unsigned long in_raw;
+  unsigned long out_raw;
+  int in, out;
+
+  /* "%lu" stores an unsigned long; scanning directly into an intptr_t
+   * is undefined where the two types differ in size (64-bit Windows). */
+  if (2 != sscanf (lpszCmdLine, "%lu %lu", &in_raw, &out_raw))
+    return;
+  in = _open_osfhandle ((intptr_t) in_raw, _O_RDONLY);
+  out = _open_osfhandle ((intptr_t) out_raw, 0);
+  if ((-1 == in) || (-1 == out))
+    return;
+  setmode (in, _O_BINARY);
+  setmode (out, _O_BINARY);
+  process_requests (read_plugin_data (in),
+                   in, out);
+}
+
+/**
+ * ANSI-named alias of RundllEntryPoint(); forwards all arguments.
+ */
+void CALLBACK 
+RundllEntryPointA (HWND hwnd, 
+                 HINSTANCE hinst, 
+                 LPSTR lpszCmdLine, 
+                 int nCmdShow)
+{
+  /* no 'return <expr>;' here: a void function must not return an expression in C */
+  RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow);
+}
+#endif
+
 /**
  * Initialize gettext and libltdl (and W32 if needed).
  */
@@ -1579,12 +2266,12 @@
   if (err > 0) {
 #if DEBUG
     fprintf(stderr,
-           _("Initialization of plugin mechanism failed: %s!\n"),
-           lt_dlerror());
+      _("Initialization of plugin mechanism failed: %s!\n"),
+      lt_dlerror());
 #endif
     return;
   }
-#ifdef MINGW
+#if WINDOWS
   plibc_init("GNU", PACKAGE);
 #endif
 }
@@ -1594,12 +2281,10 @@
  * Deinit.
  */
 void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() {
-#ifdef MINGW
+#if WINDOWS
   plibc_shutdown();
 #endif
   lt_dlexit ();
 }
 
-
-
 /* end of extractor.c */

Modified: Extractor/src/main/extractor_plugins.c
===================================================================
--- Extractor/src/main/extractor_plugins.c      2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/main/extractor_plugins.c      2012-03-27 13:05:17 UTC (rev 
20783)
@@ -204,15 +204,24 @@
       plugin->flags = EXTRACTOR_OPTION_DISABLED;
       return -1;
     }
-  plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle,
-                                                 "_EXTRACTOR_%s_extract",
+  plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle,
+                                                 
"_EXTRACTOR_%s_extract_method",
                                                  plugin->libname,
                                                  &plugin->specials);
-  if (plugin->extractMethod == NULL) 
+  plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle,
+                                                 
"_EXTRACTOR_%s_init_state_method",
+                                                 plugin->libname,
+                                                 &plugin->specials);
+  plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle,
+                                                 
"_EXTRACTOR_%s_discard_state_method",
+                                                 plugin->libname,
+                                                 &plugin->specials);
+  if (plugin->extract_method == NULL || plugin->init_state_method == NULL ||
+      plugin->discard_state_method == NULL) 
     {
 #if DEBUG
       fprintf (stderr,
-              "Resolving `extract' method of plugin `%s' failed: %s\n",
+              "Resolving `extract', 'init_state' or 'discard_state' method(s) 
of plugin `%s' failed: %s\n",
               plugin->short_libname,
               lt_dlerror ());
 #endif
@@ -243,8 +252,15 @@
                      enum EXTRACTOR_Options flags)
 {
   struct EXTRACTOR_PluginList *result;
+  struct EXTRACTOR_PluginList *i;
   char *libname;
 
+  for (i = prev; i != NULL; i = i->next)
+  {
+    if (strcmp (i->short_libname, library) == 0)
+      return prev;
+  }
+
   libname = find_plugin (library);
   if (libname == NULL)
     {

Modified: Extractor/src/main/extractor_plugins.h
===================================================================
--- Extractor/src/main/extractor_plugins.h      2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/main/extractor_plugins.h      2012-03-27 13:05:17 UTC (rev 
20783)
@@ -64,7 +64,9 @@
   /**
    * Pointer to the function used for meta data extraction.
    */
-  EXTRACTOR_ExtractMethod extractMethod;
+  EXTRACTOR_extract_method extract_method;
+  EXTRACTOR_init_state_method init_state_method;
+  EXTRACTOR_discard_state_method discard_state_method;
 
   /**
    * Options for the plugin.
@@ -84,26 +86,72 @@
   enum EXTRACTOR_Options flags;
 
   /**
-   * Process ID of the child process for this plugin. 0 for 
-   * none.
+   * Process ID of the child process for this plugin. 0 for none.
    */
-#ifndef WINDOWS
+#if !WINDOWS
   int cpid;
 #else
   HANDLE hProcess;
 #endif
 
   /**
-   * Pipe used to send information about shared memory segments to
-   * the child process.  NULL if not initialized.
+   * Pipe used to communicate information to the plugin child process.
+   * NULL if not initialized.
    */
+#if !WINDOWS
   FILE *cpipe_in;
+#else
+  HANDLE cpipe_in;
+#endif
 
   /**
+   * A position this plugin wants us to seek to. -1 if it's finished.
+   * Starts at 0;
+   */
+  int64_t seek_request;
+
+#if !WINDOWS
+  int shm_id;
+#else
+  HANDLE map_handle;
+#endif
+
+  void *state;
+
+  int64_t fsize;
+
+  int64_t position;
+
+  unsigned char *shm_ptr;
+
+  size_t map_size;
+
+  /**
    * Pipe used to read information about extracted meta data from
-   * the child process.  -1 if not initialized.
+   * the plugin child process.  -1 if not initialized.
    */
+#if !WINDOWS
   int cpipe_out;
+#else
+  HANDLE cpipe_out;
+#endif
+
+#if WINDOWS
+  /**
+   * A structure for overlapped reads on W32.
+   */
+  OVERLAPPED ov_read;
+
+  /**
+   * A structure for overlapped writes on W32.
+   */
+  OVERLAPPED ov_write;
+
+  /**
+   * A write buffer for overlapped writes on W32
+   */
+  unsigned char *ov_write_buffer;
+#endif
 };
 
 /**

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/Makefile.am   2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,4 +1,4 @@
-INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common
+INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common 
-I$(top_srcdir)/src/main
 
 # install plugins under:
 plugindir = $(libdir)/@RPLUGINDIR@
@@ -11,184 +11,24 @@
 
 SUBDIRS = . 
 
-if HAVE_FFMPEG
- thumbffmpeg=libextractor_thumbnailffmpeg.la
-endif
-
-if HAVE_LIBRPM
- rpm=libextractor_rpm.la 
-endif
-
-if HAVE_GLIB
-if WITH_GSF
- ole2=libextractor_ole2.la
-endif
-if HAVE_GTK
- thumbgtk=libextractor_thumbnailgtk.la
-endif
-endif
-
-if HAVE_QT
- thumbqt=libextractor_thumbnailqt.la
- qtflags=-lQtGui -lQtCore -lpthread
-else
-if HAVE_QT4
- thumbqt=libextractor_thumbnailqt.la
- qtflags=-lQtGui4 -lQtCore4
-endif
-endif
-
-if HAVE_QT_SVG
- svgflags = -lQtSvg
-else
-if HAVE_QT_SVG4
- svgflags = -lQtSvg4
-endif
-endif
-
-if HAVE_CXX
-if HAVE_EXIV2
- exiv2=libextractor_exiv2.la
-endif
-if HAVE_POPPLER
- pdf=libextractor_pdf.la
-endif
-endif
-
-if HAVE_MPEG2
- mpeg = libextractor_mpeg.la
-endif
-
-if HAVE_VORBISFILE
- ogg = libextractor_ogg.la
-endif
-
-if HAVE_FLAC
- flac = libextractor_flac.la
-endif
-
-if NEED_VORBIS
- vorbisflag = -lvorbis
-endif
-
-if NEED_OGG
- flacoggflag = -logg
-endif
-
 plugin_LTLIBRARIES = \
-  libextractor_applefile.la \
-  libextractor_asf.la \
-  libextractor_deb.la \
-  libextractor_dvi.la \
-  libextractor_elf.la \
-  $(exiv2) \
-  $(flac) \
-  libextractor_flv.la \
-  libextractor_gif.la \
-  libextractor_html.la \
   libextractor_id3.la \
   libextractor_id3v2.la \
-  libextractor_id3v23.la \
-  libextractor_id3v24.la \
-  libextractor_it.la \
-  libextractor_jpeg.la \
-  libextractor_man.la \
-  libextractor_mime.la \
-  libextractor_mkv.la \
-  libextractor_mp3.la \
-  $(mpeg) \
-  libextractor_nsf.la \
-  libextractor_nsfe.la \
-  libextractor_odf.la \
-  $(ogg) \
-  $(ole2) \
-  $(pdf) \
-  libextractor_png.la \
-  libextractor_ps.la \
-  libextractor_qt.la \
-  libextractor_real.la \
-  libextractor_riff.la \
-  $(rpm) \
-  libextractor_s3m.la \
-  libextractor_sid.la \
-  libextractor_tar.la \
-  $(thumbgtk) \
-  $(thumbqt) \
-  $(thumbffmpeg) \
-  libextractor_tiff.la \
-  libextractor_wav.la \
-  libextractor_xm.la \
-  libextractor_zip.la
+  libextractor_mp3.la
 
-libextractor_applefile_la_SOURCES = \
-  applefile_extractor.c
-libextractor_applefile_la_LDFLAGS = \
+libextractor_mp3_la_SOURCES = \
+  mp3_extractor.c 
+libextractor_mp3_la_LDFLAGS = \
   $(PLUGINFLAGS)
-libextractor_applefile_la_LIBADD = \
+libextractor_mp3_la_LIBADD = \
   $(top_builddir)/src/common/libextractor_common.la \
   $(LE_LIBINTL)
 
-libextractor_asf_la_SOURCES = \
-  asf_extractor.c 
-libextractor_asf_la_LDFLAGS = \
-  $(top_builddir)/src/common/libextractor_common.la \
+libextractor_ebml_la_SOURCES = \
+  ebml_extractor.c 
+libextractor_ebml_la_LDFLAGS = \
   $(PLUGINFLAGS)
 
-libextractor_deb_la_SOURCES = \
-  deb_extractor.c 
-libextractor_deb_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_deb_la_LIBADD = \
-  -lz
-
-libextractor_dvi_la_SOURCES = \
-  dvi_extractor.c 
-libextractor_dvi_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_elf_la_SOURCES = \
-  elf_extractor.c 
-libextractor_elf_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_elf_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_exiv2_la_SOURCES = \
-  exiv2_extractor.cc 
-libextractor_exiv2_la_LDFLAGS = \
-  $(XTRA_CPPLIBS) $(PLUGINFLAGS) 
-libextractor_exiv2_la_LIBADD = \
-  -lexiv2
-
-libextractor_flac_la_SOURCES = \
-  flac_extractor.c
-libextractor_flac_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_flac_la_LIBADD = \
-  -lFLAC $(flacoggflag) \
-  $(LE_LIBINTL)
-
-libextractor_flv_la_SOURCES = \
-  flv_extractor.c
-libextractor_flv_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_flv_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_gif_la_SOURCES = \
-  gif_extractor.c 
-libextractor_gif_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_gif_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_html_la_SOURCES = \
-  html_extractor.c 
-libextractor_html_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_html_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
 libextractor_id3_la_SOURCES = \
   id3_extractor.c 
 libextractor_id3_la_LDFLAGS = \
@@ -204,211 +44,4 @@
 libextractor_id3v2_la_LIBADD = \
   $(top_builddir)/src/common/libextractor_common.la
 
-libextractor_id3v23_la_SOURCES = \
-  id3v23_extractor.c 
-libextractor_id3v23_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_id3v23_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_id3v24_la_SOURCES = \
-  id3v24_extractor.c 
-libextractor_id3v24_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_id3v24_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_it_la_SOURCES = \
-  it_extractor.c 
-libextractor_it_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_jpeg_la_SOURCES = \
-  jpeg_extractor.c 
-libextractor_jpeg_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_jpeg_la_LIBADD = \
-  $(LE_LIBINTL)
-
-libextractor_man_la_SOURCES = \
-  man_extractor.c 
-libextractor_man_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_man_la_LIBADD = \
-  $(LE_LIBINTL)
-
-libextractor_mime_la_SOURCES = \
-  mime_extractor.c 
-libextractor_mime_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_mkv_la_SOURCES = \
-  mkv_extractor.c 
-libextractor_mkv_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_mp3_la_SOURCES = \
-  mp3_extractor.c 
-libextractor_mp3_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_mp3_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la \
-  $(LE_LIBINTL)
-
-libextractor_mpeg_la_SOURCES = \
-  mpeg_extractor.c 
-libextractor_mpeg_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_mpeg_la_LIBADD = \
-  -lmpeg2
-
-libextractor_nsf_la_SOURCES = \
-  nsf_extractor.c 
-libextractor_nsf_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_nsfe_la_SOURCES = \
-  nsfe_extractor.c 
-libextractor_nsfe_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_odf_la_SOURCES = \
-  odf_extractor.c
-libextractor_odf_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_odf_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la \
- -lz 
-
-libextractor_ogg_la_SOURCES = \
-  ogg_extractor.c
-libextractor_ogg_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_ogg_la_LIBADD = \
-  -lvorbisfile $(vorbisflag) -logg
-
-libextractor_ole2_la_SOURCES =  \
-  ole2_extractor.c
-libextractor_ole2_la_CFLAGS = \
-  $(GSF_CFLAGS) 
-libextractor_ole2_la_LIBADD = \
-  $(LIBADD) $(GSF_LIBS) \
-  $(top_builddir)/src/common/libextractor_common.la 
-libextractor_ole2_la_LDFLAGS = \
-  $(PLUGINFLAGS) 
-
-libextractor_pdf_la_SOURCES = \
-  pdf_extractor.cc 
-libextractor_pdf_la_LDFLAGS = \
-  $(XTRA_CPPLIBS) $(PLUGINFLAGS) 
-libextractor_pdf_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la \
-  -lpoppler
-
-libextractor_png_la_SOURCES = \
-  png_extractor.c
-libextractor_png_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_png_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la \
-  -lz
-
-libextractor_ps_la_SOURCES = \
-  ps_extractor.c
-libextractor_ps_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_qt_la_SOURCES = \
-  qt_extractor.c 
-libextractor_qt_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_qt_la_LIBADD = \
-  -lz -lm
-
-libextractor_real_la_SOURCES = \
-  real_extractor.c 
-libextractor_real_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_riff_la_SOURCES = \
-  riff_extractor.c 
-libextractor_riff_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_riff_la_LIBADD = \
-  $(LE_LIBINTL) \
-  -lm 
-
-libextractor_rpm_la_SOURCES = \
-  rpm_extractor.c 
-libextractor_rpm_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_rpm_la_LIBADD = \
-  -lrpm
-
-libextractor_s3m_la_SOURCES = \
-  s3m_extractor.c 
-libextractor_s3m_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_sid_la_SOURCES = \
-  sid_extractor.c 
-libextractor_sid_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_tar_la_SOURCES = \
-  tar_extractor.c 
-libextractor_tar_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_thumbnailffmpeg_la_SOURCES = \
-  thumbnailffmpeg_extractor.c
-libextractor_thumbnailffmpeg_la_LIBADD = \
-  -lavformat -lavcodec -lswscale -lavutil -lz -lbz2
-libextractor_thumbnailffmpeg_la_LDFLAGS = \
-  $(PLUGINFLAGS) 
-
-libextractor_thumbnailgtk_la_CFLAGS = \
-  $(GLIB_CFLAGS) $(GTK_CFLAGS)
-libextractor_thumbnailgtk_la_LIBADD = \
-  $(LIBADD) -lgobject-2.0 @GTK_LIBS@ 
-libextractor_thumbnailgtk_la_LDFLAGS = \
-  $(PLUGINFLAGS) 
-libextractor_thumbnailgtk_la_SOURCES = \
-  thumbnailgtk_extractor.c
-
-libextractor_thumbnailqt_la_SOURCES = \
-  thumbnailqt_extractor.cc
-libextractor_thumbnailqt_la_LDFLAGS = \
-  $(QT_LDFLAGS) \
-  $(PLUGINFLAGS)
-libextractor_thumbnailqt_la_LIBADD = \
-  $(qtflags) $(svgflags) 
-libextractor_thumbnailqt_la_CPPFLAGS = \
-  $(QT_CPPFLAGS) \
-  $(QT_CFLAGS) $(QT_SVG_CFLAGS)
-
-libextractor_tiff_la_SOURCES = \
-  tiff_extractor.c 
-libextractor_tiff_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_tiff_la_LIBADD = \
-  $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_wav_la_SOURCES = \
-  wav_extractor.c
-libextractor_wav_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_wav_la_LIBADD = \
-  $(LE_LIBINTL)
-
-libextractor_xm_la_SOURCES = \
-  xm_extractor.c 
-libextractor_xm_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
-libextractor_zip_la_SOURCES = \
-  zip_extractor.c
-libextractor_zip_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-
 EXTRA_DIST = template_extractor.c 

Modified: Extractor/src/plugins/id3_extractor.c
===================================================================
--- Extractor/src/plugins/id3_extractor.c       2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/plugins/id3_extractor.c       2012-03-27 13:05:17 UTC (rev 
20783)
@@ -29,6 +29,8 @@
 #include <unistd.h>
 #include <stdlib.h>
 
+#include "extractor_plugins.h"
+
 typedef struct
 {
   char *title;
@@ -199,6 +201,46 @@
 #define OK         0
 #define INVALID_ID3 1
 
+/**
+ * Per-plugin state of the ID3 extractor.  It is allocated by the
+ * init_state method, stored in the plugin's `state' pointer and
+ * released by the discard_state method.
+ */
+struct id3_state
+{
+  /* current step of the extraction; holds a value of enum ID3State */
+  int state;
+  /* tag filled in by get_id3 (); the discard_state method frees its
+     title, year, album, artist and comment strings */
+  id3tag info;
+};
+
+/**
+ * Steps of the ID3 extraction state machine (stored in
+ * struct id3_state's `state' field).
+ */
+enum ID3State
+{
+  /* nothing (more) to do: the extract method sets seek_request to -1
+     and returns 1 */
+  ID3_INVALID = -1,
+  /* initial step: locate the last 128 bytes of the file, requesting a
+     seek if they lie beyond the currently mapped data */
+  ID3_SEEKING_TO_TAIL = 0,
+  /* the tail is available: parse the tag and report its fields */
+  ID3_READING_TAIL = 1
+};
+
+/**
+ * Create the ID3 extraction state for a plugin.
+ *
+ * Stores a zero-filled struct id3_state in plugin->state with the
+ * state machine at ID3_SEEKING_TO_TAIL.  If the allocation fails,
+ * plugin->state is left NULL.
+ *
+ * @param plugin plugin list entry that receives the state
+ */
+void
+EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  /* calloc () hands back zeroed memory, so no separate memset () */
+  struct id3_state *fresh = calloc (1, sizeof (struct id3_state));
+
+  plugin->state = fresh;
+  if (NULL != fresh)
+    fresh->state = ID3_SEEKING_TO_TAIL;
+}
+
+/**
+ * Release the ID3 extraction state of a plugin.
+ *
+ * Frees the title, year, album, artist and comment strings of the
+ * stored tag and then the state itself; the genre field is not freed
+ * here.  plugin->state is NULL afterwards.  Does nothing beyond that
+ * if no state is attached.
+ *
+ * @param plugin plugin list entry whose state is discarded
+ */
+void
+EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct id3_state *old = plugin->state;
+
+  plugin->state = NULL;
+  if (NULL == old)
+    return;
+  /* free (NULL) is a no-op, so fields that were never set need no guard */
+  free (old->info.title);
+  free (old->info.year);
+  free (old->info.album);
+  free (old->info.artist);
+  free (old->info.comment);
+  free (old);
+}
+
 static void
 trim (char *k)
 {
@@ -209,14 +251,14 @@
 }
 
 static int
-get_id3 (const char *data, size_t size, id3tag * id3)
+get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3)
 {
   const char *pos;
 
   if (size < 128)
     return INVALID_ID3;
 
-  pos = &data[size - 128];
+  pos = &data[offset];
   if (0 != strncmp ("TAG", pos, 3))
     return INVALID_ID3;
   pos += 3;
@@ -253,49 +295,82 @@
 }
 
 
-#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc 
(proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, 
strlen(s)+1)))) goto FINISH; } while (0)
+#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc 
(proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) 
return 1; } while (0)
 
 
-const char *
-EXTRACTOR_id3_options ()
+int
+EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
+    EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  return "want-tail";
-}
+  int64_t file_position;
+  int64_t file_size;
+  int64_t offset = 0;
+  int64_t size;
+  struct id3_state *state;
+  char *data;
+  
+  char track[16];
 
+  if (plugin == NULL || plugin->state == NULL)
+    return 1;
 
-int 
-EXTRACTOR_id3_extract (const char *data,
-                      size_t size,
-                      EXTRACTOR_MetaDataProcessor proc,
-                      void *proc_cls,
-                      const char *options)
-{
-  id3tag info;
-  char track[16];
-  int ret;
+  state = plugin->state;
+  file_position = plugin->position;
+  file_size = plugin->fsize;
+  size = plugin->map_size;
+  data = (char *) plugin->shm_ptr;
 
-  ret = 0;
-  if (OK != get_id3 (data, size, &info))
-    return 0;
-  ADD (info.title, EXTRACTOR_METATYPE_TITLE);
-  ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
-  ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
-  ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
-  ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
-  ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
-  if (info.track_number != 0)
+  if (plugin->seek_request < 0)
+    return 1;
+  if (file_position - plugin->seek_request > 0)
+  {
+    plugin->seek_request = -1;
+    return 1;
+  }
+  if (plugin->seek_request - file_position < size)
+    offset = plugin->seek_request - file_position;
+
+  while (1)
+  {
+    switch (state->state)
     {
-      snprintf(track, 
-              sizeof(track), "%u", info.track_number);
-      ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
+    case ID3_INVALID:
+      plugin->seek_request = -1;
+      return 1;
+    case ID3_SEEKING_TO_TAIL:
+      offset = file_size - 128 - file_position;
+      if (offset > size)
+      {
+        state->state = ID3_READING_TAIL;
+        plugin->seek_request = file_position + offset;
+        return 0;
+      }
+      else if (offset < 0)
+      {
+        state->state = ID3_INVALID;
+        break;
+      }
+      state->state = ID3_READING_TAIL;
+       break;
+    case ID3_READING_TAIL:
+      if (OK != get_id3 (data, offset, size - offset, &state->info))
+        return 1;
+      ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
+      ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
+      ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
+      ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
+      ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
+      ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
+      if (state->info.track_number != 0)
+      {
+        snprintf(track, 
+            sizeof(track), "%u", state->info.track_number);
+        ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
+      }
+      state->state = ID3_INVALID;
     }
-FINISH:
-  if (info.title != NULL) free (info.title);
-  if (info.year != NULL) free (info.year);
-  if (info.album != NULL) free (info.album);
-  if (info.artist != NULL) free (info.artist);
-  if (info.comment != NULL) free (info.comment);
-  return ret; 
+  }
+  return 1;
 }
 
 /* end of id3_extractor.c */

Deleted: Extractor/src/plugins/id3v23_extractor.c
===================================================================
--- Extractor/src/plugins/id3v23_extractor.c    2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/plugins/id3v23_extractor.c    2012-03-27 13:05:17 UTC (rev 
20783)
@@ -1,420 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian 
Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
-
- */
-#define DEBUG_EXTRACT_ID3v23 0
-
-#include "platform.h"
-#include "extractor.h"
-#include <string.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#ifndef MINGW
-#include <sys/mman.h>
-#endif
-
-#include "convert.h"
-
-enum Id3v23Fmt
-  {
-    T, /* simple, 0-terminated string, prefixed by encoding */
-    U, /* 0-terminated ASCII string, no encoding */
-    UL, /* unsync'ed lyrics */
-    SL, /* sync'ed lyrics */
-    L, /* string with language prefix */
-    I /* image */
-  };
-
-typedef struct
-{
-  const char *text;
-  enum EXTRACTOR_MetaType type;
-  enum Id3v23Fmt fmt;
-} Matches;
-
-static Matches tmap[] = {
-  {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
-  {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
-  {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
-  {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
-  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
-  /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
-  /* TDLY */
-  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
-  {"TEXT", EXTRACTOR_METATYPE_WRITER, T},  
-  {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
-  /* TIME */
-  {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
-  {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
-  {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
-  /* TKEY */
-  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
-  {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as 
unit */
-  {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, 
-  {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
-  {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
-  {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
-  {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
-  {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
-  {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
-  {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
-  {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
-  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
-  {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 
-  {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
-  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
-  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
-  /* TRDA */
-  {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
-  /* TRSO */
-  {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
-  {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
-  /* TSSE */
-  {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
-  {"WCOM", EXTRACTOR_METATYPE_URL, U},
-  {"WCOP", EXTRACTOR_METATYPE_URL, U},
-  {"WOAF", EXTRACTOR_METATYPE_URL, U},
-  {"WOAS", EXTRACTOR_METATYPE_URL, U},
-  {"WORS", EXTRACTOR_METATYPE_URL, U},
-  {"WPAY", EXTRACTOR_METATYPE_URL, U},
-  {"WPUB", EXTRACTOR_METATYPE_URL, U},
-  {"WXXX", EXTRACTOR_METATYPE_URL, T},
-  {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
-  /* ... */
-  {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
-  {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
-  {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
-  /* ... */
-  {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
-  /* ... */
-  {"LINK", EXTRACTOR_METATYPE_URL, U},
-  /* ... */
-  {"USER", EXTRACTOR_METATYPE_LICENSE, T},
-  /* ... */
-  {NULL, 0, T}
-};
-
-
-/* mimetype = audio/mpeg */
-int 
-EXTRACTOR_id3v23_extract (const unsigned char *data,
-                         size_t size,
-                         EXTRACTOR_MetaDataProcessor proc,
-                         void *proc_cls,
-                         const char *options)
-{
-  int unsync;
-  int extendedHdr;
-  int experimental;
-  uint32_t tsize;
-  uint32_t pos;
-  uint32_t ehdrSize;
-  uint32_t padding;
-  uint32_t csize;
-  int i;
-  uint16_t flags;
-  char *mime;
-  enum EXTRACTOR_MetaType type;
-  size_t off;
-  int obo;
-
-  if ((size < 16) ||
-      (data[0] != 0x49) ||
-      (data[1] != 0x44) ||
-      (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
-    return 0;
-  unsync = (data[5] & 0x80) > 0;
-  if (unsync)
-    return 0; /* not supported */
-  extendedHdr = (data[5] & 0x40) > 0;
-  experimental = (data[5] & 0x20) > 0;
-  if (experimental)
-    return 0;
-  tsize = (((data[6] & 0x7F) << 21) |
-           ((data[7] & 0x7F) << 14) |
-           ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
-  if (tsize + 10 > size)
-    return 0;
-  pos = 10;
-  padding = 0;
-  if (extendedHdr)
-    {
-      ehdrSize = (((data[10]) << 24) |
-                  ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
-
-      padding = (((data[15]) << 24) |
-                 ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
-      pos += 4 + ehdrSize;
-      if (padding < tsize)
-        tsize -= padding;
-      else
-        return 0;
-    }
-
-
-  while (pos < tsize)
-    {
-      if (pos + 10 > tsize)
-        return 0;
-      csize =
-        (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
-        data[pos + 7];
-      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
-         (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
-        break;
-      flags = (data[pos + 8] << 8) + data[pos + 9];
-      if (((flags & 0x80) > 0) /* compressed, not yet supported */  ||
-          ((flags & 0x40) > 0) /* encrypted, not supported */ )
-        {
-          pos += 10 + csize;
-          continue;
-        }
-      i = 0;
-      while (tmap[i].text != NULL)
-        {
-          if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
-            {
-              char *word;
-              if ((flags & 0x20) > 0)
-                {
-                  /* "group" identifier, skip a byte */
-                  pos++;
-                  csize--;
-                }
-             switch (tmap[i].fmt)
-               {
-               case T:
-                 /* this byte describes the encoding
-                    try to convert strings to UTF-8
-                    if it fails, then forget it */
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case U:
-                 word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
-                                                          csize, "ISO-8859-1");
-                 break;
-               case UL:
-                 if (csize < 6)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 off = 14;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case SL:
-                 if (csize < 7)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case L:
-                 if (csize < 5)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in 
front of comments... */
-                 if (csize < 6)
-                   obo = 0;
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14 + obo],
-                                                              csize - 4 - obo, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14 + obo],
-                                                              csize - 4 - obo, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14 + obo],
-                                                              csize - 4 - obo, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case I:
-                 if (csize < 2)
-                   return 0; /* malformed */
-                 /* find end of mime type */
-                 off = 11;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 mime = strdup ((const char*) &data[pos + 11]);
-                 
-                 switch (data[pos+off])
-                   {
-                   case 0x03:
-                   case 0x04:
-                     type = EXTRACTOR_METATYPE_COVER_PICTURE;
-                     break;
-                   case 0x07:
-                   case 0x08:
-                   case 0x09:
-                   case 0x0A:
-                   case 0x0B:
-                   case 0x0C:
-                     type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
-                     break;
-                   case 0x0D:
-                   case 0x0E:
-                   case 0x0F:
-                     type = EXTRACTOR_METATYPE_EVENT_PICTURE;
-                     break;
-                   case 0x14:
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     break;
-                   default:
-                     type = EXTRACTOR_METATYPE_PICTURE;
-                     break;
-                   }
-                 off++;
-
-                 /* find end of description */
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   {
-                     if (mime != NULL)
-                       free (mime);
-                     return 0; /* malformed */
-                   }
-                 off++;
-                 if ( (mime != NULL) &&
-                      (0 == strcasecmp ("-->",
-                                        mime)) )
-                   {
-                     /* not supported */
-                   }
-                 else
-                   {
-                     if (0 != proc (proc_cls,
-                                    "id3v23",
-                                    type,
-                                    EXTRACTOR_METAFORMAT_BINARY,
-                                    mime,
-                                    (const char*) &data[pos + off],
-                                    csize + 6 - off))                  
-                       {
-                         if (mime != NULL)
-                           free (mime);
-                         return 1;
-                       }
-                   }
-                 if (mime != NULL)
-                   free (mime);
-                 word = NULL;
-                 break;
-               default:
-                 return 0;
-               }             
-              if ((word != NULL) && (strlen (word) > 0))
-                {
-                 if (0 != proc (proc_cls,
-                                "id3v23",
-                                tmap[i].type,
-                                EXTRACTOR_METAFORMAT_UTF8,
-                                "text/plain",
-                                word,
-                                strlen(word)+1))
-                   {
-                     free (word);
-                     return 1;
-                   }
-                }
-             if (word != NULL)
-               free (word);
-              break;
-            }
-          i++;
-        }
-      pos += 10 + csize;
-    }
-  return 0;
-}
-
-/* end of id3v23_extractor.c */

Deleted: Extractor/src/plugins/id3v24_extractor.c
===================================================================
--- Extractor/src/plugins/id3v24_extractor.c    2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/id3v24_extractor.c    2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,455 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
-
- */
-#define DEBUG_EXTRACT_ID3v24 0
-
-#include "platform.h"
-#include "extractor.h"
-#include <string.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#ifndef MINGW
-#include <sys/mman.h>
-#endif
-
-#include "convert.h"
-
-enum Id3v24Fmt
-  {
-    T, /* simple, 0-terminated string, prefixed by encoding */
-    U, /* 0-terminated ASCII string, no encoding */
-    UL, /* unsync'ed lyrics */
-    SL, /* sync'ed lyrics */
-    L, /* string with language prefix */
-    I /* image */
-  };
-
-typedef struct
-{
-  const char *text;
-  enum EXTRACTOR_MetaType type;
-  enum Id3v24Fmt fmt;
-} Matches;
-
-static Matches tmap[] = {
-  {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
-  {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
-  {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
-  {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
-  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
-  /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
-  /* TDLY */
-  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
-  {"TEXT", EXTRACTOR_METATYPE_WRITER, T},  
-  {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
-  /* TIME, deprecated in 24 */
-  {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
-  {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
-  {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
-  /* TKEY */
-  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
-  {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
-  {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, 
-  {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
-  {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
-  {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
-  {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
-  /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
-  {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
-  {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
-  {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
-  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
-  {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 
-  {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
-  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
-  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
-  /* TRDA, deprecated in 24 */
-  {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
-  /* TRSO */
-  /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
-  {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
-  /* TSSE */
-  /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
-  {"WCOM", EXTRACTOR_METATYPE_URL, U},
-  {"WCOP", EXTRACTOR_METATYPE_URL, U},
-  {"WOAF", EXTRACTOR_METATYPE_URL, U},
-  {"WOAS", EXTRACTOR_METATYPE_URL, U},
-  {"WORS", EXTRACTOR_METATYPE_URL, U},
-  {"WPAY", EXTRACTOR_METATYPE_URL, U},
-  {"WPUB", EXTRACTOR_METATYPE_URL, U},
-  {"WXXX", EXTRACTOR_METATYPE_URL, T},
-  /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
-  /* ... */
-  {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
-  {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
-  {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
-  /* ... */
-  {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
-  /* ... */
-  {"LINK", EXTRACTOR_METATYPE_URL, U},
-  /* ... */
-  {"USER", EXTRACTOR_METATYPE_LICENSE, T},
-  /* ... */
-  /* new frames in 24 */
-  /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
-  {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
-  /* TDRC, TDRL, TDTG */
-  {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
-  {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
-  {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
-  {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
-  {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
-  {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
-  {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
-  {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
-  {NULL, 0, T}
-};
-
-
-/* mimetype = audio/mpeg */
-int 
-EXTRACTOR_id3v24_extract (const unsigned char *data,
-                         size_t size,
-                         EXTRACTOR_MetaDataProcessor proc,
-                         void *proc_cls,
-                         const char *options)
-{
-  int unsync;
-  int extendedHdr;
-  int experimental;
-  uint32_t tsize;
-  uint32_t pos;
-  uint32_t ehdrSize;
-  uint32_t csize;
-  int i;
-  uint16_t flags;
-  char *mime;
-  enum EXTRACTOR_MetaType type;
-  size_t off;
-
-  if ((size < 16) ||
-      (data[0] != 0x49) ||
-      (data[1] != 0x44) ||
-      (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
-    return 0;
-  unsync = (data[5] & 0x80) > 0;
-  if (unsync)
-    return 0; /* not supported */
-  extendedHdr = (data[5] & 0x40) > 0;
-  experimental = (data[5] & 0x20) > 0;
-  if (experimental)
-    return 0;
-  /* footer = (data[5] & 0x10) > 0; */
-  tsize = (((data[6] & 0x7F) << 21) |
-           ((data[7] & 0x7F) << 14) |
-           ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
-  if (tsize + 10 > size)
-    return 0;
-  pos = 10;
-  if (extendedHdr)
-    {
-      ehdrSize = (((data[10] & 0x7F) << 21) |
-                 ((data[11] & 0x7F) << 14) |
-                 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
-      pos += 4 + ehdrSize;
-      if (ehdrSize > tsize)
-       return 0;
-    }
-  while (pos < tsize)
-    {
-      if (pos + 10 > tsize)
-        return 0;
-      csize =
-        (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
-        data[pos + 7];
-      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
-         (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
-        break;
-      flags = (data[pos + 8] << 8) + data[pos + 9];
-      if (((flags & 0x08) > 0) /* compressed, not yet supported */  ||
-          ((flags & 0x04) > 0) /* encrypted, not supported */ ||
-          ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
-        {
-          pos += 10 + csize;
-          continue;
-        }
-      i = 0;
-      while (tmap[i].text != NULL)
-        {
-          if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
-            {
-              char *word;
-              if ((flags & 0x40) > 0)
-                {
-                  /* "group" identifier, skip a byte */
-                  pos++;
-                  csize--;
-                }
-
-             switch (tmap[i].fmt)
-               {
-               case T:
-                 /* this byte describes the encoding
-                    try to convert strings to UTF-8
-                    if it fails, then forget it */
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"UTF-16");
-                     break;
-                   case 0x02:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"UTF-16BE");
-                     break;
-                   case 0x03:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"UTF-8");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case U:
-                 word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
-                                                          csize, "ISO-8859-1");
-                 break;
-               case UL:
-                 if (csize < 6)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 off = 14;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"UTF-16");
-                     break;
-                   case 0x02:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"UTF-16BE");
-                     break;
-                   case 0x03:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"UTF-8");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case SL:
-                 if (csize < 7)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"UTF-16");
-                     break;
-                   case 0x02:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"UTF-16BE");
-                     break;
-                   case 0x03:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"UTF-8");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case L:
-                 if (csize < 5)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 switch (data[pos + 10])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
-                                                              csize - 4, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
-                                                              csize - 4, 
"UTF-16");
-                     break;
-                   case 0x02:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
-                                                              csize - 4, 
"UTF-16BE");
-                     break;
-                   case 0x03:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
-                                                              csize - 4, 
"UTF-8");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
-                                                              csize - 4, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case I:
-                 if (csize < 2)
-                   return 0; /* malformed */
-                 /* find end of mime type */
-                 off = 11;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 mime = strdup ((const char*) &data[pos + 11]);
-                 
-                 switch (data[pos+off])
-                   {
-                   case 0x03:
-                   case 0x04:
-                     type = EXTRACTOR_METATYPE_COVER_PICTURE;
-                     break;
-                   case 0x07:
-                   case 0x08:
-                   case 0x09:
-                   case 0x0A:
-                   case 0x0B:
-                   case 0x0C:
-                     type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
-                     break;
-                   case 0x0D:
-                   case 0x0E:
-                   case 0x0F:
-                     type = EXTRACTOR_METATYPE_EVENT_PICTURE;
-                     break;
-                   case 0x14:
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     break;
-                   default:
-                     type = EXTRACTOR_METATYPE_PICTURE;
-                     break;
-                   }
-                 off++;
-
-                 /* find end of description */
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   {
-                     if (mime != NULL)
-                       free (mime);
-                     return 0; /* malformed */
-                   }
-                 off++;
-                 if ( (mime != NULL) &&
-                      (0 == strcasecmp ("-->",
-                                        mime)) )
-                   {
-                     /* not supported */
-                   }
-                 else
-                   {
-                     if (0 != proc (proc_cls,
-                                    "id3v24",
-                                    type,
-                                    EXTRACTOR_METAFORMAT_BINARY,
-                                    mime,
-                                    (const char*) &data[pos + off],
-                                    csize + 6 - off))                  
-                       {
-                         if (mime != NULL)
-                           free (mime);
-                         return 1;
-                       }
-                   }
-                 if (mime != NULL)
-                   free (mime);
-                 word = NULL;
-                 break;
-               default:
-                 return 0;
-               }             
-              if ((word != NULL) && (strlen (word) > 0))
-                {
-                 if (0 != proc (proc_cls,
-                                "id3v24",
-                                tmap[i].type,
-                                EXTRACTOR_METAFORMAT_UTF8,
-                                "text/plain",
-                                word,
-                                strlen(word)+1))
-                   {
-                     free (word);
-                     return 1;
-                   }
-                }
-             if (word != NULL)
-               free (word);
-              break;
-            }
-          i++;
-        }
-      pos += 10 + csize;
-    }
-  return 0;
-}
-
-/* end of id3v24_extractor.c */

Modified: Extractor/src/plugins/id3v2_extractor.c
===================================================================
--- Extractor/src/plugins/id3v2_extractor.c     2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/id3v2_extractor.c     2012-03-27 13:05:17 UTC (rev 20783)
@@ -26,6 +26,8 @@
 #endif
 #include "convert.h"
 
+#include "extractor_plugins.h"
+
 #define DEBUG_EXTRACT_ID3v2 0
 
 enum Id3v2Fmt
@@ -47,314 +49,723 @@
 
 static Matches tmap[] = {
   /* skipping UFI */
-  {"TT1", EXTRACTOR_METATYPE_SECTION, T},
-  {"TT2", EXTRACTOR_METATYPE_TITLE, T},
-  {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
-  {"TP1", EXTRACTOR_METATYPE_ARTIST, T},
-  {"TP2", EXTRACTOR_METATYPE_PERFORMER, T},
-  {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T},
-  {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T},
-  {"TCM", EXTRACTOR_METATYPE_COMPOSER, T},
-  {"TXT", EXTRACTOR_METATYPE_WRITER, T},
-  {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T},
-  {"TCO", EXTRACTOR_METATYPE_GENRE, T},
-  {"TAL", EXTRACTOR_METATYPE_ALBUM, T},
-  {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T},
-  {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
-  {"TRC", EXTRACTOR_METATYPE_ISRC, T},
-  {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
+  {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
+  {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
+  {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
+  {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
+  {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
+  {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
+  {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
+  {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
+  {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
+  {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+  {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+  {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
+  {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
   /*
     FIXME: these two and TYE should be combined into
     the actual publication date (if TRD is missing)
-  {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE},
-  {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+  {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+  {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
   */
-  {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T},
-  {"TMT", EXTRACTOR_METATYPE_SOURCE, T},
-  {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
-  {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
-  {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T},
-  {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T},
-  {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T},
-  {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
-  {"TOF", EXTRACTOR_METATYPE_FILENAME, T},
-  {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
-  {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
+  {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
+  {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
+  {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+  {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+  {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
+  {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
+  {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
+  {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
+  {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
+  {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
   /* skipping TDY, TKE */
-  {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
-  {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
-  {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
-  {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
+  {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+  {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+  {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+  {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
   /* skipping TXX */
 
-  {"WAF", EXTRACTOR_METATYPE_URL, U},
-  {"WAR", EXTRACTOR_METATYPE_URL, U},
-  {"WAS", EXTRACTOR_METATYPE_URL, U},
-  {"WCM", EXTRACTOR_METATYPE_URL, U},
-  {"WCP", EXTRACTOR_METATYPE_RIGHTS, U},
-  {"WCB", EXTRACTOR_METATYPE_URL, U},
+  {"WAF ", EXTRACTOR_METATYPE_URL, U},
+  {"WAR ", EXTRACTOR_METATYPE_URL, U},
+  {"WAS ", EXTRACTOR_METATYPE_URL, U},
+  {"WCM ", EXTRACTOR_METATYPE_URL, U},
+  {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
+  {"WCB ", EXTRACTOR_METATYPE_URL, U},
   /* skipping WXX */
-  {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+  {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
   /* skipping MCI */
   /* skipping ETC */
   /* skipping MLL */
   /* skipping STC */
-  {"ULT", EXTRACTOR_METATYPE_LYRICS, UL},
-  {"SLT", EXTRACTOR_METATYPE_LYRICS, SL},
-  {"COM", EXTRACTOR_METATYPE_COMMENT, L},
+  {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
+  {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
+  {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
   /* skipping RVA */
   /* skipping EQU */
   /* skipping REV */
-  {"PIC", EXTRACTOR_METATYPE_PICTURE, I},
+  {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
   /* skipping GEN */
-  /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
-  /*  {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
+  /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
+  /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
   /* skipping BUF */
   /* skipping CRM */
   /* skipping CRA */
-  /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */
+  /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
+
+
+  {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+  {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
+  {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
+  /* TDLY */
+  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
+  {"TEXT", EXTRACTOR_METATYPE_WRITER, T},  
+  {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+  /* TIME, idv23 only */
+  {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
+  {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
+  {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  /* TKEY */
+  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
+  {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
+  {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, 
+  {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+  {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+  {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+  {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
+  {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
+  {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
+  {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
+  {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
+  {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 
+  {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
+  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+  /* TRDA, idv23 only */
+  {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
+  /* TRSO */
+  {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
+  {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
+  /* TSSE */
+  {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
+  {"WCOM", EXTRACTOR_METATYPE_URL, U},
+  {"WCOP", EXTRACTOR_METATYPE_URL, U},
+  {"WOAF", EXTRACTOR_METATYPE_URL, U},
+  {"WOAS", EXTRACTOR_METATYPE_URL, U},
+  {"WORS", EXTRACTOR_METATYPE_URL, U},
+  {"WPAY", EXTRACTOR_METATYPE_URL, U},
+  {"WPUB", EXTRACTOR_METATYPE_URL, U},
+  {"WXXX", EXTRACTOR_METATYPE_URL, T},
+  {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
+  /* ... */
+  {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
+  {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
+  {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
+  /* ... */
+  {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
+  /* ... */
+  {"LINK", EXTRACTOR_METATYPE_URL, U},
+  /* ... */
+  {"USER", EXTRACTOR_METATYPE_LICENSE, T},
+  /* ... */
+
+  /* new frames in id3v24 */
+  /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
+  {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
+  /* TDRC, TDRL, TDTG */
+  {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+  {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
+  {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
+  {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
+  {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
+
   {NULL, 0, T},
 };
 
+struct id3v2_state
+{
+  int state;
+  unsigned int tsize;
+  size_t csize;
+  char id[4];
+  int32_t ti;
+  char ver;
+  char extended_header;
+  uint16_t frame_flags;
+  char *mime;
+};
 
-/* mimetype = audio/mpeg */
-int 
-EXTRACTOR_id3v2_extract (const unsigned char *data,
-                        size_t size,
-                        EXTRACTOR_MetaDataProcessor proc,
-                        void *proc_cls,
-                        const char *options)
+enum ID3v2State
 {
-  unsigned int tsize;
-  unsigned int pos;
+  ID3V2_INVALID = -1,
+  ID3V2_READING_HEADER = 0,
+  ID3V2_READING_FRAME_HEADER,
+  ID3V23_READING_EXTENDED_HEADER,
+  ID3V24_READING_EXTENDED_HEADER,
+  ID3V2_READING_FRAME
+};
+
+void
+EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct id3v2_state *state;
+  state = plugin->state = malloc (sizeof (struct id3v2_state));
+  if (state == NULL)
+    return;
+  memset (state, 0, sizeof (struct id3v2_state));
+  state->state = ID3V2_READING_HEADER;
+  state->ti = -1;
+  state->mime = NULL;
+}
+
+/**
+ * Release the id3v2 parser state (including a pending MIME string) and
+ * reset plugin->state to NULL.
+ *
+ * @param plugin plugin whose state is discarded
+ */
+void
+EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct id3v2_state *st = plugin->state;
+
+  plugin->state = NULL;
+  if (NULL == st)
+    return;
+  /* free (NULL) is a no-op, so mime needs no guard */
+  free (st->mime);
+  free (st);
+}
+
+/**
+ * Look up a frame identifier in tmap.
+ *
+ * @param id frame identifier (not necessarily 0-terminated)
+ * @param len number of bytes of the identifier to compare
+ * @return index of the first tmap entry whose text matches the first
+ *         len bytes of id, -1 if there is none
+ */
+static int
+find_type (const char *id, size_t len)
+{
+  int idx = 0;
+
+  while (NULL != tmap[idx].text)
+  {
+    if (0 == strncmp (tmap[idx].text, id, len))
+      return idx;
+    idx++;
+  }
+  return -1;
+}
+
+int
+EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
+    EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int64_t file_position;
+  int64_t file_size;
+  int64_t offset = 0;
+  int64_t size;
+  struct id3v2_state *state;
+  unsigned char *data;
+  char *word = NULL;
   unsigned int off;
   enum EXTRACTOR_MetaType type;
-  const char *mime;
+  unsigned char picture_type;
 
-  if ((size < 16) ||
-      (data[0] != 0x49) ||
-      (data[1] != 0x44) ||
-      (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
-    return 0;
-  /* unsync: (data[5] & 0x80) > 0;  */
-  tsize = (((data[6] & 0x7F) << 21) |
-           ((data[7] & 0x7F) << 14) |
-           ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
+  if (plugin == NULL || plugin->state == NULL)
+    return 1;
 
-  if (tsize + 10 > size)
-    return 0;
-  pos = 10;
-  while (pos < tsize)
+  state = plugin->state;
+  file_position = plugin->position;
+  file_size = plugin->fsize;
+  size = plugin->map_size;
+  data = plugin->shm_ptr;
+
+  if (plugin->seek_request < 0)
+    return 1;
+  if (file_position - plugin->seek_request > 0)
+  {
+    plugin->seek_request = -1;
+    return 1;
+  }
+  if (plugin->seek_request - file_position < size)
+    offset = plugin->seek_request - file_position;
+
+  while (1)
+  {
+    switch (state->state)
     {
-      size_t csize;
-      int i;
+    case ID3V2_INVALID:
+      plugin->seek_request = -1;
+      return 1;
+    case ID3V2_READING_HEADER:
+      /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 
faq:
+       * Q: Where is an ID3v2 tag located in an MP3 file?
+       * A: It is most likely located at the beginning of the file. Look for 
the
+       *    marker "ID3" in the first 3 bytes of the file. If it's not there, 
it
+       *    could be at the end of the file (if the tag is ID3v2.4). Look for 
the
+       *    marker "3DI" 10 bytes from the end of the file, or 10 bytes before 
the
+       *    beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 
tags
+       *    in the actual MPEG stream, on an MPEG frame boundry. Almost nobody 
does
+       *    this.
+       * Parsing of such tags will not be completely correct, because we can't
+       * seek backwards. We will have to seek to file_size - chunk_size instead
+       * (by the way, chunk size is theoretically unknown, LE is free to use 
any chunk
+       * size, even though plugins often make assumptions about chunk size 
being large
+       * enough to make one atomic read without seeking, if offset == 0) and 
search
+       * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes 
before
+       *  it (or 10 bytes before the end of file, if id3v1 is not there; not 
sure
+       * about APETAGs; we should probably just scan byte-by-byte from the end 
of file,
+       * until we hit 3DI, or reach the offset == 0), and use it set offset to 
the
+       * start of ID3v24 header, adjust the following file_position check and 
data
+       * indices (use offset), and otherwise proceed as normal (maybe file 
size checks
+       * along the way will have to be adjusted by -1, or made ">" instead of 
">=";
+       * these problems do not arise for tags at the beginning of the file, 
since
+       * audio itself is usually at least 1-byte long; when the tag is at the 
end of
+       * file, these checks will have to be 100% correct).
+       * If there are two tags (at the beginning and at the end of the file),
+       * a SEEK in the one at the beginning of the file can be used to seek to 
the
+       * one at the end.
+       */
+      /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2
+       * that tells the parser to augment id3v1 values with the values from
+       * id3v2 (if this flag is not set, id3v2 parser must discard id3v1 data).
+       * At the moment id3v1 and id3v2 are parsed separately, and update flag
+       * is ignored.
+       */
+      if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 
0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && 
(data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are 
backward-compatible*/)
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
+      state->ver = data[3];
+      if (state->ver == 0x02)
+      {
+        state->extended_header = 0;
+      }
+      else if ((state->ver == 0x03) || (state->ver == 0x04))
+      {
+        if ((data[5] & 0x80) > 0)
+        {
+          /* unsync is not supported in id3v23 or id3v24*/
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        state->extended_header = (data[5] & 0x40) > 0;
+        if ((data[5] & 0x20) > 0)
+        {
+          /* experimental is not supported in id3v23 or id3v24*/
+          state->state = ID3V2_INVALID;
+          break;
+        }
+      }
+      state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | 
((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
+      if (state->tsize + 10 > file_size)
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
+      offset = 10;
+      if (state->ver == 0x03 && state->extended_header)
+        state->state = ID3V23_READING_EXTENDED_HEADER;
+      else if (state->ver == 0x04 && state->extended_header)
+        state->state = ID3V24_READING_EXTENDED_HEADER;
+      else
+        state->state = ID3V2_READING_FRAME_HEADER;
+      break;
+    case ID3V23_READING_EXTENDED_HEADER:
+      /* the fixed part of the id3v23 extended header is 10 bytes:
+       * size (4), flags (2), size of padding (4) */
+      if (offset + 9 >= size)
+      {
+        if (offset == 0)
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        plugin->seek_request = file_position + offset;
+        return 0;
+      }
+      if (state->ver == 0x03 && state->extended_header)
+      {
+        uint32_t padding, extended_header_size;
+        /* widen before shifting: shifting a promoted unsigned char >= 0x80
+         * left by 24 would overflow a signed int */
+        extended_header_size = (((uint32_t) data[offset] << 24) |
+                                ((uint32_t) data[offset + 1] << 16) |
+                                ((uint32_t) data[offset + 2] << 8) |
+                                ((uint32_t) data[offset + 3] << 0));
+        padding = (((uint32_t) data[offset + 6] << 24) |
+                   ((uint32_t) data[offset + 7] << 16) |
+                   ((uint32_t) data[offset + 8] << 8) |
+                   ((uint32_t) data[offset + 9] << 0));
+        /* NOTE(review): the id3v23 spec describes a 4-byte CRC that is
+         * present only when the CRC flag is set and is already counted in
+         * the size field; confirm whether this one-byte skip is intended */
+        if (data[offset + 4] == 0 && data[offset + 5] == 0)
+          /* Skip the CRC32 byte after extended header */
+          offset += 1;
+        offset += 4 + extended_header_size;
+        if (padding < state->tsize)
+          state->tsize -= padding;
+        else
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
+      }
+      /* The extended header is consumed, frames follow.  Without this
+       * transition the loop would stay in this state and interpret the
+       * frame data as further extended headers. */
+      state->state = ID3V2_READING_FRAME_HEADER;
+      break;
+    case ID3V24_READING_EXTENDED_HEADER:
+      if (offset + 6 >= size)
+      {
+        if (offset == 0)
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        plugin->seek_request = file_position + offset;
+        return 0;
+      }
+      if ( (state->ver == 0x04) && (state->extended_header))
+      {
+       uint32_t extended_header_size;
 
-      if (pos + 7 > tsize)
+        /* widen before shifting: shifting a promoted unsigned char >= 0x80
+         * left by 24 would overflow a signed int */
+        extended_header_size = (((uint32_t) data[offset] << 24) |
+                               ((uint32_t) data[offset + 1] << 16) |
+                               ((uint32_t) data[offset + 2] << 8) |
+                               ((uint32_t) data[offset + 3] << 0));
+        offset += 4 + extended_header_size;
+      }
+      /* The extended header is consumed, frames follow.  Without this
+       * transition the loop would stay in this state and interpret the
+       * frame data as further extended headers. */
+      state->state = ID3V2_READING_FRAME_HEADER;
+      break;
+    case ID3V2_READING_FRAME_HEADER:
+      if (file_position + offset > state->tsize ||
+          ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) 
||
+          (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + 
offset + 10 >= state->tsize))
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
+      if (((state->ver == 0x02) && (offset + 6 >= size)) ||
+          (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= 
size)))
+      {
+        plugin->seek_request = file_position + offset;
         return 0;
-      csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5];
-      if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0))
+      }
+      if (state->ver == 0x02)
+      {
+        memcpy (state->id, &data[offset], 3);
+        state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + 
data[offset + 5];
+        if ((file_position + offset + 6 + state->csize > file_size) || 
(state->csize > file_size) || (state->csize == 0))
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        offset += 6;
+        state->frame_flags = 0;
+      }
+      else if ((state->ver == 0x03) || (state->ver == 0x04))
+      {
+        memcpy (state->id, &data[offset], 4);
+        if (state->ver == 0x03)
+          state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + 
(data[offset + 6] << 8) + data[offset + 7];
+        else if (state->ver == 0x04)
+          state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 
5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 
0x7F) << 00);
+        if ((file_position + offset + 10 + state->csize > file_size) || 
(state->csize > file_size) || (state->csize == 0))
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
+        if (state->ver == 0x03)
+        {
+          if (((state->frame_flags & 0x80) > 0) /* compressed, not yet 
supported */ ||
+              ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
+          {
+            /* Skip to next frame header */
+            offset += 10 + state->csize;
+            break;
+          }
+        }
+        else if (state->ver == 0x04)
+        {
+          if (((state->frame_flags & 0x08) > 0) /* compressed, not yet 
supported */ ||
+              ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ 
||
+              ((state->frame_flags & 0x02) > 0) /* unsynchronization, not 
supported */)
+          {
+            /* Skip to next frame header */
+            offset += 10 + state->csize;
+            break;
+          }
+          if ((state->frame_flags & 0x01) > 0)
+          {
+            /* Skip data length indicator */
+            state->csize -= 4;
+            offset += 4;
+          }
+        }
+        offset += 10;
+      }
+
+      state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 
3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
+      if (state->ti == -1)
+      {
+        offset += state->csize;
         break;
-      i = 0;
-      while (tmap[i].text != NULL)
+      }
+      state->state = ID3V2_READING_FRAME;
+      break;
+    case ID3V2_READING_FRAME:
+      if (offset == 0 && state->csize > size)
+      {
+        /* frame size is larger than the size of one data chunk we get at a 
time */
+        offset += state->csize;
+        state->state = ID3V2_READING_FRAME_HEADER;
+        break;
+      }
+      if (offset + state->csize > size)
+      {
+        plugin->seek_request = file_position + offset;
+        return 0;
+      }
+      word = NULL;
+      if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
+          ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
+      {
+        /* "group" identifier, skip a byte */
+        offset++;
+        state->csize--;
+      }
+      switch (tmap[state->ti].fmt)
+      {
+      case T:
+        if (data[offset] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 1],
+              state->csize - 1, "ISO-8859-1");
+        else if (data[offset] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 1],
+              state->csize - 1, "UCS-2");
+        else if ((state->ver == 0x04) && (data[offset] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 1],
+              state->csize - 1, "UTF-16BE");
+        else if ((state->ver == 0x04) && (data[offset] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 1],
+              state->csize - 1, "UTF-8");
+        else
+          /* bad encoding byte, try to convert from iso-8859-1 */
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 1],
+              state->csize - 1, "ISO-8859-1");
+        break;
+      case U:
+        word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
+            state->csize, "ISO-8859-1");
+        break;
+      case UL:
+        if (state->csize < 6)
         {
-          if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3))
-            {
-              char *word;
-              /* this byte describes the encoding
-                 try to convert strings to UTF-8
-                 if it fails, then forget it */
-             switch (tmap[i].fmt)
-               {
-               case T:           
-                 switch (data[pos + 6])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 7],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 7],
-                                                              csize - 1, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 7],
-                                                              csize - 1, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case U:
-                 word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 6],
-                                                          csize, "ISO-8859-1");
-                 break;
-               case UL:
-                 if (csize < 6)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 off = 10;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 switch (data[pos + 6])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
-                                                              csize - off, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case SL:
-                 if (csize < 7)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 switch (data[pos + 6])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 12],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 12],
-                                                              csize - 6, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 12],
-                                                              csize - 6, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case L:
-                 if (csize < 5)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 switch (data[pos + 6])
-                   {
-                   case 0x00:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
-                                                              csize - 4, 
"ISO-8859-1");
-                     break;
-                   case 0x01:
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
-                                                              csize - 4, 
"UCS-2");
-                     break;
-                   default:
-                     /* bad encoding byte,
-                        try to convert from iso-8859-1 */
-                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
-                                                              csize - 4, 
"ISO-8859-1");
-                     break;
-                   }
-                 break;
-               case I:
-                 if (csize < 6)
-                   return 0; /* malformed */
-                 /* find end of description */
-                 off = 12;
-                 while ( (off < size) &&
-                         (off - pos < csize) &&
-                         (data[pos + off] == '\0') )
-                   off++;
-                 if ( (off >= csize) ||
-                      (data[pos+off] != '\0') )
-                   return 0; /* malformed */
-                 off++;
-                 switch (data[pos+11])
-                   {
-                   case 0x03:
-                   case 0x04:
-                     type = EXTRACTOR_METATYPE_COVER_PICTURE;
-                     break;
-                   case 0x07:
-                   case 0x08:
-                   case 0x09:
-                   case 0x0A:
-                   case 0x0B:
-                   case 0x0C:
-                     type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
-                     break;
-                   case 0x0D:
-                   case 0x0E:
-                   case 0x0F:
-                     type = EXTRACTOR_METATYPE_EVENT_PICTURE;
-                     break;
-                   case 0x14:
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     type = EXTRACTOR_METATYPE_LOGO;
-                     break;
-                   default:
-                     type = EXTRACTOR_METATYPE_PICTURE;
-                     break;
-                   }
-                 if (0 == strncasecmp ("PNG",
-                                       (const char*) &data[pos + 7], 3))
-                   mime = "image/png";
-                 else if (0 == strncasecmp ("JPG",
-                                            (const char*) &data[pos + 7], 3))
-                   mime = "image/jpeg";
-                 else
-                   mime = NULL;
-                 if (0 == strncasecmp ("-->",
-                                       (const char*) &data[pos + 7], 3))
-                   {
-                     /* not supported */
-                   }
-                 else
-                   {
-                     if (0 != proc (proc_cls,
-                                    "id3v2",
-                                    type,
-                                    EXTRACTOR_METAFORMAT_BINARY,
-                                    mime,
-                                    (const char*) &data[pos + off],
-                                    csize + 6 - off))                  
-                       return 1;
-                   }
-                 word = NULL;
-                 break;
-               default:
-                 return 0;
-               }
-             if ((word != NULL) && (strlen (word) > 0))
-                {
-                 if (0 != proc (proc_cls,
-                                "id3v2",
-                                tmap[i].type,
-                                EXTRACTOR_METAFORMAT_UTF8,
-                                "text/plain",
-                                word,
-                                strlen(word)+1))
-                   {
-                     free (word);
-                     return 1;
-                   }
-               }
-             if (word != NULL)
-               free (word);
-              break;
-            }
-          i++;
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
         }
-      pos += 6 + csize;
+        /* find end of description */
+        off = 4;
+        while ((off < size) && (off < offset + state->csize) && (data[offset + 
off] != '\0'))
+          off++;
+        if ((off >= state->csize) || (data[offset + off] != '\0'))
+        {
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        off++;
+        if (data[offset] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ off],
+              state->csize - off, "ISO-8859-1");
+        else if (data[offset] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ off],
+              state->csize - off, "UCS-2");
+        else if ((state->ver == 0x04) && (data[offset] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ off],
+              state->csize - off, "UTF-16BE");
+        else if ((state->ver == 0x04) && (data[offset] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ off],
+              state->csize - off, "UTF-8");
+        else
+          /* bad encoding byte, try to convert from iso-8859-1 */
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ off],
+              state->csize - off, "ISO-8859-1");
+        break;
+      case SL:
+        if (state->csize < 7)
+        {
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        if (data[offset] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 6],
+              state->csize - 6, "ISO-8859-1");
+        else if (data[offset] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 6],
+              state->csize - 6, "UCS-2");
+        else if ((state->ver == 0x04) && (data[offset] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 6],
+              state->csize - 6, "UTF-16BE");
+        else if ((state->ver == 0x04) && (data[offset] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 6],
+              state->csize - 6, "UTF-8");
+        else
+          /* bad encoding byte, try to convert from iso-8859-1 */
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset 
+ 6],
+              state->csize - 6, "ISO-8859-1");
+        break;
+      case L:
+        /* Frame body: encoding (1 byte), language (3 bytes), 0-terminated
+         * short description, then the text that is reported.  The whole
+         * frame (state->csize bytes starting at data[offset]) has already
+         * been checked to lie inside the current chunk. */
+        if (state->csize < 5)
+        {
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        /* find end of description; `off' is relative to the start of the
+         * frame body, so it is bounded by the frame size alone */
+        off = 4;
+        while ((off < state->csize) && (data[offset + off] != '\0'))
+          off++;
+        if (off >= state->csize)
+        {
+          /* malformed: description is not terminated inside the frame */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        off++;
+        {
+          const char *charset;
+
+          if (data[offset] == 0x01)
+            charset = "UCS-2";
+          else if ((state->ver == 0x04) && (data[offset] == 0x02))
+            charset = "UTF-16BE";
+          else if ((state->ver == 0x04) && (data[offset] == 0x03))
+            charset = "UTF-8";
+          else
+            /* 0x00, or a bad encoding byte: try to convert from iso-8859-1 */
+            charset = "ISO-8859-1";
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+              state->csize - off, charset);
+        }
+        break;
+      case I:
+        /* Attached picture.  All `off' values are relative to the start of
+         * the frame body (data[offset]); the whole frame (state->csize
+         * bytes) has already been checked to lie inside the current chunk,
+         * so scans are bounded by the frame size alone. */
+        if ( ( (state->ver == 0x02) &&
+              (state->csize < 7) ) ||
+            ( ( (state->ver == 0x03) ||
+                (state->ver == 0x04)) && (state->csize < 5)) )
+        {
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        free (state->mime);
+        state->mime = NULL;
+        if (state->ver == 0x02)
+        {
+          /* encoding (1), image format (3), picture type (1), description */
+          if (0 == strncasecmp ("-->", (const char *) &data[offset + 1], 3))
+            /* picture is a link to an external file: not supported */
+            break;
+          picture_type = data[offset + 4];
+          off = 5;
+        }
+        else if ((state->ver == 0x03) || (state->ver == 0x04))
+        {
+          /* encoding (1), 0-terminated MIME type, picture type (1),
+           * description */
+          off = 1;
+          while ((off < state->csize) && (data[offset + off] != '\0'))
+            off++;
+          /* need the terminator and the picture type byte inside the frame */
+          if (off + 1 >= state->csize)
+          {
+            /* malformed */
+            state->state = ID3V2_INVALID;
+            break;
+          }
+          state->mime = malloc (off);
+          if (state->mime == NULL)
+            /* out of memory: skip this picture */
+            break;
+          memcpy (state->mime, &data[offset + 1], off - 1);
+          state->mime[off - 1] = '\0';
+          /* must be tested before "image/" is prepended below */
+          if (0 == strcmp (state->mime, "-->"))
+          {
+            /* picture is a link to an external file: not supported */
+            free (state->mime);
+            state->mime = NULL;
+            break;
+          }
+          /* step over the terminator; the picture type follows the MIME type */
+          off += 1;
+          picture_type = data[offset + off];
+          off += 1;
+        }
+        /* find end of description */
+        while ((off < state->csize) && (data[offset + off] != '\0'))
+          off++;
+        if (off >= state->csize)
+        {
+          free (state->mime);
+          state->mime = NULL;
+          /* malformed */
+          state->state = ID3V2_INVALID;
+          break;
+        }
+        off++;
+        switch (picture_type)
+        {
+        case 0x03:
+        case 0x04:
+          type = EXTRACTOR_METATYPE_COVER_PICTURE;
+          break;
+        case 0x07:
+        case 0x08:
+        case 0x09:
+        case 0x0A:
+        case 0x0B:
+        case 0x0C:
+          type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
+          break;
+        case 0x0D:
+        case 0x0E:
+        case 0x0F:
+          type = EXTRACTOR_METATYPE_EVENT_PICTURE;
+          break;
+        case 0x14:
+          type = EXTRACTOR_METATYPE_LOGO;
+          break;
+        default:
+          type = EXTRACTOR_METATYPE_PICTURE;
+          break;
+        }
+        if (state->ver == 0x02)
+        {
+          if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
+            state->mime = strdup ("image/png");
+          else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3))
+            state->mime = strdup ("image/jpeg");
+          /* otherwise the format is unknown and no MIME type is reported */
+        }
+        else if (((state->ver == 0x03) || (state->ver == 0x04)) &&
+                 (strchr (state->mime, '/') == NULL))
+        {
+          /* MIME type was given without the "image/" prefix, add it */
+          size_t mime_len = strlen (state->mime);
+          char *type_mime = malloc (mime_len + 6 + 1);
+
+          /* on allocation failure the picture is reported without MIME type */
+          if (type_mime != NULL)
+            snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
+          free (state->mime);
+          state->mime = type_mime;
+        }
+        if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY,
+                       state->mime, (const char *) &data[offset + off],
+                       state->csize - off))
+        {
+          free (state->mime);
+          state->mime = NULL;
+          return 1;
+        }
+        free (state->mime);
+        state->mime = NULL;
+        word = NULL;
+        break;
+      default:
+        return 1;
+      }
+      if ((word != NULL) && (strlen (word) > 0))
+      {
+        if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
+        {
+          free (word);
+          return 1;
+        }
+      }
+      if (word != NULL)
+        free (word);
+      offset = offset + state->csize;
+      state->state = ID3V2_READING_FRAME_HEADER;
+    break;
     }
-  return 0;
+  }
+  return 1;
 }
 
 /* end of id3v2_extractor.c */

Modified: Extractor/src/plugins/mp3_extractor.c
===================================================================
--- Extractor/src/plugins/mp3_extractor.c       2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/plugins/mp3_extractor.c       2012-03-27 13:05:17 UTC (rev 
20783)
@@ -36,8 +36,41 @@
 #include <unistd.h>
 #include <stdlib.h>
 
-#define MAX_MP3_SCAN_DEEP 16768
-const int max_frames_scan = 1024;
+#include "extractor_plugins.h"
+
+#if WINDOWS
+#include <sys/param.h>          /* #define BYTE_ORDER */
+#endif
+#ifndef __BYTE_ORDER
+#ifdef _BYTE_ORDER
+#define __BYTE_ORDER _BYTE_ORDER
+#else
+#ifdef BYTE_ORDER
+#define __BYTE_ORDER BYTE_ORDER
+#endif
+#endif
+#endif
+#ifndef __BIG_ENDIAN
+#ifdef _BIG_ENDIAN
+#define __BIG_ENDIAN _BIG_ENDIAN
+#else
+#ifdef BIG_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#endif
+#endif
+#endif
+#ifndef __LITTLE_ENDIAN
+#ifdef _LITTLE_ENDIAN
+#define __LITTLE_ENDIAN _LITTLE_ENDIAN
+#else
+#ifdef LITTLE_ENDIAN
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#endif
+#endif
+#endif
+
+#define LARGEST_FRAME_SIZE 8065
+
 enum
 { MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 };
 
@@ -45,6 +78,11 @@
 { LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 };
 
 #define MPA_SYNC_MASK          ((unsigned int) 0xFFE00000)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define MPA_SYNC_MASK_MEM      ((unsigned int) 0xFFE00000)
+#else
+#define MPA_SYNC_MASK_MEM      ((unsigned int) 0x0000E0FF)
+#endif
 #define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000)
 #define MPA_VERSION_MASK       ((unsigned int) 0x00080000)
 #define MPA_LAYER_MASK         ((unsigned int) 0x3)
@@ -106,169 +144,274 @@
 
 #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
 
-/* mimetype = audio/mpeg */
-int 
-EXTRACTOR_mp3_extract (const unsigned char *data,
-                      size_t size,
-                      EXTRACTOR_MetaDataProcessor proc,
-                      void *proc_cls,
-                      const char *options)
+/**
+ * State of the MP3 frame scanner that has to survive between calls of
+ * the extract method.
+ */
+struct mp3_state
 {
-  unsigned int header;
-  int counter = 0;
+  int state; /* current scanner state, one of enum MP3State */
+
+  uint32_t header; /* candidate frame header, first byte most significant */
+  int sample_rate; /* sample rate of the most recent valid frame */
+  char mpeg_ver; /* MPEG_V1 .. MPEG_V25 of the most recent valid frame */
+  char layer; /* LAYER_1 .. LAYER_3 of the most recent valid frame */
+  char vbr_flag; /* set once a frame differs from the running average */
+  int ch; /* channel mode bits of the most recent valid frame */
+  char copyright_flag; /* copyright bit of the most recent valid frame */
+  char original_flag; /* original bit of the most recent valid frame */
+  int avg_bps; /* sum of kbps over valid frames; averaged when reporting */
+  int bitrate; /* bit rate (1000 * table entry) of the last valid frame */
+
+  int64_t number_of_frames; /* sync candidates examined so far */
+  int64_t number_of_valid_frames; /* candidates that passed all checks */
+};
+
+/** States of the frame scanner in EXTRACTOR_mp3_extract_method. */
+enum MP3State
+{
+  /* searching the mapped buffer for a frame sync pattern */
+  MP3_LOOKING_FOR_FRAME = 0,
+  /* a candidate header is stored in the state and gets decoded */
+  MP3_READING_FRAME
+};
+
+/**
+ * Allocate the scanner state of a plugin with every field reset.
+ * If the allocation fails, plugin->state ends up NULL.
+ *
+ * @param plugin plugin list entry that receives the new state
+ */
+void
+EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  /* calloc returns zeroed memory: all counters and flags are 0 and the
+   * scanner starts out in MP3_LOOKING_FOR_FRAME (== 0) */
+  plugin->state = calloc (1, sizeof (struct mp3_state));
+}
+
+/**
+ * Release the scanner state of a plugin, if there is one, and clear the
+ * pointer.
+ *
+ * @param plugin plugin list entry whose state is discarded
+ */
+void
+EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  /* free (NULL) does nothing, so the pointer needs no test */
+  free (plugin->state);
+  plugin->state = NULL;
+}
+
+/**
+ * Decide whether the frames counted so far look like MPEG audio and, if
+ * they do, pass mime type, format version, a format summary and the
+ * duration to the meta data processor.
+ *
+ * Replaces state->avg_bps (a running sum up to this point) by the
+ * average, so it is meant to be called once, when scanning is over.
+ *
+ * @param plugin plugin list entry; only its fsize member is read
+ * @param state accumulated frame statistics
+ * @param proc callback that receives the meta data (used by ADDR)
+ * @param proc_cls closure for proc
+ * @return 0 if nothing was reported or every item was accepted,
+ *         1 as soon as proc returns non-zero
+ */
+static int
+calculate_frame_statistics_and_maybe_report_it (
+    struct EXTRACTOR_PluginList *plugin,
+    struct mp3_state *state,
+    EXTRACTOR_MetaDataProcessor proc,
+    void *proc_cls)
+{
+  int length;
+  int kbps;
+  char format[512];
+
+  /* Fewer than two valid frames, or less than half of all candidates
+   * valid: unlikely to be an mp3 file.  The count is tested first and
+   * the ratio is an integer comparison, so a frame count of zero never
+   * ends up as a divisor. */
+  if (state->number_of_valid_frames < 2 ||
+      2 * state->number_of_valid_frames < state->number_of_frames)
+    return 0;
+  ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
+  /* avg_bps held the sum of the per-frame kbps values up to here */
+  state->avg_bps = state->avg_bps / state->number_of_valid_frames;
+  /* avg_bps is in kbit/s, state->bitrate in bit/s: bring both to kbit/s */
+  kbps = (0 != state->avg_bps) ? state->avg_bps : state->bitrate / 1000;
+  /* NOTE(review): 1152 samples per frame is the MPEG-1 Layer II/III
+   * figure; confirm whether other layer/version combinations need a
+   * different constant here. */
+  if (state->sample_rate > 0)
+    length = 1152 * state->number_of_valid_frames / state->sample_rate;
+  else if (kbps > 0)
+    length = plugin->fsize / kbps / 125; /* bytes / (kbit/s) / 125 = s */
+  else
+    length = 0;
+
+  ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION);
+  snprintf (format,
+            sizeof (format),
+            "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
+            mpeg_versions[state->mpeg_ver - 1],
+            layer_names[state->layer - 1],
+            state->avg_bps,
+            state->vbr_flag ? _("VBR") : _("CBR"),
+            state->sample_rate,
+            channel_modes[state->ch],
+            state->copyright_flag ? _("copyright") : _("no copyright"),
+            state->original_flag ? _("original") : _("copy") );
+
+  ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
+  /* duration as minutes and two-digit seconds */
+  snprintf (format,
+            sizeof (format), "%dm%02d",
+            length / 60, length % 60);
+  ADDR (format, EXTRACTOR_METATYPE_DURATION);
+  return 0;
+}
+
+/**
+ * Scan the currently mapped window of the file for MPEG audio frames and
+ * accumulate per-frame statistics in the plugin state.
+ *
+ * @param plugin plugin list entry; its state, position, fsize, map_size,
+ *        shm_ptr and seek_request members are read, seek_request is
+ *        updated
+ * @param proc callback that receives the extracted meta data
+ * @param proc_cls closure for proc
+ * @return 0 after storing in plugin->seek_request the absolute offset at
+ *         which scanning has to continue (the window is exhausted);
+ *         1 if plugin or its state is NULL, if the seek request is
+ *         negative or points before the window, or when scanning is over
+ *         (the statistics are reported first if they look like MPEG
+ *         audio)
+ */
+int
+EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin,
+                       EXTRACTOR_MetaDataProcessor proc,
+                      void *proc_cls)
+{
+  int64_t file_position;
+  int64_t file_size;
+  size_t offset = 0;
+  size_t size;
+  unsigned char *data;
+  struct mp3_state *state;
+
+  size_t frames_found_in_this_round = 0;
+  int start_anew = 0;
+
   char mpeg_ver = 0;
   char layer = 0;
   int idx_num = 0;
   int bitrate = 0;              /*used for each frame */
-  int avg_bps = 0;              /*average bitrate */
-  int vbr_flag = 0;
   int copyright_flag = 0;
   int original_flag = 0;
-  int length = 0;
   int sample_rate = 0;
   int ch = 0;
   int frame_size;
-  int frames = 0;
-  size_t pos = 0;
-  char format[512];
 
-  do
+  if (plugin == NULL || plugin->state == NULL)
+    return 1;
+
+  state = plugin->state;
+  file_position = plugin->position;
+  file_size = plugin->fsize;
+  size = plugin->map_size;
+  data = plugin->shm_ptr;
+
+  if (plugin->seek_request < 0)
+    return 1;
+  if (file_position - plugin->seek_request > 0)
+  {
+    plugin->seek_request = -1;
+    return 1;
+  }
+  if (plugin->seek_request - file_position < size)
+    offset = plugin->seek_request - file_position;
+
+  while (1)
+  {
+    switch (state->state)
     {
-      /* seek for frame start */
-      if (pos + sizeof (header) > size)
+    case MP3_LOOKING_FOR_FRAME:
+      /* Look for a frame header: eleven set bits, i.e. 0xFF followed by
+       * a byte whose top three bits are set.  The bytes are compared one
+       * by one, which needs neither an aligned 32-bit load nor knowledge
+       * of the host byte order. */
+      while (offset + sizeof (state->header) < size &&
+             ! (data[offset] == 0xFF && (data[offset + 1] & 0xE0) == 0xE0))
+        offset += 1;
+      if (offset + sizeof (state->header) >= size)
+      {
+        /* Alternative: (frames_found_in_this_round <
+         * (size / LARGEST_FRAME_SIZE / 2)) is too generous */
+        if ((file_position == 0 &&
+             ((double) state->number_of_valid_frames /
+              (double) state->number_of_frames) < 0.5) ||
+            file_position + offset + sizeof (state->header) >= file_size)
         {
-          return 0;
-        }                       /*unable to find header */
-      header = (data[pos] << 24) | (data[pos+1] << 16) |
-               (data[pos+2] << 8) | data[pos+3];
-      if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
-        break;                  /*found header sync */
-      pos++;
-      counter++;                /*next try */
-    }
-  while (counter < MAX_MP3_SCAN_DEEP);
-  if (counter >= MAX_MP3_SCAN_DEEP)
-    return 0;
-
-  do
-    {                           /*ok, now we found a mp3 frame header */
-      frames++;
-      switch (header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
-        {
-        case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
-          mpeg_ver = MPEG_V1;
-          break;
-        case (MPA_LAST_SYNC_BIT_MASK):
-          mpeg_ver = MPEG_V2;
-          break;
-        case 0:
-          mpeg_ver = MPEG_V25;
-          break;
-        case (MPA_VERSION_MASK):
-        default:
-          return 0;
+          calculate_frame_statistics_and_maybe_report_it (plugin, state,
+                                                          proc, proc_cls);
+          return 1;
         }
-      switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
-        {
-        case (0x1 << MPA_LAYER_SHIFT):
-          layer = LAYER_3;
-          break;
-        case (0x2 << MPA_LAYER_SHIFT):
-          layer = LAYER_2;
-          break;
-        case (0x3 << MPA_LAYER_SHIFT):
-          layer = LAYER_1;
-          break;
-        case 0x0:
-        default:
-          return 0;
-        }
+        plugin->seek_request = file_position + offset;
+        return 0;
+      }
+      /* widen before shifting so that 0xFF << 24 stays unsigned */
+      state->header = ((uint32_t) data[offset] << 24) |
+                      ((uint32_t) data[offset + 1] << 16) |
+                      ((uint32_t) data[offset + 2] << 8) |
+                      (uint32_t) data[offset + 3];
+      if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
+        state->state = MP3_READING_FRAME;
+      else
+        offset += 1; /* always make progress, never rescan the same byte */
+      break;
+    case MP3_READING_FRAME:
+      state->number_of_frames += 1;
+      start_anew = 0;
+      switch (state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
+      {
+      case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
+        mpeg_ver = MPEG_V1;
+        break;
+      case (MPA_LAST_SYNC_BIT_MASK):
+        mpeg_ver = MPEG_V2;
+        break;
+      case 0:
+        mpeg_ver = MPEG_V25;
+        break;
+      case (MPA_VERSION_MASK):
+      default:
+        state->state = MP3_LOOKING_FOR_FRAME;
+        offset += 1;
+        start_anew = 1;
+      }
+      if (start_anew)
+        break;
+      switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
+      {
+      case (0x1 << MPA_LAYER_SHIFT):
+        layer = LAYER_3;
+        break;
+      case (0x2 << MPA_LAYER_SHIFT):
+        layer = LAYER_2;
+        break;
+      case (0x3 << MPA_LAYER_SHIFT):
+        layer = LAYER_1;
+        break;
+      case 0x0:
+      default:
+        state->state = MP3_LOOKING_FOR_FRAME;
+        offset += 1;
+        start_anew = 1;
+      }
+      if (start_anew)
+        break;
       if (mpeg_ver < MPEG_V25)
         idx_num = (mpeg_ver - 1) * 3 + layer - 1;
       else
         idx_num = 2 + layer;
-      bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) &
+      bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) &
                                      MPA_BITRATE_MASK][idx_num];
       if (bitrate < 0)
-        {
-          frames--;
-          break;
-        }                       /*error in header */
-      sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) &
+      {
+        /*error in header */
+        state->state = MP3_LOOKING_FOR_FRAME;
+        offset += 1;
+        break;
+      }
+      sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) &
                                MPA_FREQ_MASK][mpeg_ver - 1];
-      if (sample_rate < 0)
-        {
-          frames--;
-          break;
-        }                       /*error in header */
-      ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
-      copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1;
-      original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1;
-      frame_size =
-        144 * bitrate / (sample_rate ? sample_rate : 1) +
-        ((header >> MPA_PADDING_SHIFT) & 0x1);
+      if (sample_rate <= 0)
+      {
+        /*error in header */
+        state->state = MP3_LOOKING_FOR_FRAME;
+        offset += 1;
+        break;
+      }
+      ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
+      copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1;
+      original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1;
+      if (layer == LAYER_1)
+        frame_size = (12 * bitrate / sample_rate +
+                      ((state->header >> MPA_PADDING_SHIFT) & 0x1)) * 4;
+      else
+        frame_size = 144 * bitrate / sample_rate +
+                     ((state->header >> MPA_PADDING_SHIFT) & 0x1);
       if (frame_size <= 0)
-       {
-         /* Technically, bitrate can be 0. However, but this particular
-          * extractor is incapable of correctly processing 0-bitrate files
-          * anyway. And bitrate == 0 might also mean that this is just a
-          * random binary sequence, which is far more likely to be true.
-          *
-          * amatus suggests to use a different algorithm and parse significant
-          * part of the file, then count the number of correct mpeg frames.
-          * If the the percentage of correct frames is below a threshold,
-          * then this is not an mpeg file at all.
-          */
-         frames -= 1;
-         break;
-       }
-      avg_bps += bitrate / 1000;
+      {
+        /*error in header */
+        state->state = MP3_LOOKING_FOR_FRAME;
+        offset += 1;
+        break;
+      }
 
-      pos += frame_size - 4;
-      if (frames > max_frames_scan)
-        break;                  /*optimization */
-      if (avg_bps / frames != bitrate / 1000)
-        vbr_flag = 1;
-      if (pos + sizeof (header) > size)
-        break;                  /* EOF */
-      header = (data[pos] << 24) | (data[pos+1] << 16) |
-               (data[pos+2] << 8) | data[pos+3];
-    }
-  while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK);
+      /* Only save data from valid frames in the state */
+      state->avg_bps += bitrate / 1000;
+      state->sample_rate = sample_rate;
+      state->mpeg_ver = mpeg_ver;
+      state->layer = layer;
+      state->ch = ch;
+      state->copyright_flag = copyright_flag;
+      state->original_flag = original_flag;
+      state->bitrate = bitrate;
 
-  if (frames < 2)
-    return 0;                /*no valid frames */
-  ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
-  avg_bps = avg_bps / frames;
-  if (max_frames_scan)
-    {                           /*if not all frames scaned */
-      length =
-        size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
+      frames_found_in_this_round += 1;
+      state->number_of_valid_frames += 1;
+      /* avg_bps is still a sum here; compare the running mean */
+      if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
+        state->vbr_flag = 1;
+      /* jump over the frame body, the next header should follow it */
+      offset += frame_size;
+      state->state = MP3_LOOKING_FOR_FRAME;
+      break;
     }
-  else
-    {
-      length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
-    }
-
-  ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION);
-  snprintf (format,
-           sizeof(format),
-           "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
-            mpeg_versions[mpeg_ver-1],
-            layer_names[layer-1],
-            avg_bps,
-            vbr_flag ? _("VBR") : _("CBR"),
-            sample_rate,
-            channel_modes[ch],
-            copyright_flag ? _("copyright") : _("no copyright"),
-            original_flag ? _("original") : _("copy") );
-
-  ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
-  snprintf (format,
-           sizeof (format), "%dm%02d",
-            length / 60, length % 60);
-  ADDR (format, EXTRACTOR_METATYPE_DURATION);
-  return 0;
+  }
+  return 1;
 }
 
 /* end of mp3_extractor.c */

Modified: Extractor/src/plugins/template_extractor.c
===================================================================
--- Extractor/src/plugins/template_extractor.c  2012-03-27 12:46:29 UTC (rev 
20782)
+++ Extractor/src/plugins/template_extractor.c  2012-03-27 13:05:17 UTC (rev 
20783)
@@ -21,21 +21,113 @@
 #include "platform.h"
 #include "extractor.h"
 
-int 
-EXTRACTOR_template_extract (const unsigned char *data,
-                           size_t size,
-                           EXTRACTOR_MetaDataProcessor proc,
-                           void *proc_cls,
-                           const char *options)
+#include "extractor_plugins.h"
+
+/**
+ * Parser state of the template plugin that is kept in plugin->state
+ * between calls of the extract method.
+ */
+struct template_state
 {
-  if (0 != proc (proc_cls,
-                "template",
-                EXTRACTOR_METATYPE_RESERVED,
-                EXTRACTOR_METAFORMAT_UTF8,
-                "text/plain",
-                "foo",
-                strlen ("foo")+1))
+  int state; /* current parser state, one of enum TemplateState */
+
+  /* more state fields here
+   * all variables that should survive more than one atomic read
+   * from the "file" are to be placed here.
+   */
+};
+
+/** Parser states of the template plugin. */
+enum TemplateState
+{
+  /* the extract method clears the seek request and returns 1 */
+  TEMPLATE_INVALID = -1,
+  /* state a freshly initialized plugin starts in */
+  TEMPLATE_LOOKING_FOR_FOO = 0,
+  TEMPLATE_READING_FOO = 1,
+  TEMPLATE_READING_BAR = 2,
+  TEMPLATE_SEEKING_TO_ZOOL = 3
+};
+
+/**
+ * Allocate the parser state of a plugin and put it into its initial
+ * state.  If the allocation fails, plugin->state ends up NULL.
+ *
+ * @param plugin plugin list entry that receives the new state
+ */
+void
+EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct template_state *fresh = malloc (sizeof (struct template_state));
+
+  plugin->state = fresh;
+  if (NULL == fresh)
+    return;
+  /* or whatever is the initial one */
+  fresh->state = TEMPLATE_LOOKING_FOR_FOO;
+  /* initialize other fields to their "uninitialized" values or defaults */
+}
+
+/**
+ * Release the parser state of a plugin, if there is one, and clear the
+ * pointer.
+ *
+ * @param plugin plugin list entry whose state is discarded
+ */
+void
+EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  /* free other state fields that are heap-allocated before this point
+   * (guard those accesses with a NULL check on plugin->state) */
+  free (plugin->state); /* free (NULL) does nothing */
+  plugin->state = NULL;
+}
+
+/**
+ * Skeleton of a state-machine based extract method.
+ *
+ * @param plugin plugin list entry; its state, position, fsize, map_size,
+ *        shm_ptr and seek_request members are read, seek_request may be
+ *        updated
+ * @param proc callback that receives the extracted meta data
+ * @param proc_cls closure for proc
+ * @return 1 in every path of this skeleton: plugin or its state is NULL,
+ *         the seek request is negative or points before the mapped
+ *         window, or the parser reached TEMPLATE_INVALID
+ */
+int
+EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin,
+    EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int64_t file_position;
+  int64_t file_size;
+  size_t offset = 0;
+  size_t size;
+  unsigned char *data;
+  struct template_state *state;
+
+  /* temporary variables are declared here */
+
+  if (plugin == NULL || plugin->state == NULL)
     return 1;
-  /* insert more here */
-  return 0;
+
+  /* for easier access (and conforms better with the old plugins var names) */
+  state = plugin->state;
+  file_position = plugin->position;
+  file_size = plugin->fsize;
+  size = plugin->map_size;
+  data = plugin->shm_ptr;
+
+  /* sanity checks */
+  if (plugin->seek_request < 0)
+    return 1;
+  if (file_position - plugin->seek_request > 0)
+  {
+    plugin->seek_request = -1;
+    return 1;
+  }
+  if (plugin->seek_request - file_position < size)
+    offset = plugin->seek_request - file_position;
+
+  while (1)
+  {
+    switch (state->state)
+    {
+    case TEMPLATE_INVALID:
+      plugin->seek_request = -1;
+      return 1;
+    case TEMPLATE_LOOKING_FOR_FOO:
+      /* Find FOO in data buffer.
+       * If found, set offset to its position and set state to
+       * TEMPLATE_READING_FOO
+       * If not found, set seek_request to file_position + offset and
+       * return 1 (but it's better to give up as early as possible, to
+       * avoid reading the whole file byte-by-byte).
+       * NOTE(review): the mp3 plugin of this revision returns 0 after
+       * setting seek_request; confirm which return value asks for more
+       * data.
+       */
+      /* Placeholder: nothing is parsed yet, so give up instead of
+       * spinning in this state forever.  Replace with real logic. */
+      state->state = TEMPLATE_INVALID;
+      break;
+    case TEMPLATE_READING_FOO:
+      /* See if offset + sizeof(foo) < size, otherwise set seek_request
+       * to offset and return 1;
+       * If file_position is 0, and size is still too small, give up.
+       * Read FOO, maybe increase offset to reflect that (depends on the
+       * parser logic).
+       * Either process FOO right here, or jump to another state (see
+       * ebml plugin for an example of complex state-jumps).
+       * If FOO says you need to seek somewhere - set offset to
+       * seek_target - file_position and set the next state (next state
+       * will check that offset < size; all states that do reading should
+       * do that, and also check for EOF).
+       */
+      /* ... */
+      /* Placeholder, see TEMPLATE_LOOKING_FOR_FOO. */
+      state->state = TEMPLATE_INVALID;
+      break;
+    default:
+      /* states without a handler (yet) end the extraction */
+      state->state = TEMPLATE_INVALID;
+      break;
+    }
+  }
+  /* Should not reach this */
+  return 1;
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]