gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23197 - in Extractor: . src/plugins


From: gnunet
Subject: [GNUnet-SVN] r23197 - in Extractor: . src/plugins
Date: Fri, 10 Aug 2012 21:22:14 +0200

Author: grothoff
Date: 2012-08-10 21:22:14 +0200 (Fri, 10 Aug 2012)
New Revision: 23197

Modified:
   Extractor/configure.ac
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/ole2_extractor.c
Log:
implementing OLE2 support

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2012-08-10 16:37:53 UTC (rev 23196)
+++ Extractor/configure.ac      2012-08-10 19:22:14 UTC (rev 23197)
@@ -575,7 +575,7 @@
 
 ABI_GSF
 
-AM_CONDITIONAL(WITH_GSF, test "x$have_gsf" = "xtrue")
+AM_CONDITIONAL(HAVE_GSF, test "x$have_gsf" = "xtrue")
 
 # produce new line
 echo ""

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-08-10 16:37:53 UTC (rev 23196)
+++ Extractor/src/plugins/Makefile.am   2012-08-10 19:22:14 UTC (rev 23197)
@@ -67,7 +67,12 @@
 TEST_EXIV2=test_exiv2
 endif
 
+if HAVE_GSF
+PLUGIN_GSF=libextractor_ole2.la
+TEST_GSF=test_ole2
+endif
 
+
 plugin_LTLIBRARIES = \
   libextractor_it.la \
   libextractor_xm.la \
@@ -80,7 +85,8 @@
   $(PLUGIN_FLAC) \
   $(PLUGIN_MPEG) \
   $(PLUGIN_JPEG) \
-  $(PLUGIN_EXIV2)
+  $(PLUGIN_EXIV2) \
+  $(PLUGIN_GSF)
 
 if HAVE_ZZUF
   fuzz_tests=fuzz_default.sh 
@@ -97,7 +103,8 @@
   $(TEST_FLAC) \
   $(TEST_MPEG) \
   $(TEST_JPEG) \
-  $(TEST_EXIV2)
+  $(TEST_EXIV2) \
+  $(TEST_GSF)
 
 TESTS = \
   $(fuzz_tests) \
@@ -256,3 +263,21 @@
   $(top_builddir)/src/plugins/libtest.la
 
 
+libextractor_ole2_la_SOURCES = \
+  ole2_extractor.c
+libextractor_ole2_la_CFLAGS = \
+  $(GSF_CFLAGS)
+libextractor_ole2_la_CPPFLAGS = \
+  $(GSF_CFLAGS)
+libextractor_ole2_la_LDFLAGS = \
+  $(PLUGINFLAGS) 
+libextractor_ole2_la_LIBADD = \
+  $(top_builddir)/src/common/libextractor_common.la \
+  $(GSF_LIBS)
+
+test_ole2_SOURCES = \
+  test_ole2.c
+test_ole2_LDADD = \
+  $(top_builddir)/src/plugins/libtest.la
+
+

Modified: Extractor/src/plugins/ole2_extractor.c
===================================================================
--- Extractor/src/plugins/ole2_extractor.c      2012-08-10 16:37:53 UTC (rev 23196)
+++ Extractor/src/plugins/ole2_extractor.c      2012-08-10 19:22:14 UTC (rev 23197)
@@ -37,14 +37,23 @@
 #include <stdio.h>
 #include <ctype.h>
 #include <gsf/gsf-utils.h>
+#include <gsf/gsf-input-impl.h>
 #include <gsf/gsf-input-memory.h>
+#include <gsf/gsf-impl-utils.h>
 #include <gsf/gsf-infile.h>
 #include <gsf/gsf-infile-msole.h>
 #include <gsf/gsf-msole-utils.h>
 
-#define DEBUG_OLE2 0
 
+/**
+ * Set to 1 to use our own GsfInput subclass which supports seeking
+ * and thus can handle very large files.  Set to 0 to use the simple
+ * gsf in-memory buffer (which can only access the first ~16k) for
+ * debugging.
+ */
+#define USE_LE_INPUT 1
 
+
 /**
  * Give the given UTF8 string to LE by calling 'proc'.
  *
@@ -325,7 +334,7 @@
     if ( (buf[0xd5] + buf[0xd4] > 0) &&
         (0 != add_metadata (proc, proc_cls,
                             &buf[0xd6],
-                            EXTRACTOR_METATYPE_SUBJECT)) _)
+                            EXTRACTOR_METATYPE_SUBJECT)) )
       return 1;
     buf[0x215] = '\0';
     if ( (buf[0x115] + buf[0x116] > 0) &&
@@ -450,7 +459,7 @@
     case 0x041b:
       return __("Slovak");
     case 0x041c:
-    return __("Albanian");
+      return __("Albanian");
     case 0x041d:
       return __("Swedish");
     case 0x041e:
@@ -583,15 +592,26 @@
 
 /* *************************** custom GSF input method ***************** */
 
-G_BEGIN_DECLS
 #define LE_TYPE_INPUT                  (le_input_get_type ())
-#define LE_INPUT(obj)                  (G_TYPE_CHECK_INSTANCE_CAST ((obj), 
TYPE_LE_INPUT, LeInput))
-#define LE_INPUT_CLASS(klass)          (G_TYPE_CHECK_CLASS_CAST ((klass), 
TYPE_LE_INPUT, LeInputClass))
-#define IS_LE_INPUT(obj)               (G_TYPE_CHECK_INSTANCE_TYPE ((obj), 
TYPE_LE_INPUT))
-#define IS_LE_INPUT_CLASS(klass)       (G_TYPE_CHECK_CLASS_TYPE ((klass), 
TYPE_LE_INPUT))
-#define LE_INPUT_GET_CLASS(obj)        (G_TYPE_INSTANCE_GET_CLASS ((obj), 
TYPE_LE_INPUT, LeInputClass))
+#define LE_INPUT(obj)                  (G_TYPE_CHECK_INSTANCE_CAST ((obj), 
LE_TYPE_INPUT, LeInput))
+#define LE_INPUT_CLASS(klass)          (G_TYPE_CHECK_CLASS_CAST ((klass), 
LE_TYPE_INPUT, LeInputClass))
+#define IS_LE_INPUT(obj)               (G_TYPE_CHECK_INSTANCE_TYPE ((obj), 
LE_TYPE_INPUT))
+#define IS_LE_INPUT_CLASS(klass)       (G_TYPE_CHECK_CLASS_TYPE ((klass), 
LE_TYPE_INPUT))
+#define LE_INPUT_GET_CLASS(obj)        (G_TYPE_INSTANCE_GET_CLASS ((obj), 
LE_TYPE_INPUT, LeInputClass))
 
 /**
+ * Internal state of an "LeInput" object.
+ */
+typedef struct _LeInputPrivate 
+{
+  /**
+   * Our extraction context.
+   */
+  struct EXTRACTOR_ExtractContext *ec;
+} LeInputPrivate;
+
+
+/**
  * Overall state of an "LeInput" object.
  */
 typedef struct _LeInput 
@@ -610,18 +630,6 @@
 
 
 /**
- * Internal state of an "LeInput" object.
- */
-typedef struct _LeInputPrivate 
-{
-  /**
-   * Our extraction context.
-   */
-  struct EXTRACTOR_ExtractContext *ec;
-} LeInputPrivate;
-
-
-/**
  * LeInput's class state.
  */
 typedef struct _LeInputClass
@@ -640,13 +648,6 @@
 
 
 /**
- * Required method to obtain the LeInput "type".
- */
-GType
-le_input_get_type (void) G_GNUC_CONST;
-
-
-/**
  * Constructor for LeInput objects. 
  *
  * @param ec extraction context to use
@@ -654,32 +655,199 @@
  */
 GsfInput *
 le_input_new (struct EXTRACTOR_ExtractContext *ec);
-G_END_DECLS
 
 
 /**
- * Macro to create LeInput type definition.
+ * Class initializer for the "LeInput" class.
+ *
+ * @param class class object to initialize
  */
-G_DEFINE_TYPE (LeInput, le_input, GSF_TYPE_INPUT)
+static void
+le_input_class_init (LeInputClass *class);
 
 
 /**
+ * Initialize internal state of fresh input object.
  *
+ * @param input object to initialize
  */
 static void
+le_input_init (LeInput *input);
+
+
+/**
+ * Macro to create LeInput type definition and register the class.
+ */
+GSF_CLASS (LeInput, le_input, le_input_class_init, le_input_init, 
GSF_INPUT_TYPE)
+
+
+/**
+ * "Dup" method of LeInput; duplication is not supported, so this always fails.
+ *
+ * @param input the input to duplicate (unused)
+ * @param err if not NULL, set to a new "dup not supported" GError
+ * @return NULL (always)
+ */
+static GsfInput *
+le_input_dup (GsfInput *input,
+             GError **err)
+{
+  if (NULL != err)
+    *err = g_error_new (gsf_input_error_id (), 0,
+                       "dup not supported on LeInput");
+  return NULL;
+}
+
+
+/**
+ * Read exactly num_bytes via the extraction context. On a short or
+ * failed read the previous position is restored and NULL is returned;
+ * partial reads are not supported. The returned buffer is either
+ * 'optional_buffer' or the buffer handed out by 'ec->read'.
+ *
+ * @param input the LeInput to read from
+ * @param num_bytes number of bytes that must be read
+ * @param optional_buffer if not NULL, the data is copied here and returned
+ * @return buffer where num_bytes data are available, or NULL on error
+ */
+static const guint8 *
+le_input_read (GsfInput *input,
+              size_t num_bytes,
+              guint8 *optional_buffer)
+{
+  LeInput *li = LE_INPUT (input);
+  struct EXTRACTOR_ExtractContext *ec;
+  void *buf;
+  uint64_t old_off;
+  ssize_t ret;
+  
+  ec = li->priv->ec;
+  old_off = ec->seek (ec->cls, 0, SEEK_CUR);
+  if (num_bytes 
+      != (ret = ec->read (ec->cls,
+                         &buf,
+                         num_bytes)))
+    {
+      /* we don't support partial reads; 
+        most other GsfInput implementations in this case
+        allocate some huge temporary buffer just to avoid
+        the partial read; for now, rewind to where we started */
+      ec->seek (ec->cls, old_off, SEEK_SET);
+      return NULL;
+    }
+  if (NULL != optional_buffer)
+    {
+      memcpy (optional_buffer, buf, num_bytes);
+      return optional_buffer;
+    }
+  return buf;
+}
+
+
+/**
+ * Move the current location in an input stream via the context's 'seek'.
+ *
+ * @param input stream to seek
+ * @param offset target offset, interpreted according to 'whence'
+ * @param whence G_SEEK_SET, G_SEEK_CUR or G_SEEK_END; other values fail
+ * @return TRUE on error (bad 'whence' or seek returned -1), FALSE on success
+ */
+static gboolean
+le_input_seek (GsfInput *input,
+              gsf_off_t offset,
+              GSeekType whence)
+{
+  LeInput *li = LE_INPUT (input);
+  struct EXTRACTOR_ExtractContext *ec;
+  int w;
+  int64_t ret;
+
+  ec = li->priv->ec;
+  switch (whence)
+    {
+    case G_SEEK_SET:
+      w = SEEK_SET;
+      break;
+    case G_SEEK_CUR:
+      w = SEEK_CUR;
+      break;
+    case G_SEEK_END:
+      w = SEEK_END;
+      break;
+    default:
+      return TRUE;
+    }
+  if (-1 == 
+      (ret = ec->seek (ec->cls,
+                      offset,
+                      w)))
+    return TRUE;
+  return FALSE;
+}
+
+
+/**
+ * Class initializer for the "LeInput" class.
+ *
+ * @param class class object to initialize
+ */
+static void
 le_input_class_init (LeInputClass *class)
 {
-  // GObjectClass *gobject_class;
   GsfInputClass *input_class;
 
-  // gobject_class = (GObjectClass *) class;
   input_class = (GsfInputClass *) class;
-  input_class->read = le_input_read;
+  input_class->Dup = le_input_dup;
+  input_class->Read = le_input_read;
+  input_class->Seek = le_input_seek;
   g_type_class_add_private (class, sizeof (LeInputPrivate));
 }
 
 
+/**
+ * Instance initializer: attach the private state and clear its 'ec'.
+ *
+ * @param input freshly created object to initialize
+ */
+static void
+le_input_init (LeInput *input)
+{
+  LeInputPrivate *priv;
 
+  input->priv =
+    G_TYPE_INSTANCE_GET_PRIVATE (input, LE_TYPE_INPUT,
+                                LeInputPrivate);
+  priv = input->priv;
+  priv->ec = NULL;
+}
+
+
+/**
+ * Creates a new LeInput object sized to 'ec->get_size (ec->cls)'.
+ *
+ * @param ec extractor context to wrap; stored in the private state
+ * @return the new input; NOTE(review): no NULL/error path is visible here
+ */
+GsfInput *
+le_input_new (struct EXTRACTOR_ExtractContext *ec)
+{
+  LeInput *input;
+
+  input = g_object_new (LE_TYPE_INPUT, NULL);
+  gsf_input_set_size (GSF_INPUT (input),
+                     ec->get_size (ec->cls));
+  gsf_input_seek_emulate (GSF_INPUT (input),
+                         0);
+  input->input.name = NULL;
+  input->input.container = NULL;
+  input->priv->ec = ec;
+
+  return GSF_INPUT (input);
+}
+
+
+
+
 /* *********************** end of custom GSF input method ************* */
 
 
@@ -702,17 +870,50 @@
   unsigned int lid;
   const char *lang;
   int ret;
+  void *data;
+  uint64_t fsize;
+  ssize_t data_size;
 
-  if (size < 512 + 898)
-    return; /* can hardly be OLE2 */
-  if (NULL == (input = gsf_input_memory_new ((const guint8 *) data,
-                                            (gsf_off_t) size,
-                                            FALSE)))
+  fsize = ec->get_size (ec->cls);
+  if (fsize < 512 + 898)
+    {
+      /* File too small for OLE2 */
+      return; /* can hardly be OLE2 */
+    }
+  if (512 + 898 > (data_size = ec->read (ec->cls, &data, fsize)))
+    {
+      /* Failed to read minimum file size to buffer */
+      return;
+    }
+  data512 = (const unsigned char*) data + 512;
+  lid = data512[6] + (data512[7] << 8);
+  if ( (NULL != (lang = lid_to_language (lid))) &&
+       (0 != (ret = add_metadata (ec->proc, ec->cls,
+                                 lang,
+                                 EXTRACTOR_METATYPE_LANGUAGE))) )
     return;
+  lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16) + 
(data512[729] << 24);
+  fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16) + 
(data512[725] << 24);
+  if (0 != ec->seek (ec->cls, 0, SEEK_SET))
+    {
+      /* seek failed!? */
+      return;
+    }
+#if USE_LE_INPUT
+  if (NULL == (input = le_input_new (ec)))
+    {
+      fprintf (stderr, "le_input_new failed\n");
+      return;
+    }
+#else
+  input = gsf_input_memory_new ((const guint8 *) data,
+                               data_size,
+                               FALSE);
+#endif
   if (NULL == (infile = gsf_infile_msole_new (input, NULL)))
     {
       g_object_unref (G_OBJECT (input));
-      return 0;
+      return;
     }
   ret = 0;
   for (i=0;i<gsf_infile_num_children (infile);i++) 
@@ -722,32 +923,23 @@
       if (NULL == (name = gsf_infile_name_by_index (infile, i)))
        continue;
       src = NULL;
-      if ( ( (0 == strcmp(name, "\005SummaryInformation")) ||
-            (0 == strcmp(name, "\005DocumentSummaryInformation")) ) &&
+      if ( ( (0 == strcmp (name, "\005SummaryInformation")) ||
+            (0 == strcmp (name, "\005DocumentSummaryInformation")) ) &&
           (NULL != (src = gsf_infile_child_by_index (infile, i))) )
        ret = process (src,
-                      proc, 
-                      proc_cls);
+                      ec->proc, 
+                      ec->cls);
       if ( (0 == strcmp (name, "SfxDocumentInfo")) &&
           (NULL != (src = gsf_infile_child_by_index (infile, i))) )
        ret = process_star_office (src,
-                                  proc,
-                                  proc_cls);
+                                  ec->proc,
+                                  ec->cls);
       if (NULL != src)
        g_object_unref (G_OBJECT (src));
     }
   if (0 != ret)
     goto CLEANUP;
 
-  data512 = (const unsigned char*) &data[512];
-  lid = data512[6] + (data512[7] << 8);
-  if ( (NULL != (lang = lid_to_language (lid))) &&
-       (0 != (ret = add_metadata (proc, proc_cls,
-                                 lang,
-                                 EXTRACTOR_METATYPE_LANGUAGE))) )
-    goto CLEANUP;
-  lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16) + 
(data512[729] << 24);
-  fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16) + 
(data512[725] << 24);
   if (lcb < 6)
     goto CLEANUP;
   for (i=0;i<gsf_infile_num_children (infile);i++) 
@@ -763,14 +955,13 @@
          ret = history_extract (src,
                                 lcb,
                                 fcb,
-                                proc, proc_cls);
+                                ec->proc, ec->cls);
          g_object_unref (G_OBJECT (src));
        }    
     }
  CLEANUP:
   g_object_unref (G_OBJECT (infile));
   g_object_unref (G_OBJECT (input));
-  return ret;
 }
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]