gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23202 - in Extractor: src/plugins src/plugins/testdata test


From: gnunet
Subject: [GNUnet-SVN] r23202 - in Extractor: src/plugins src/plugins/testdata test
Date: Sat, 11 Aug 2012 17:10:15 +0200

Author: grothoff
Date: 2012-08-11 17:10:15 +0200 (Sat, 11 Aug 2012)
New Revision: 23202

Added:
   Extractor/src/plugins/test_ole2.c
   Extractor/src/plugins/testdata/ole2_blair.doc
   Extractor/src/plugins/testdata/ole2_excel.xls
   Extractor/src/plugins/testdata/ole2_msword.doc
   Extractor/src/plugins/testdata/ole2_starwriter40.sdw
Removed:
   Extractor/test/Test.doc
   Extractor/test/blair.doc
   Extractor/test/results.xls
   Extractor/test/sw40.sdw
Modified:
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/ole2_extractor.c
Log:
ole2 testcase

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-08-11 13:04:52 UTC (rev 23201)
+++ Extractor/src/plugins/Makefile.am   2012-08-11 15:10:15 UTC (rev 23202)
@@ -25,7 +25,11 @@
   testdata/wav_noise.wav \
   testdata/wav_alert.wav \
   testdata/it_dawn.it \
-  testdata/s3m_2nd_pm.s3m
+  testdata/s3m_2nd_pm.s3m \
+  testdata/ole2_msword.doc \
+  testdata/ole2_starwriter40.sdw \
+  testdata/ole2_blair.doc \
+  testdata/ole2_excel.xls
 
 if HAVE_VORBISFILE
 PLUGIN_OGG=libextractor_ogg.la

Modified: Extractor/src/plugins/ole2_extractor.c
===================================================================
--- Extractor/src/plugins/ole2_extractor.c      2012-08-11 13:04:52 UTC (rev 23201)
+++ Extractor/src/plugins/ole2_extractor.c      2012-08-11 15:10:15 UTC (rev 23202)
@@ -21,8 +21,7 @@
      -- the Gnome Structured File Library
      Copyright (C) 2002-2004 Jody Goldberg (address@hidden)
 
-     Part of this code was borrowed from wordleaker.cpp. See also
-     the README file in this directory.
+     Part of this code was adapted from wordleaker.
 */
 /**
  * @file plugins/ole2_extractor.c
@@ -67,10 +66,13 @@
  */
 static int
 add_metadata (EXTRACTOR_MetaDataProcessor proc,
-           void *proc_cls,
-           const char *phrase,
-           enum EXTRACTOR_MetaType type) 
+             void *proc_cls,
+             const char *phrase,
+             enum EXTRACTOR_MetaType type) 
 {
+  char *tmp;
+  int ret;
+
   if (0 == strlen (phrase))
     return 0;
   if (0 == strcmp (phrase, "\"\""))
@@ -79,13 +81,21 @@
     return 0;
   if (0 == strcmp (phrase, " "))
     return 0;
-  return proc (proc_cls, 
-              "ole2",
-              type,
-              EXTRACTOR_METAFORMAT_UTF8,
-              "text/plain",
-              phrase,
-              strlen (phrase) +1);
+  if (NULL == (tmp = strdup (phrase)))
+    return 0;
+  
+  while ( (strlen (tmp) > 0) &&
+         (isblank ((unsigned char) tmp [strlen (tmp) - 1])) )
+    tmp [strlen (tmp) - 1] = '\0';
+  ret = proc (proc_cls, 
+             "ole2",
+             type,
+             EXTRACTOR_METAFORMAT_UTF8,
+             "text/plain",
+             tmp,
+             strlen (tmp) + 1);
+  free (tmp);
+  return ret;
 }
 
 
@@ -212,9 +222,6 @@
     }
   if (NULL == contents)
     return;
-  if ( (strlen (contents) > 0) &&
-       ('\n' == contents[strlen (contents) - 1]) )
-    contents [strlen (contents) - 1] = '\0';
   if (0 == strcmp (type, "meta:generator"))
     {
       const char *mimetype = "application/vnd.ms-files";

Added: Extractor/src/plugins/test_ole2.c
===================================================================
--- Extractor/src/plugins/test_ole2.c                           (rev 0)
+++ Extractor/src/plugins/test_ole2.c   2012-08-11 15:10:15 UTC (rev 23202)
@@ -0,0 +1,490 @@
+/*
+     This file is part of libextractor.
+     (C) 2012 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file plugins/test_ole2.c
+ * @brief testcase for ole2 plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+/**
+ * Main function for the OLE2 testcase.
+ *
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+  struct SolutionData ole2_msword_sol[] =
+    {
+      { 
+       EXTRACTOR_METATYPE_CREATOR,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Nils Durner",
+       strlen ("Nils Durner") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_UNKNOWN_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2005-03-21T06:11:12Z",
+       strlen ("2005-03-21T06:11:12Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_DESCRIPTION,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "This is a small document to test meta data extraction by GNU 
libextractor.",
+       strlen ("This is a small document to test meta data extraction by GNU 
libextractor.") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_KEYWORDS,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "ole ole2 eole2extractor",
+       strlen ("ole ole2 eole2extractor") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_SUBJECT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "GNU libextractor",
+       strlen ("GNU libextractor") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_TITLE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Testcase for the ole2 extractor",
+       strlen ("Testcase for the ole2 extractor") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_LAST_SAVED_BY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Nils Durner",
+       strlen ("Nils Durner") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATION_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2005-03-21T06:10:19Z",
+       strlen ("2005-03-21T06:10:19Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_EDITING_CYCLES,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2",
+       strlen ("2") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 }
+    };
+
+  struct SolutionData ole2_starwriter_sol[] =
+    {
+      { 
+       EXTRACTOR_METATYPE_CREATOR,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Christian Grothoff",
+       strlen ("Christian Grothoff") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_UNKNOWN_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2004-09-24T02:54:31Z",
+       strlen ("2004-09-24T02:54:31Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_DESCRIPTION,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The comments",
+       strlen ("The comments") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_KEYWORDS,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Keywords",
+       strlen ("The Keywords") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_SUBJECT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Subject",
+       strlen ("The Subject") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_TITLE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Title",
+       strlen ("The Title") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_LAST_SAVED_BY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Christian Grothoff",
+       strlen ("Christian Grothoff") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATION_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2004-09-24T02:53:15Z",
+       strlen ("2004-09-24T02:53:15Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_EDITING_CYCLES,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "4",
+       strlen ("4") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_TITLE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Title",
+       strlen ("The Title") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_SUBJECT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Subject",
+       strlen ("The Subject") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_COMMENT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The comments",
+       strlen ("The comments") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_KEYWORDS,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "The Keywords",
+       strlen ("The Keywords") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 }
+    };
+
+  struct SolutionData ole2_blair_sol[] =
+    {
+      { 
+       EXTRACTOR_METATYPE_LANGUAGE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "U.S. English",
+       strlen ("U.S. English") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATOR,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "default",
+       strlen ("default") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_UNKNOWN_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2003-02-03T11:18:00Z",
+       strlen ("2003-02-03T11:18:00Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_TITLE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
+       strlen ("Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND 
INTIMIDATION") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CHARACTER_COUNT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "22090",
+       strlen ("22090") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_LAST_SAVED_BY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "MKhan",
+       strlen ("MKhan") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_PAGE_COUNT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "1",
+       strlen ("1") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_WORD_COUNT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "3875",
+       strlen ("3875") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATION_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2003-02-03T09:31:00Z",
+       strlen ("2003-02-03T09:31:00Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_EDITING_CYCLES,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "4",
+       strlen ("4") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_MIMETYPE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "application/vnd.ms-files",
+       strlen ("application/vnd.ms-files") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Microsoft Word 8.0",
+       strlen ("Microsoft Word 8.0") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_TEMPLATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Normal.dot",
+       strlen ("Normal.dot") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_LINE_COUNT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "184",
+       strlen ("184") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "44",
+       strlen ("44") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #0: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'",
+       strlen ("Revision #0: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #1: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'",
+       strlen ("Revision #1: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #2: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'",
+       strlen ("Revision #2: Author `cic22' worked on 
`C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - 
security.asd'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - 
security.doc'",
+       strlen ("Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - 
security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
+       strlen ("Revision #4: Author `JPratt' worked on `A:\\Iraq - 
security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - 
security.doc'",
+       strlen ("Revision #5: Author `ablackshaw' worked on 
`C:\\ABlackshaw\\Iraq - security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - 
security.doc'",
+       strlen ("Revision #6: Author `ablackshaw' worked on 
`C:\\ABlackshaw\\A;Iraq - security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
+       strlen ("Revision #7: Author `ablackshaw' worked on `A:\\Iraq - 
security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
+       strlen ("Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - 
security.doc'") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_REVISION_HISTORY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Revision #9: Author `MKhan' worked on 
`C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
+       strlen ("Revision #9: Author `MKhan' worked on 
`C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 }
+    };
+
+  struct SolutionData ole2_excel_sol[] =
+    {
+      { 
+       EXTRACTOR_METATYPE_CREATOR,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "JV",
+       strlen ("JV") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_LAST_SAVED_BY,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "JV",
+       strlen ("JV") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATION_DATE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "2002-03-20T21:26:28Z",
+       strlen ("2002-03-20T21:26:28Z") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_MIMETYPE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "application/vnd.ms-files",
+       strlen ("application/vnd.ms-files") + 1,
+       0 
+      },
+      { 
+       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "Microsoft Excel",
+       strlen ("Microsoft Excel") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 }
+    };
+
+  struct ProblemSet ps[] =
+    {
+      { "testdata/ole2_msword.doc",
+       ole2_msword_sol },
+      { "testdata/ole2_starwriter40.sdw",
+       ole2_starwriter_sol },
+      { "testdata/ole2_blair.doc",
+       ole2_blair_sol },
+      { "testdata/ole2_excel.xls",
+       ole2_excel_sol },
+      { NULL, NULL }
+    };
+  return ET_main ("ole2", ps);
+}
+
+/* end of test_ole2.c */

Copied: Extractor/src/plugins/testdata/ole2_blair.doc (from rev 23197, Extractor/test/blair.doc)
===================================================================
(Binary files differ)

Copied: Extractor/src/plugins/testdata/ole2_excel.xls (from rev 23197, Extractor/test/results.xls)
===================================================================
(Binary files differ)

Copied: Extractor/src/plugins/testdata/ole2_msword.doc (from rev 23197, Extractor/test/Test.doc)
===================================================================
(Binary files differ)

Copied: Extractor/src/plugins/testdata/ole2_starwriter40.sdw (from rev 23197, Extractor/test/sw40.sdw)
===================================================================
(Binary files differ)

Deleted: Extractor/test/Test.doc
===================================================================
(Binary files differ)

Deleted: Extractor/test/blair.doc
===================================================================
(Binary files differ)

Deleted: Extractor/test/results.xls
===================================================================
(Binary files differ)

Deleted: Extractor/test/sw40.sdw
===================================================================
(Binary files differ)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]