[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r23202 - in Extractor: src/plugins src/plugins/testdata test
From: |
gnunet |
Subject: |
[GNUnet-SVN] r23202 - in Extractor: src/plugins src/plugins/testdata test |
Date: |
Sat, 11 Aug 2012 17:10:15 +0200 |
Author: grothoff
Date: 2012-08-11 17:10:15 +0200 (Sat, 11 Aug 2012)
New Revision: 23202
Added:
Extractor/src/plugins/test_ole2.c
Extractor/src/plugins/testdata/ole2_blair.doc
Extractor/src/plugins/testdata/ole2_excel.xls
Extractor/src/plugins/testdata/ole2_msword.doc
Extractor/src/plugins/testdata/ole2_starwriter40.sdw
Removed:
Extractor/test/Test.doc
Extractor/test/blair.doc
Extractor/test/results.xls
Extractor/test/sw40.sdw
Modified:
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/ole2_extractor.c
Log:
ole2 testcase
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2012-08-11 13:04:52 UTC (rev 23201)
+++ Extractor/src/plugins/Makefile.am 2012-08-11 15:10:15 UTC (rev 23202)
@@ -25,7 +25,11 @@
testdata/wav_noise.wav \
testdata/wav_alert.wav \
testdata/it_dawn.it \
- testdata/s3m_2nd_pm.s3m
+ testdata/s3m_2nd_pm.s3m \
+ testdata/ole2_msword.doc \
+ testdata/ole2_starwriter40.sdw \
+ testdata/ole2_blair.doc \
+ testdata/ole2_excel.xls
if HAVE_VORBISFILE
PLUGIN_OGG=libextractor_ogg.la
Modified: Extractor/src/plugins/ole2_extractor.c
===================================================================
--- Extractor/src/plugins/ole2_extractor.c 2012-08-11 13:04:52 UTC (rev 23201)
+++ Extractor/src/plugins/ole2_extractor.c 2012-08-11 15:10:15 UTC (rev 23202)
@@ -21,8 +21,7 @@
-- the Gnome Structured File Library
Copyright (C) 2002-2004 Jody Goldberg (address@hidden)
- Part of this code was borrowed from wordleaker.cpp. See also
- the README file in this directory.
+ Part of this code was adapted from wordleaker.
*/
/**
* @file plugins/ole2_extractor.c
@@ -67,10 +66,13 @@
*/
static int
add_metadata (EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *phrase,
- enum EXTRACTOR_MetaType type)
+ void *proc_cls,
+ const char *phrase,
+ enum EXTRACTOR_MetaType type)
{
+ char *tmp;
+ int ret;
+
if (0 == strlen (phrase))
return 0;
if (0 == strcmp (phrase, "\"\""))
@@ -79,13 +81,21 @@
return 0;
if (0 == strcmp (phrase, " "))
return 0;
- return proc (proc_cls,
- "ole2",
- type,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- phrase,
- strlen (phrase) +1);
+ if (NULL == (tmp = strdup (phrase)))
+ return 0;
+
+ while ( (strlen (tmp) > 0) &&
+ (isblank ((unsigned char) tmp [strlen (tmp) - 1])) )
+ tmp [strlen (tmp) - 1] = '\0';
+ ret = proc (proc_cls,
+ "ole2",
+ type,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ tmp,
+ strlen (tmp) + 1);
+ free (tmp);
+ return ret;
}
@@ -212,9 +222,6 @@
}
if (NULL == contents)
return;
- if ( (strlen (contents) > 0) &&
- ('\n' == contents[strlen (contents) - 1]) )
- contents [strlen (contents) - 1] = '\0';
if (0 == strcmp (type, "meta:generator"))
{
const char *mimetype = "application/vnd.ms-files";
Added: Extractor/src/plugins/test_ole2.c
===================================================================
--- Extractor/src/plugins/test_ole2.c (rev 0)
+++ Extractor/src/plugins/test_ole2.c 2012-08-11 15:10:15 UTC (rev 23202)
@@ -0,0 +1,490 @@
+/*
+ This file is part of libextractor.
+ (C) 2012 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file plugins/test_ole2.c
+ * @brief testcase for ole2 plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+/**
+ * Main function for the OLE2 testcase.
+ *
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+ struct SolutionData ole2_msword_sol[] =
+ {
+ {
+ EXTRACTOR_METATYPE_CREATOR,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Nils Durner",
+ strlen ("Nils Durner") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_UNKNOWN_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2005-03-21T06:11:12Z",
+ strlen ("2005-03-21T06:11:12Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_DESCRIPTION,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "This is a small document to test meta data extraction by GNU libextractor.",
+ strlen ("This is a small document to test meta data extraction by GNU libextractor.") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_KEYWORDS,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "ole ole2 eole2extractor",
+ strlen ("ole ole2 eole2extractor") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_SUBJECT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "GNU libextractor",
+ strlen ("GNU libextractor") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Testcase for the ole2 extractor",
+ strlen ("Testcase for the ole2 extractor") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_LAST_SAVED_BY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Nils Durner",
+ strlen ("Nils Durner") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATION_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2005-03-21T06:10:19Z",
+ strlen ("2005-03-21T06:10:19Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_EDITING_CYCLES,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2",
+ strlen ("2") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 }
+ };
+
+ struct SolutionData ole2_starwriter_sol[] =
+ {
+ {
+ EXTRACTOR_METATYPE_CREATOR,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Christian Grothoff",
+ strlen ("Christian Grothoff") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_UNKNOWN_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2004-09-24T02:54:31Z",
+ strlen ("2004-09-24T02:54:31Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_DESCRIPTION,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The comments",
+ strlen ("The comments") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_KEYWORDS,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Keywords",
+ strlen ("The Keywords") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_SUBJECT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Subject",
+ strlen ("The Subject") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Title",
+ strlen ("The Title") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_LAST_SAVED_BY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Christian Grothoff",
+ strlen ("Christian Grothoff") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATION_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2004-09-24T02:53:15Z",
+ strlen ("2004-09-24T02:53:15Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_EDITING_CYCLES,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "4",
+ strlen ("4") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Title",
+ strlen ("The Title") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_SUBJECT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Subject",
+ strlen ("The Subject") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_COMMENT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The comments",
+ strlen ("The comments") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_KEYWORDS,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "The Keywords",
+ strlen ("The Keywords") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 }
+ };
+
+ struct SolutionData ole2_blair_sol[] =
+ {
+ {
+ EXTRACTOR_METATYPE_LANGUAGE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "U.S. English",
+ strlen ("U.S. English") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATOR,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "default",
+ strlen ("default") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_UNKNOWN_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2003-02-03T11:18:00Z",
+ strlen ("2003-02-03T11:18:00Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
+ strlen ("Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CHARACTER_COUNT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "22090",
+ strlen ("22090") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_LAST_SAVED_BY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "MKhan",
+ strlen ("MKhan") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_PAGE_COUNT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "1",
+ strlen ("1") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_WORD_COUNT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "3875",
+ strlen ("3875") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATION_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2003-02-03T09:31:00Z",
+ strlen ("2003-02-03T09:31:00Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_EDITING_CYCLES,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "4",
+ strlen ("4") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "application/vnd.ms-files",
+ strlen ("application/vnd.ms-files") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Microsoft Word 8.0",
+ strlen ("Microsoft Word 8.0") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TEMPLATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Normal.dot",
+ strlen ("Normal.dot") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_LINE_COUNT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "184",
+ strlen ("184") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "44",
+ strlen ("44") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
+ strlen ("Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
+ strlen ("Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
+ strlen ("Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'",
+ strlen ("Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
+ strlen ("Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'",
+ strlen ("Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'",
+ strlen ("Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
+ strlen ("Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
+ strlen ("Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_REVISION_HISTORY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
+ strlen ("Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 }
+ };
+
+ struct SolutionData ole2_excel_sol[] =
+ {
+ {
+ EXTRACTOR_METATYPE_CREATOR,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "JV",
+ strlen ("JV") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_LAST_SAVED_BY,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "JV",
+ strlen ("JV") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATION_DATE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "2002-03-20T21:26:28Z",
+ strlen ("2002-03-20T21:26:28Z") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "application/vnd.ms-files",
+ strlen ("application/vnd.ms-files") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ "Microsoft Excel",
+ strlen ("Microsoft Excel") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 }
+ };
+
+ struct ProblemSet ps[] =
+ {
+ { "testdata/ole2_msword.doc",
+ ole2_msword_sol },
+ { "testdata/ole2_starwriter40.sdw",
+ ole2_starwriter_sol },
+ { "testdata/ole2_blair.doc",
+ ole2_blair_sol },
+ { "testdata/ole2_excel.xls",
+ ole2_excel_sol },
+ { NULL, NULL }
+ };
+ return ET_main ("ole2", ps);
+}
+
+/* end of test_ole2.c */
Copied: Extractor/src/plugins/testdata/ole2_blair.doc (from rev 23197, Extractor/test/blair.doc)
===================================================================
(Binary files differ)
Copied: Extractor/src/plugins/testdata/ole2_excel.xls (from rev 23197, Extractor/test/results.xls)
===================================================================
(Binary files differ)
Copied: Extractor/src/plugins/testdata/ole2_msword.doc (from rev 23197, Extractor/test/Test.doc)
===================================================================
(Binary files differ)
Copied: Extractor/src/plugins/testdata/ole2_starwriter40.sdw (from rev 23197, Extractor/test/sw40.sdw)
===================================================================
(Binary files differ)
Deleted: Extractor/test/Test.doc
===================================================================
(Binary files differ)
Deleted: Extractor/test/blair.doc
===================================================================
(Binary files differ)
Deleted: Extractor/test/results.xls
===================================================================
(Binary files differ)
Deleted: Extractor/test/sw40.sdw
===================================================================
(Binary files differ)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r23202 - in Extractor: src/plugins src/plugins/testdata test,
gnunet <=