gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23305 - in Extractor: . src/plugins src/plugins/old src/pl


From: gnunet
Subject: [GNUnet-SVN] r23305 - in Extractor: . src/plugins src/plugins/old src/plugins/testdata test
Date: Sun, 19 Aug 2012 00:25:21 +0200

Author: grothoff
Date: 2012-08-19 00:25:21 +0200 (Sun, 19 Aug 2012)
New Revision: 23305

Added:
   Extractor/src/plugins/archive_extractor.c
   Extractor/src/plugins/test_archive.c
   Extractor/src/plugins/testdata/archive_test.tar
Removed:
   Extractor/src/plugins/old/tar_extractor.c
   Extractor/test/test.tar
Modified:
   Extractor/README
   Extractor/TODO
   Extractor/configure.ac
   Extractor/src/plugins/Makefile.am
Log:
reincarnating tar plugin as archive plugin using libarchive

Modified: Extractor/README
===================================================================
--- Extractor/README    2012-08-18 14:14:56 UTC (rev 23304)
+++ Extractor/README    2012-08-18 22:25:21 UTC (rev 23305)
@@ -40,6 +40,7 @@
 The following dependencies are all optional, but should be
 available in order for maximum coverage:
 
+* libarchive
 * libavutil / libavformat / libavcodec / libswscale (ffmpeg)
 * libbz2 (bzip2)
 * libexiv2

Modified: Extractor/TODO
===================================================================
--- Extractor/TODO      2012-08-18 14:14:56 UTC (rev 23304)
+++ Extractor/TODO      2012-08-18 22:25:21 UTC (rev 23305)
@@ -1,5 +1,4 @@
 * Update plugins to new API (and cleanup code):
-  - tar
   - elf
   - applefile
   - pdf

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2012-08-18 14:14:56 UTC (rev 23304)
+++ Extractor/configure.ac      2012-08-18 22:25:21 UTC (rev 23305)
@@ -307,6 +307,13 @@
          AM_CONDITIONAL(HAVE_TIFF, false))],
          AM_CONDITIONAL(HAVE_TIFF, false))
 
+AC_CHECK_LIB(archive, archive_read_open,
+        [AC_CHECK_HEADERS([archive.h],
+           AM_CONDITIONAL(HAVE_ARCHIVE, true)
+           AC_DEFINE(HAVE_ARCHIVE,1,[Have libarchive]),
+         AM_CONDITIONAL(HAVE_ARCHIVE, false))],
+         AM_CONDITIONAL(HAVE_ARCHIVE, false))
+
 AC_MSG_CHECKING(for ImageFactory::iptcData in -lexiv2)
 AC_LANG_PUSH(C++)
 SAVED_LDFLAGS=$LDFLAGS

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-08-18 14:14:56 UTC (rev 23304)
+++ Extractor/src/plugins/Makefile.am   2012-08-18 22:25:21 UTC (rev 23305)
@@ -16,6 +16,7 @@
 
 EXTRA_DIST = \
   template_extractor.c \
+  testdata/archive_test.tar \
   testdata/deb_bzip2.deb \
   testdata/dvi_ora.dvi \
   testdata/flac_kraftwerk.flac \
@@ -71,6 +72,11 @@
 endif
 
 
+if HAVE_ARCHIVE
+PLUGIN_ARCHIVE=libextractor_archive.la
+TEST_ARCHIVE=test_archive
+endif
+
 if HAVE_EXIV2
 PLUGIN_EXIV2=libextractor_exiv2.la
 TEST_EXIV2=test_exiv2
@@ -142,6 +148,7 @@
   libextractor_wav.la \
   libextractor_xm.la \
   libextractor_zip.la \
+  $(PLUGIN_ARCHIVE) \
   $(PLUGIN_EXIV2) \
   $(PLUGIN_FFMPEG) \
   $(PLUGIN_FLAC) \
@@ -175,6 +182,7 @@
   test_wav \
   test_xm \
   test_zip \
+  $(TEST_ARCHIVE) \
   $(TEST_EXIV2) \
   $(TEST_FFMPEG) \
   $(TEST_FLAC) \
@@ -481,6 +489,19 @@
   $(PLUGINFLAGS)
 
 
+libextractor_archive_la_SOURCES = \
+  archive_extractor.c
+libextractor_archive_la_LDFLAGS = \
+  $(PLUGINFLAGS)
+libextractor_archive_la_LIBADD = \
+  -larchive
+
+test_archive_SOURCES = \
+  test_archive.c
+test_archive_LDADD = \
+  $(top_builddir)/src/plugins/libtest.la
+
+
 libextractor_thumbnailffmpeg_la_SOURCES = \
   thumbnailffmpeg_extractor.c
 libextractor_thumbnailffmpeg_la_LDFLAGS = \

Copied: Extractor/src/plugins/archive_extractor.c (from rev 23304, 
Extractor/src/plugins/old/tar_extractor.c)
===================================================================
--- Extractor/src/plugins/archive_extractor.c                           (rev 0)
+++ Extractor/src/plugins/archive_extractor.c   2012-08-18 22:25:21 UTC (rev 
23305)
@@ -0,0 +1,124 @@
+/*
+     This file is part of libextractor.
+     (C) 2012 Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+ */
+/**
+ * @file plugins/archive_extractor.c
+ * @brief plugin to support archives (such as TAR)
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "extractor.h"
+#include <archive.h>
+#include <archive_entry.h>
+
+/**
+ * Callback for libarchive for 'reading'; requests up to 16k per call.
+ *
+ * @param a archive handle (unused)
+ * @param client_data our 'struct EXTRACTOR_ExtractContext'
+ * @param buff reset to NULL, then handed to 'ec->read' to receive the data
+ * @return number of bytes read, ARCHIVE_FATAL if 'ec->read' returned -1
+ */
+static ssize_t
+read_cb (struct archive *a, 
+        void *client_data, 
+        const void **buff)
+{
+  struct EXTRACTOR_ExtractContext *ec = client_data;
+  ssize_t ret;
+
+  *buff = NULL;
+  if (-1 == (ret = ec->read (ec->cls, (void **) buff, 16 * 1024)))
+    return ARCHIVE_FATAL;
+  return ret;
+}
+
+
+/**
+ * Callback for libarchive for 'skipping' (seeks relative to SEEK_CUR).
+ *
+ * @param a archive handle (unused)
+ * @param client_data our 'struct EXTRACTOR_ExtractContext'
+ * @param request number of bytes to skip
+ * @return 'request' if the seek succeeded, 0 if 'ec->seek' returned -1
+ */
+static __LA_INT64_T
+skip_cb (struct archive *a, 
+        void *client_data,
+        __LA_INT64_T request)
+{
+  struct EXTRACTOR_ExtractContext *ec = client_data;
+
+  if (-1 == ec->seek (ec->cls, request, SEEK_CUR))
+    return 0;
+  return request;
+}
+
+
+/**
+ * Main entry method for the ARCHIVE extraction plugin.  Reports the path
+ * of each archive entry, then the format name reported by libarchive.
+ *
+ * @param ec extraction context provided to the plugin
+ */
+void
+EXTRACTOR_archive_extract_method (struct EXTRACTOR_ExtractContext *ec)
+{
+  struct archive *a;
+  struct archive_entry *entry;
+  const char *fname;
+  const char *s;
+  char *format = NULL;
+
+  if (NULL == (a = archive_read_new ()))
+    return;
+  archive_read_support_compression_all (a);
+  archive_read_support_format_all (a);
+  if (ARCHIVE_OK != archive_read_open2 (a, ec, NULL, &read_cb, &skip_cb, NULL))
+    {
+      archive_read_finish (a);
+      return;
+    }
+  while (ARCHIVE_OK == archive_read_next_header (a, &entry))
+    {
+      if ( (NULL == format) &&
+           (NULL != (fname = archive_format_name (a))) )
+        format = strdup (fname);
+      /* the pathname can be NULL; never hand that to strlen() */
+      if (NULL == (s = archive_entry_pathname (entry)))
+        continue;
+      if (0 != ec->proc (ec->cls, "tar",
+                         EXTRACTOR_METATYPE_FILENAME,
+                         EXTRACTOR_METAFORMAT_UTF8,
+                         "text/plain", s, strlen (s) + 1))
+        break;
+    }
+  archive_read_finish (a);
+  if (NULL == format)
+    return;
+  /* last item we report: the result cannot change what happens next */
+  (void) ec->proc (ec->cls, "tar",
+                   EXTRACTOR_METATYPE_FORMAT,
+                   EXTRACTOR_METAFORMAT_UTF8,
+                   "text/plain", format, strlen (format) + 1);
+  free (format);
+}
+
+
+/* end of archive_extractor.c */

Deleted: Extractor/src/plugins/old/tar_extractor.c
===================================================================
--- Extractor/src/plugins/old/tar_extractor.c   2012-08-18 14:14:56 UTC (rev 
23304)
+++ Extractor/src/plugins/old/tar_extractor.c   2012-08-18 22:25:21 UTC (rev 
23305)
@@ -1,855 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
- */
-
-#include "platform.h"
-#include "extractor.h"
-
-/*
- * Note that this code is not complete!
- *
- * References:
- *
- * http://www.mkssoftware.com/docs/man4/tar.4.asp
- * (does document USTAR format common nowadays,
- *  but not other extended formats such as the one produced
- *  by GNU tar 1.13 when very long filenames are met.)
- *
- * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs
- * (J. Schilling's remarks on TAR formats compatibility issues.)
- */
-
-/*
- * Define known TAR archive member variants.
- * In theory different variants
- * can coexist within a single TAR archive file
- * although this will be uncommon.
- */
-#define TAR_V7ORIGINAL_FORMAT    (1)
-#define TAR_V7EXTENDED_FORMAT    (1 << 1)
-#define TAR_SCHILLING1985_FORMAT (1 << 2)
-#define TAR_POSIX1988_FORMAT     (1 << 3)
-#define TAR_GNU1991_FORMAT       (1 << 4)
-#define TAR_SCHILLING1994_FORMAT (1 << 5)
-#define TAR_GNU1997_FORMAT       (1 << 6)
-#define TAR_POSIX2001_FORMAT     (1 << 7)
-#define TAR_SCHILLING2001_FORMAT (1 << 8)
-#define TAR_SOLARIS2001_FORMAT   (1 << 9)
-#define TAR_GNU2004_FORMAT       (1 << 10)
-
-/*
- * TAR header structure, modelled after POSIX.1-1988
- */
-typedef struct
-{
-  char fileName[100];
-  char mode[8];
-  char userId[8];
-  char groupId[8];
-  char fileSize[12];
-  char lastModTime[12];
-  char chksum[8];
-  char link;
-  char linkName[100];
-  /*
-   * All fields below are a
-   * either zero-filled or undefined
-   * for UNIX V7 TAR archive members ;
-   * their header is always 512 octets long nevertheless.
-   */
-  char ustarMagic[6];
-  char version[2];
-  char userName[32];
-  char groupName[32];
-  char devMajor[8];
-  char devMinor[8];
-  char prefix[155];
-  char filler[12];
-} TarHeader;
-
-#define TAR_HEADER_SIZE (sizeof(TarHeader))
-#define TAR_TIME_FENCE  ((long long) (-(1LL << 62)))
-
-static size_t
-tar_roundup (size_t size)
-{
-  size_t diff = (size % TAR_HEADER_SIZE);
-
-  return (0 == diff) ? size : (size + (TAR_HEADER_SIZE - diff));
-}
-
-static int
-tar_isnonzero (const char *data, unsigned int length)
-{
-  unsigned int total = 0;
-
-  while (total < length)
-    {
-      if (0 != data[total])
-        return 1;
-      total++;
-    }
-
-  return 0;
-}
-
-static unsigned int
-tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr)
-{
-  unsigned int result = 0;
-
-  if (NULL != data && 0 < size)
-    {
-      const char *p = data;
-      int found = 0;
-      unsigned long long value = 0;
-
-      while ((p < data + size) && (' ' == *p))
-        p += 1;
-
-      while ((p < data + size) && ('0' <= *p) && (*p < '8'))
-        {
-          found = 1;
-          value *= 8;
-          value += (*p - '0');
-          p += 1;
-        }
-
-      if (0 != found)
-        {
-          while ((p < data + size) && ((0 == *p) || (' ' == *p)))
-            p += 1;
-
-          result = (p - data);
-        }
-
-      if ((0 < result) && (NULL != valueptr))
-        *valueptr = value;
-    }
-
-  return result;
-}
-
-#ifndef EOVERFLOW
-#define EOVERFLOW -1
-#endif
-
-static int
-tar_time (long long timeval, char *rtime, unsigned int rsize)
-{
-  int retval = 0;
-
-  /*
-   * shift epoch to proleptic times
-   * to make subsequent modulo operations safer.
-   */
-  long long my_timeval = timeval
-    + ((long long) ((1970 * 365) + 478) * (long long) 86400);
-
-  unsigned int seconds = (unsigned int) (my_timeval % 60);
-  unsigned int minutes = (unsigned int) ((my_timeval / 60) % 60);
-  unsigned int hours = (unsigned int) ((my_timeval / 3600) % 24);
-
-  unsigned int year = 0;
-  unsigned int month = 1;
-
-  unsigned int days = (unsigned int) (my_timeval / (24 * 3600));
-
-  unsigned int days_in_month[] =
-    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
-  unsigned int diff = 0;
-
-  if ((long long) 0 > my_timeval)
-    return EDOM;
-
-  /*
-   * 400-year periods
-   */
-  year += (400 * (days / ((365 * 400) + 97)));
-  days %= ((365 * 400) + 97);
-
-  /*
-   * 100-year periods
-   */
-  diff = (days / ((365 * 100) + 24));
-  if (4 <= diff)
-    {
-      year += 399;
-      days = 364;
-    }
-  else
-    {
-      year += (100 * diff);
-      days %= ((365 * 100) + 24);
-    }
-
-  /*
-   * remaining leap years
-   */
-  year += (4 * (days / ((365 * 4) + 1)));
-  days %= ((365 * 4) + 1);
-
-  while (1)
-    {
-      if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100))))
-        {
-          if (366 > days)
-            {
-              break;
-            }
-          else
-            {
-              days -= 366;
-              year++;
-            }
-        }
-      else
-        {
-          if (365 > days)
-            {
-              break;
-            }
-          else
-            {
-              days -= 365;
-              year++;
-            }
-        }
-    }
-
-  if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100))))
-    days_in_month[1] = 29;
-
-  for (month = 0; (month < 12) && (days >= days_in_month[month]); month += 1)
-    days -= days_in_month[month];
-
-  retval = snprintf (rtime, rsize, "%04u-%02u-%02uT%02u:%02u:%02uZ",
-                     year, month + 1, days + 1, hours, minutes, seconds);
-
-  return (retval < rsize) ? 0 : EOVERFLOW;
-}
-
-#define ADD(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while 
(0)
-#define ADDF(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) { free(s); goto 
FINISH; } free (s); } while (0)
-
-int
-EXTRACTOR_tar_extract (const char *data,
-                       size_t size,
-                       EXTRACTOR_MetaDataProcessor proc,
-                       void *proc_cls, const char *options)
-{
-  char *fname = NULL;
-  size_t pos;
-  int contents_are_empty = 1;
-  long long maxftime = TAR_TIME_FENCE;
-  unsigned int format_archive = 0;
-  int ret;
-
-  if (512 != TAR_HEADER_SIZE)
-    return 0;                   /* compiler should remove this when optimising 
*/
-  if (0 != (size % TAR_HEADER_SIZE))
-    return 0;                   /* cannot be tar! */
-  if (size < TAR_HEADER_SIZE)
-    return 0;                   /* too short, or somehow truncated */
-
-  ret = 0;
-  pos = 0;
-  while ((pos + TAR_HEADER_SIZE) <= size)
-    {
-      const TarHeader *tar = NULL;
-      unsigned format_member = 0;
-      unsigned long long fmode;
-      unsigned long long fsize;
-      long long ftime = TAR_TIME_FENCE;
-      char typeFlag = -1;
-      const char *nul_pos;
-      unsigned int tar_prefix_length = 0;
-      unsigned int tar_name_length = 0;
-      unsigned int checksum_offset;
-      int checksum_computed_500s = 0;
-      int checksum_computed_512s = 0;
-      unsigned int checksum_computed_500u = 0;
-      unsigned int checksum_computed_512u = 0;
-      unsigned long long checksum_stored = 0;
-
-      /*
-       * Compute TAR header checksum and compare with stored value.
-       * Allow for non-conformant checksums computed with signed values,
-       * such as those produced by early Solaris tar.
-       * Allow for non-conformant checksums computed on first 500 octets,
-       * such as those produced by SunOS 4.x tar according to J. Schilling.
-       * This will also detect EOF marks, since a zero-filled block
-       * cannot possibly hold octal values.
-       */
-      for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1)
-        {
-          checksum_computed_500u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_500s += (signed char) data[pos + checksum_offset];
-        }
-      if (8 >
-          tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored))
-        break;
-      for (; checksum_offset < 156; checksum_offset += 1)
-        {
-          checksum_computed_500u += (unsigned char) ' ';
-          checksum_computed_500s += (signed char) ' ';
-        }
-      for (; checksum_offset < 500; checksum_offset += 1)
-        {
-          checksum_computed_500u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_500s += (signed char) data[pos + checksum_offset];
-        }
-
-      checksum_computed_512u = checksum_computed_500u;
-      checksum_computed_512s = checksum_computed_500s;
-      for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1)
-        {
-          checksum_computed_512u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_512s += (signed char) data[pos + checksum_offset];
-        }
-
-      /*
-       * Suggestion: use signed checksum matches to refine
-       * TAR format detection.
-       */
-      if ((checksum_stored != (unsigned long long) checksum_computed_512u)
-          && (checksum_stored != (unsigned long long) checksum_computed_512s)
-          && (checksum_stored != (unsigned long long) checksum_computed_500s)
-          && (checksum_stored != (unsigned long long) checksum_computed_500u))
-        break;
-
-      tar = (const TarHeader *) &data[pos];
-      typeFlag = tar->link;
-      pos += TAR_HEADER_SIZE;
-
-      /*
-       * Checking all octal fields helps reduce
-       * the possibility of false positives ;
-       * only the file size, time and mode are used for now.
-       *
-       * This will fail over GNU and Schilling TAR huge size fields
-       * using non-octal encodings used for very large file lengths (> 8 GB).
-       */
-      if ((12 > tar_octalvalue (tar->fileSize, 12,
-                                &fsize))
-          || (12 > tar_octalvalue (tar->lastModTime, 12,
-                                   (unsigned long long *) &ftime))
-          || (8 > tar_octalvalue (tar->mode, 8,
-                                  (unsigned long long *) &fmode))
-          || (8 > tar_octalvalue (tar->userId, 8, NULL))
-          || (8 > tar_octalvalue (tar->groupId, 8, NULL)))
-        break;
-
-      /*
-       * Find out which TAR variant is here.
-       */
-      if (0 == memcmp (tar->ustarMagic, "ustar  ", 7))
-        {
-
-          if (' ' == tar->mode[6])
-            format_member = TAR_GNU1991_FORMAT;
-          else if (('K' == typeFlag) || ('L' == typeFlag))
-            {
-              format_member = TAR_GNU1997_FORMAT;
-              ftime = TAR_TIME_FENCE;
-            }
-          else
-            format_member =
-              (((unsigned) fmode) !=
-               (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT :
-              TAR_GNU2004_FORMAT;
-
-        }
-      else if (0 == memcmp (tar->ustarMagic, "ustar", 6))
-        {
-
-          /*
-           * It is important to perform test for SCHILLING1994 before GNU1997
-           * because certain extension type flags ('L' and 'S' for instance)
-           * are used by both.
-           */
-          if ((0 == tar->prefix[130])
-              && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL))
-              && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL))
-              && (0 == tar_isnonzero (tar->filler, 8))
-              && (0 == memcmp (tar->filler + 8, "tar", 4)))
-            {
-
-              format_member = TAR_SCHILLING1994_FORMAT;
-
-            }
-          else if (('D' == typeFlag) || ('K' == typeFlag)
-                   || ('L' == typeFlag) || ('M' == typeFlag)
-                   || ('N' == typeFlag) || ('S' == typeFlag)
-                   || ('V' == typeFlag))
-            {
-
-              format_member = TAR_GNU1997_FORMAT;
-
-            }
-          else if (('g' == typeFlag)
-                   || ('x' == typeFlag) || ('X' == typeFlag))
-            {
-
-              format_member = TAR_POSIX2001_FORMAT;
-              ftime = TAR_TIME_FENCE;
-
-            }
-          else
-            {
-
-              format_member = TAR_POSIX1988_FORMAT;
-
-            }
-        }
-      else if ((0 == memcmp (tar->filler + 8, "tar", 4))
-               && (0 == tar_isnonzero (tar->filler, 8)))
-        {
-
-          format_member = TAR_SCHILLING1985_FORMAT;
-
-        }
-      else if (('0' <= typeFlag) && (typeFlag <= '2'))
-        {
-
-          format_member = TAR_V7ORIGINAL_FORMAT;
-
-        }
-      else
-        {
-
-          format_member = TAR_V7EXTENDED_FORMAT;
-
-        }
-
-      /*
-       * Locate the file names.
-       */
-      if ((0 != (format_member & TAR_POSIX2001_FORMAT))
-         && (('x' == typeFlag) || ('X' == typeFlag)))
-       {
-         
-         if (size <= pos)
-           break;
-         
-         else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos))
-           {
-             const char *keyptr = data + pos;
-             const char *valptr = NULL;
-             const char *nameptr = NULL;
-             unsigned int keylength = 0;
-             unsigned int namelength = 0;
-             
-             while (keyptr < data + pos + (size_t) fsize)
-               {
-                 if (('0' > *keyptr) || ('9' < *keyptr))
-                   {
-                     keyptr += 1;
-                     continue;
-                   }
-                 
-                 keylength =
-                   (unsigned int) strtoul (keyptr, (char **) &valptr, 10);
-                 if ((0 < keylength) && (NULL != valptr)
-                     && (keyptr != valptr))
-                   {
-                     while ((valptr < data + pos + (size_t) fsize)
-                            && (' ' == *valptr))
-                       valptr += 1;
-                     if (0 == memcmp (valptr, "path=", 5))
-                       {
-                         nameptr = valptr + 5;
-                         namelength = keylength - (nameptr - keyptr);
-                       }
-                     else
-                       {
-                         
-                         if ((keylength > (valptr - keyptr) + 4 + 2)
-                             && (0 == memcmp (valptr, "GNU.", 4)))
-                           format_archive |= TAR_GNU2004_FORMAT;
-                         
-                         else if ((keylength > (valptr - keyptr) + 7 + 2)
-                                  && (0 == memcmp (valptr, "SCHILY.", 7)))
-                           format_archive |= TAR_SCHILLING2001_FORMAT;
-                         
-                         else if ((keylength > (valptr - keyptr) + 4 + 2)
-                                  && (0 == memcmp (valptr, "SUN.", 4)))
-                           format_archive |= TAR_SOLARIS2001_FORMAT;
-                       }
-                     
-                     keyptr += keylength;
-                   }
-                 else
-                   {
-                     nameptr = NULL;
-                     break;
-                   }
-                }
-             
-              if ((NULL != nameptr) && (0 != *nameptr)
-                  && ((size - (nameptr - data)) >= namelength)
-                  && (1 < namelength) )
-                {
-                  /*
-                   * There is an 1-offset because POSIX.1-2001
-                   * field separator is counted in field length.
-                   */
-                 if (fname != NULL)
-                   free (fname);
-                  fname = malloc (namelength);
-                  if (NULL != fname)
-                    {
-                      memcpy (fname, nameptr, namelength - 1);
-                      fname[namelength - 1] = '\0';
-
-                      pos += tar_roundup ((size_t) fsize);
-                      format_archive |= format_member;
-                      continue;
-                    }
-                }
-            }
-        }
-
-      else if ((0 != (format_member
-                      & (TAR_SCHILLING1994_FORMAT
-                         | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
-               && ('L' == typeFlag))
-        {
-
-          if (size <= pos)
-            break;
-
-          else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos))
-            {
-
-              size_t length = (size_t) fsize;
-
-              nul_pos = memchr (data + pos, 0, length);
-              if (NULL != nul_pos)
-                length = (nul_pos - (data + pos));
-
-              if (0 < length)
-                {
-                 if (fname != NULL)
-                   free (fname);
-                  fname = malloc (1 + length);
-                  if (NULL != fname)
-                    {
-                      memcpy (fname, data + pos, length);
-                      fname[length] = '\0';
-                    }
-
-                  pos += tar_roundup ((size_t) fsize);
-                  format_archive |= format_member;
-                  continue;
-                }
-            }
-        }
-      else
-        {
-
-          nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName);
-          tar_name_length = (0 == nul_pos)
-            ? sizeof (tar->fileName) : (nul_pos - tar->fileName);
-
-          if ((0 !=
-               (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
-              && ('S' == typeFlag))
-            {
-
-              if ((0 == tar->prefix[40])
-                  && (0 != tar->prefix[137])
-                  && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL))
-                  && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL)))
-                {
-                  /*
-                   * fsize needs adjustment when there are more than 4 sparse 
blocks
-                   */
-                  size_t diffpos = 0;
-                  fsize += TAR_HEADER_SIZE;
-
-                  while ((pos + diffpos + TAR_HEADER_SIZE < size)
-                         && (0 != *(data + pos + diffpos + 504)))
-                    {
-                      diffpos += TAR_HEADER_SIZE;
-                      fsize += TAR_HEADER_SIZE;
-                    }
-                }
-
-              typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_SCHILLING1994_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, 130);
-              tar_prefix_length = (0 == nul_pos)
-                ? 130 : (nul_pos - tar->prefix);
-
-              if ('S' == typeFlag)
-                typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_SCHILLING1985_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, 155);
-              tar_prefix_length = (0 == nul_pos)
-                ? 155 : (nul_pos - tar->prefix);
-
-
-              if ('S' == typeFlag)
-                typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_POSIX1988_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix);
-              tar_prefix_length = (0 == nul_pos)
-                ? sizeof tar->prefix : nul_pos - tar->prefix;
-
-            }
-        }
-
-      /*
-       * Update position so that next loop iteration will find
-       * either a TAR header or TAR EOF mark or just EOF.
-       *
-       * Consider archive member size to be zero
-       * with no data following the header in the following cases :
-       * '1' : hard link, '2' : soft link,
-       * '3' : character device, '4' : block device,
-       * '5' : directory, '6' : named pipe.
-       */
-      if ('1' != typeFlag && '2' != typeFlag
-          && '3' != typeFlag && '4' != typeFlag
-          && '5' != typeFlag && '6' != typeFlag)
-        {
-          if ((fsize > (unsigned long long) size)
-              || (fsize + (unsigned long long) pos >
-                  (unsigned long long) size))
-            break;
-
-          pos += tar_roundup ((size_t) fsize);
-        }
-      if (pos - 1 > size)
-        break;
-
-      format_archive |= format_member;
-
-      /*
-       * Store the file name in libextractor list.
-       *
-       * For the time being, only file types listed in POSIX.1-1988 ('0'..'7')
-       * are retained, leaving out labels, access control lists, etc.
-       */
-      if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7')))
-        {
-          if (NULL == fname)
-            {
-              if (0 < tar_prefix_length + tar_name_length)
-                {
-                  fname = malloc (2 + tar_prefix_length + tar_name_length);
-
-                  if (NULL != fname)
-                    {
-                      if (0 < tar_prefix_length)
-                        {
-                          memcpy (fname, tar->prefix, tar_prefix_length);
-
-                          if (('/' != tar->prefix[tar_prefix_length - 1])
-                              && (0 < tar_name_length)
-                              && ('/' != tar->fileName[0]))
-                            {
-                              fname[tar_prefix_length] = '/';
-                              tar_prefix_length += 1;
-                            }
-                        }
-
-                      if (0 < tar_name_length)
-                        memcpy (fname + tar_prefix_length, tar->fileName,
-                                tar_name_length);
-
-                      fname[tar_prefix_length + tar_name_length] = '\0';
-                    }
-                }
-            }
-
-          if ((NULL != fname) && (0 != *fname))
-            {
-#if 0
-              fprintf (stdout,
-                       "(%u) flag = %c, size = %u, tname = (%s), fname = 
(%s)\n",
-                       __LINE__, typeFlag, (unsigned int) fsize,
-                       (NULL == tar->fileName) ? "" : tar->fileName,
-                       (NULL == fname) ? "" : fname);
-#endif
-
-              ADDF (EXTRACTOR_METATYPE_FILENAME, fname);
-              fname = NULL;
-              if (ftime > maxftime)
-                maxftime = ftime;
-              contents_are_empty = 0;
-            }
-        }
-
-      if (NULL != fname)
-        {
-          free (fname);
-          fname = NULL;
-        }
-    }
-
-  if (NULL != fname)
-    {
-      free (fname);
-      fname = NULL;
-    }
-
-  /*
-   * Report mimetype; report also format(s) and most recent date
-   * when at least one archive member was found.
-   */
-  if (0 == format_archive)
-    return ret;
-  if (0 == contents_are_empty)
-    {
-
-      const char *formats[5] = { NULL, NULL, NULL, NULL, NULL };
-      unsigned int formats_count = 0;
-      unsigned int formats_u = 0;
-      unsigned int format_length = 0;
-      char *format = NULL;
-
-      if (TAR_TIME_FENCE < maxftime)
-        {
-          char iso8601_time[24];
-
-          if (0 == tar_time (maxftime, iso8601_time, sizeof (iso8601_time)))
-            ADD (EXTRACTOR_METATYPE_CREATION_DATE, iso8601_time);
-        }
-
-      /*
-       * We only keep the most recent POSIX format.
-       */
-      if (0 != (format_archive & TAR_POSIX2001_FORMAT))
-        formats[formats_count++] = "POSIX 2001";
-
-      else if (0 != (format_archive & TAR_POSIX1988_FORMAT))
-        formats[formats_count++] = "POSIX 1988";
-
-      /*
-       * We only keep the most recent GNU format.
-       */
-      if (0 != (format_archive & TAR_GNU2004_FORMAT))
-        formats[formats_count++] = "GNU 2004";
-
-      else if (0 != (format_archive & TAR_GNU1997_FORMAT))
-        formats[formats_count++] = "GNU 1997";
-
-      else if (0 != (format_archive & TAR_GNU1991_FORMAT))
-        formats[formats_count++] = "GNU 1991";
-
-      /*
-       * We only keep the most recent Schilling format.
-       */
-      if (0 != (format_archive & TAR_SCHILLING2001_FORMAT))
-        formats[formats_count++] = "Schilling 2001";
-
-      else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT))
-        formats[formats_count++] = "Schilling 1994";
-
-      else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT))
-        formats[formats_count++] = "Schilling 1985";
-
-      /*
-       * We only keep the most recent Solaris format.
-       */
-      if (0 != (format_archive & TAR_SOLARIS2001_FORMAT))
-        formats[formats_count++] = "Solaris 2001";
-
-      /*
-       * We only keep the (supposedly) most recent UNIX V7 format.
-       */
-      if (0 != (format_archive & TAR_V7EXTENDED_FORMAT))
-        formats[formats_count++] = "UNIX extended V7";
-
-      else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT))
-        formats[formats_count++] = "UNIX original V7";
-
-      /*
-       * Build the format string
-       */
-      for (formats_u = 0; formats_u < formats_count; formats_u += 1)
-        {
-          if ((NULL != formats[formats_u]) && (0 != *formats[formats_u]))
-            {
-              if (0 < format_length)
-                format_length += 3;
-              format_length += strlen (formats[formats_u]);
-            }
-        }
-
-      if (0 < format_length)
-        {
-         if (fname != NULL)
-           free (fname);
-          format = malloc (format_length + 5);
-
-          if (NULL != format)
-            {
-
-              format_length = 0;
-
-              for (formats_u = 0; formats_u < formats_count; formats_u += 1)
-                {
-                  if ((NULL != formats[formats_u])
-                      && (0 != *formats[formats_u]))
-                    {
-                      if (0 < format_length)
-                        {
-                          strcpy (format + format_length, " + ");
-                          format_length += 3;
-                        }
-                      strcpy (format + format_length, formats[formats_u]);
-                      format_length += strlen (formats[formats_u]);
-                    }
-                }
-
-              if (0 < format_length)
-                {
-                  strcpy (format + format_length, " TAR");
-                  ADDF (EXTRACTOR_METATYPE_FORMAT_VERSION, format);
-                }
-              else
-                {
-                  free (format);
-                }
-            }
-        }
-    }
-
-  ADD (EXTRACTOR_METATYPE_MIMETYPE, "application/x-tar");
-FINISH:
-  return ret;
-}

Copied: Extractor/src/plugins/test_archive.c (from rev 23304, 
Extractor/src/plugins/test_jpeg.c)
===================================================================
--- Extractor/src/plugins/test_archive.c                                (rev 0)
+++ Extractor/src/plugins/test_archive.c        2012-08-18 22:25:21 UTC (rev 
23305)
@@ -0,0 +1,76 @@
+/*
+     This file is part of libextractor.
+     (C) 2012 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file plugins/test_archive.c
+ * @brief testcase for archive plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+/**
+ * Main function for the ARCHIVE testcase.  Hands the problem set for
+ * "testdata/archive_test.tar" to ET_main for the "archive" plugin.
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return result of ET_main; 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+  struct SolutionData tar_archive_sol[] = /* presumably the items the plugin must report -- confirm in test_lib.h */
+    {
+      { /* file name item "test.html" */
+       EXTRACTOR_METATYPE_FILENAME,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "test.html",
+       strlen ("test.html") + 1, /* length counts the terminating NUL */
+       0 
+      },
+      { /* file name item "test.jpg" */
+       EXTRACTOR_METATYPE_FILENAME,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "test.jpg",
+       strlen ("test.jpg") + 1,
+       0 
+      },
+      { /* format item "GNU tar format" */
+       EXTRACTOR_METATYPE_FORMAT,
+       EXTRACTOR_METAFORMAT_UTF8,
+       "text/plain",
+       "GNU tar format",
+       strlen ("GNU tar format") + 1,
+       0 
+      },
+      { 0, 0, NULL, NULL, 0, -1 } /* presumably the end-of-list marker -- confirm in test_lib.h */
+    };
+  struct ProblemSet ps[] = /* pairs the test archive with tar_archive_sol; closed by a { NULL, NULL } entry */
+    {
+      { "testdata/archive_test.tar",
+       tar_archive_sol },
+      { NULL, NULL }
+    };
+  return ET_main ("archive", ps); /* ET_main's return value becomes the exit status */
+}
+
+/* end of test_archive.c */

Copied: Extractor/src/plugins/testdata/archive_test.tar (from rev 23304, Extractor/test/test.tar)
===================================================================
(Binary files differ)

Deleted: Extractor/test/test.tar
===================================================================
(Binary files differ)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]