gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r9798 - Extractor/src/plugins


From: gnunet
Subject: [GNUnet-SVN] r9798 - Extractor/src/plugins
Date: Fri, 18 Dec 2009 19:43:20 +0100

Author: grothoff
Date: 2009-12-18 19:43:20 +0100 (Fri, 18 Dec 2009)
New Revision: 9798

Added:
   Extractor/src/plugins/tar_extractor.c
Removed:
   Extractor/src/plugins/tarextractor.c
Modified:
   Extractor/src/plugins/Makefile.am
Log:
tar

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2009-12-18 18:18:01 UTC (rev 9797)
+++ Extractor/src/plugins/Makefile.am   2009-12-18 18:43:20 UTC (rev 9798)
@@ -89,6 +89,7 @@
   $(pdf) \
   libextractor_real.la \
   $(rpm) \
+  libextractor_tar.la \
   $(thumbgtk) \
   libextractor_wav.la \
   libextractor_zip.la
@@ -245,6 +246,11 @@
 libextractor_rpm_la_LIBADD = \
   -lrpm
 
+libextractor_tar_la_SOURCES = \
+  tar_extractor.c 
+libextractor_tar_la_LDFLAGS = \
+  $(PLUGINFLAGS)
+
 libextractor_thumbnailgtk_la_CFLAGS = \
   $(GLIB_CFLAGS) $(GTK_CFLAGS)
 libextractor_thumbnailgtk_la_LIBADD = \
@@ -331,13 +337,6 @@
 libextractor_id3v24_la_LIBADD = \
   $(top_builddir)/src/common/libextractor_common.la
 
-libextractor_tar_la_SOURCES = \
-  tarextractor.c 
-libextractor_tar_la_LDFLAGS = \
-  $(PLUGINFLAGS)
-libextractor_tar_la_LIBADD = \
-  $(top_builddir)/src/main/libextractor.la -lz
-
 libextractor_tiff_la_SOURCES = \
   tiffextractor.c 
 libextractor_tiff_la_LDFLAGS = \

Copied: Extractor/src/plugins/tar_extractor.c (from rev 9791, Extractor/src/plugins/tarextractor.c)
===================================================================
--- Extractor/src/plugins/tar_extractor.c                               (rev 0)
+++ Extractor/src/plugins/tar_extractor.c       2009-12-18 18:43:20 UTC (rev 9798)
@@ -0,0 +1,861 @@
+/*
+     This file is part of libextractor.
+     (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+
+/*
+ * Note that this code is not complete!
+ *
+ * References:
+ *
+ * http://www.mkssoftware.com/docs/man4/tar.4.asp
+ * (does document USTAR format common nowadays,
+ *  but not other extended formats such as the one produced
+ *  by GNU tar 1.13 when very long filenames are met.)
+ *
+ * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs
+ * (J. Schilling's remarks on TAR formats compatibility issues.)
+ */
+
+/*
+ * Define known TAR archive member variants.
+ * In theory different variants
+ * can coexist within a single TAR archive file
+ * although this will be uncommon.
+ *
+ * Every variant occupies its own bit, so the variants detected for the
+ * individual members can be OR-ed together into one archive-wide mask.
+ */
+#define TAR_V7ORIGINAL_FORMAT    (1)
+#define TAR_V7EXTENDED_FORMAT    (1 << 1)
+#define TAR_SCHILLING1985_FORMAT (1 << 2)
+#define TAR_POSIX1988_FORMAT     (1 << 3)
+#define TAR_GNU1991_FORMAT       (1 << 4)
+#define TAR_SCHILLING1994_FORMAT (1 << 5)
+#define TAR_GNU1997_FORMAT       (1 << 6)
+#define TAR_POSIX2001_FORMAT     (1 << 7)
+#define TAR_SCHILLING2001_FORMAT (1 << 8)
+#define TAR_SOLARIS2001_FORMAT   (1 << 9)
+#define TAR_GNU2004_FORMAT       (1 << 10)
+
+/*
+ * TAR header structure, modelled after POSIX.1-1988
+ *
+ * Every field is a fixed-width character array; together the fields
+ * add up to 512 octets.
+ */
+typedef struct
+{
+  char fileName[100];           /* member name; NUL-terminated unless all 100 octets are used */
+  char mode[8];                 /* octal text */
+  char userId[8];               /* octal text; only validated, value not used */
+  char groupId[8];              /* octal text; only validated, value not used */
+  char fileSize[12];            /* octal text; size of the member data that follows the header */
+  char lastModTime[12];         /* octal text; later formatted as a timestamp */
+  char chksum[8];               /* octal text; checksum of the header block */
+  char link;                    /* member type flag */
+  char linkName[100];           /* not read by this plugin */
+  /*
+   * All fields below are
+   * either zero-filled or undefined
+   * for UNIX V7 TAR archive members ;
+   * their header is always 512 octets long nevertheless.
+   */
+  char ustarMagic[6];           /* "ustar" for the USTAR-derived variants */
+  char version[2];
+  char userName[32];            /* not read by this plugin */
+  char groupName[32];           /* not read by this plugin */
+  char devMajor[8];             /* not read by this plugin */
+  char devMinor[8];             /* not read by this plugin */
+  char prefix[155];             /* leading part of the member path; some variants reuse its tail */
+  char filler[12];              /* Schilling variants store "tar" in the last four octets */
+} TarHeader;
+
+/* Size of one header; also used as the unit member data is padded to. */
+#define TAR_HEADER_SIZE (sizeof(TarHeader))
+/* Sentinel meaning "no usable modification time seen (yet)". */
+#define TAR_TIME_FENCE  ((long long) (-(1LL << 62)))
+
+/**
+ * Round a byte count up to the next multiple of TAR_HEADER_SIZE.
+ *
+ * @param size byte count to round
+ * @return size itself if it already is a multiple of TAR_HEADER_SIZE,
+ *         otherwise the next larger multiple
+ */
+static size_t
+tar_roundup (size_t size)
+{
+  const size_t remainder = size % TAR_HEADER_SIZE;
+
+  if (0 != remainder)
+    size += TAR_HEADER_SIZE - remainder;
+
+  return size;
+}
+
+/**
+ * Tell whether a buffer contains at least one non-zero octet.
+ *
+ * @param data buffer to inspect
+ * @param length number of octets of data to inspect
+ * @return 1 if any of the first length octets is non-zero, 0 otherwise
+ *         (including when length is 0)
+ */
+static int
+tar_isnonzero (const char *data, unsigned int length)
+{
+  unsigned int idx;
+
+  for (idx = 0; idx < length; idx++)
+    {
+      if (data[idx] != 0)
+        return 1;
+    }
+
+  return 0;
+}
+
+/**
+ * Parse an octal number stored as text in a fixed-width field.
+ *
+ * Leading spaces are skipped, then a run of octal digits is read, then
+ * any trailing NUL or space octets are skipped.
+ *
+ * @param data start of the field (may be NULL)
+ * @param size width of the field in octets
+ * @param valueptr where to store the parsed value; may be NULL when only
+ *        validation is wanted; left untouched when nothing was parsed
+ * @return number of octets consumed from the field, or 0 if data is NULL,
+ *         size is 0 or no octal digit was found
+ */
+static unsigned int
+tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr)
+{
+  const char *cursor;
+  const char *limit;
+  unsigned long long accumulated = 0;
+  unsigned int consumed;
+  int have_digit = 0;
+
+  if ((NULL == data) || (0 == size))
+    return 0;
+
+  cursor = data;
+  limit = data + size;
+
+  /* leading padding */
+  while ((cursor < limit) && (' ' == *cursor))
+    cursor++;
+
+  /* the digits themselves */
+  for (; (cursor < limit) && (*cursor >= '0') && (*cursor <= '7'); cursor++)
+    {
+      have_digit = 1;
+      accumulated = (accumulated * 8) + (unsigned long long) (*cursor - '0');
+    }
+
+  if (0 == have_digit)
+    return 0;
+
+  /* trailing terminator(s) */
+  while ((cursor < limit) && ((0 == *cursor) || (' ' == *cursor)))
+    cursor++;
+
+  consumed = (cursor - data);
+  if ((0 < consumed) && (NULL != valueptr))
+    *valueptr = accumulated;
+
+  return consumed;
+}
+
+/*
+ * Fallback for platforms whose headers do not provide EOVERFLOW.
+ * The replacement is parenthesized so that it stays a single operand
+ * wherever the macro is expanded.
+ */
+#ifndef EOVERFLOW
+#define EOVERFLOW (-1)
+#endif
+
+/**
+ * Tell whether a year of the proleptic Gregorian calendar is a leap year.
+ *
+ * @param year the year to test
+ * @return 1 for leap years, 0 otherwise
+ */
+static int
+tar_is_leap_year (unsigned long long year)
+{
+  return (0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100)));
+}
+
+/**
+ * Format a timestamp as "YYYY-MM-DDThh:mm:ssZ" (proleptic Gregorian
+ * calendar, no time zone adjustment).
+ *
+ * @param timeval seconds relative to 1970-01-01T00:00:00
+ * @param rtime buffer receiving the NUL-terminated text
+ * @param rsize size of rtime in octets
+ * @return 0 on success, EDOM if timeval lies before the start of year 0,
+ *         EOVERFLOW if the text does not fit into rtime
+ */
+static int
+tar_time (long long timeval, char *rtime, unsigned int rsize)
+{
+  static const unsigned int month_lengths[12] =
+    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+  /* days between 0000-01-01 and 1970-01-01 */
+  const long long epoch_days = ((long long) 1970 * 365) + 478;
+  /* length of one full 400-year Gregorian cycle */
+  const long long cycle_days = ((long long) 365 * 400) + 97;
+  long long days = timeval / 86400;
+  long long secs = timeval % 86400;
+  unsigned long long year;
+  unsigned int day_of_year;
+  unsigned int month;
+  unsigned int hours;
+  unsigned int minutes;
+  unsigned int seconds;
+  int leap;
+  int retval;
+
+  /*
+   * Split into whole days and second-of-day first (rounding towards
+   * minus infinity), so that shifting the epoch cannot overflow.
+   */
+  if (0 > secs)
+    {
+      secs += 86400;
+      days -= 1;
+    }
+
+  /*
+   * shift epoch to proleptic times
+   */
+  days += epoch_days;
+  if (0 > days)
+    return EDOM;
+
+  hours = (unsigned int) (secs / 3600);
+  minutes = (unsigned int) ((secs / 60) % 60);
+  seconds = (unsigned int) (secs % 60);
+
+  /*
+   * 400-year periods all have the same length; this is the only
+   * reduction that may be done by plain division.
+   */
+  year = 400ULL * (unsigned long long) (days / cycle_days);
+  day_of_year = (unsigned int) (days % cycle_days);
+
+  /*
+   * Inside a 400-year period, centuries and 4-year groups do not all
+   * have the same length (the first century holds one more leap year,
+   * the first 4-year group of the other centuries one less), so walk
+   * the remaining years one at a time: at most 399 iterations.
+   */
+  while (1)
+    {
+      unsigned int year_length = tar_is_leap_year (year) ? 366 : 365;
+
+      if (day_of_year < year_length)
+        break;
+      day_of_year -= year_length;
+      year++;
+    }
+
+  leap = tar_is_leap_year (year);
+  for (month = 0; month < 12; month += 1)
+    {
+      unsigned int month_length = month_lengths[month];
+
+      if ((1 == month) && (0 != leap))
+        month_length = 29;
+      if (day_of_year < month_length)
+        break;
+      day_of_year -= month_length;
+    }
+
+  retval = snprintf (rtime, rsize, "%04llu-%02u-%02uT%02u:%02u:%02uZ",
+                     year, month + 1, day_of_year + 1,
+                     hours, minutes, seconds);
+
+  /* a negative result is an output error, not a length */
+  if (0 > retval)
+    return EOVERFLOW;
+
+  return ((unsigned int) retval < rsize) ? 0 : EOVERFLOW;
+}
+
+/*
+ * Pass the NUL-terminated string s to the metadata processor as an item
+ * of type t; if the processor returns non-zero, keep that value in ret
+ * and leave through the FINISH label.
+ * Expects proc, proc_cls, ret and the FINISH label in the enclosing scope.
+ */
+#define ADD(t,s) do { \
+    if (0 != (ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, \
+                          "text/plain", s, strlen (s) + 1))) \
+      goto FINISH; \
+  } while (0)
+
+/*
+ * Same as ADD, but s is heap-allocated and is released with free()
+ * whether or not the processor accepted it.
+ */
+#define ADDF(t,s) do { \
+    ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, \
+                "text/plain", s, strlen (s) + 1); \
+    free (s); \
+    if (0 != ret) \
+      goto FINISH; \
+  } while (0)
+
+/**
+ * Scan a buffer as a TAR archive and report the member file names, the
+ * most recent member modification time, the TAR variant(s) detected and
+ * the mime type.
+ *
+ * Scanning stops silently at the first block that does not look like a
+ * valid header; whatever was recognised up to that point is reported.
+ *
+ * @param data buffer holding the candidate archive
+ * @param size number of octets in data; nothing is reported unless it is
+ *        a non-zero multiple of 512
+ * @param proc callback invoked once per metadata item
+ * @param proc_cls closure handed to proc as its first argument
+ * @param options not used by this plugin
+ * @return 0 if scanning ran to completion, otherwise the first non-zero
+ *         value returned by proc
+ */
+int 
+EXTRACTOR_tar_extract (const char *data,
+                      size_t size,
+                      EXTRACTOR_MetaDataProcessor proc,
+                      void *proc_cls,
+                      const char *options)
+{
+  char *fname = NULL;
+  size_t pos = 0;
+  int contents_are_empty = 1;
+  long long maxftime = TAR_TIME_FENCE;
+  unsigned int format_archive = 0;
+  int ret;
+
+  if (512 != TAR_HEADER_SIZE)
+    return 0;                /* compiler should remove this when optimising */
+  if (0 != (size % TAR_HEADER_SIZE))
+    return 0;                /* cannot be tar! */
+  if (size < TAR_HEADER_SIZE)
+    return 0;                /* too short, or somehow truncated */
+
+  ret = 0;
+  pos = 0;
+  while ((pos + TAR_HEADER_SIZE) <= size)
+    {
+      const TarHeader *tar = NULL;
+      unsigned format_member = 0;
+      unsigned long long fmode;
+      unsigned long long fsize;
+      long long ftime = TAR_TIME_FENCE;
+      char typeFlag = -1;
+      const char *nul_pos;
+      unsigned int tar_prefix_length = 0;
+      unsigned int tar_name_length = 0;
+      unsigned int checksum_offset;
+      int checksum_computed_500s = 0;
+      int checksum_computed_512s = 0;
+      unsigned int checksum_computed_500u = 0;
+      unsigned int checksum_computed_512u = 0;
+      unsigned long long checksum_stored = 0;
+
+      /*
+       * Compute TAR header checksum and compare with stored value.
+       * Allow for non-conformant checksums computed with signed values,
+       * such as those produced by early Solaris tar.
+       * Allow for non-conformant checksums computed on first 500 octets,
+       * such as those produced by SunOS 4.x tar according to J. Schilling.
+       * This will also detect EOF marks, since a zero-filled block
+       * cannot possibly hold octal values.
+       *
+       * The 8 octets of the checksum field itself (offsets 148..155)
+       * are summed as if they were spaces.
+       */
+      for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1)
+        {
+          checksum_computed_500u +=
+            (unsigned char) data[pos + checksum_offset];
+          checksum_computed_500s += (signed char) data[pos + checksum_offset];
+        }
+      if (8 >
+          tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored))
+        break;
+      for (; checksum_offset < 156; checksum_offset += 1)
+        {
+          checksum_computed_500u += (unsigned char) ' ';
+          checksum_computed_500s += (signed char) ' ';
+        }
+      for (; checksum_offset < 500; checksum_offset += 1)
+        {
+          checksum_computed_500u +=
+            (unsigned char) data[pos + checksum_offset];
+          checksum_computed_500s += (signed char) data[pos + checksum_offset];
+        }
+
+      checksum_computed_512u = checksum_computed_500u;
+      checksum_computed_512s = checksum_computed_500s;
+      for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1)
+        {
+          checksum_computed_512u +=
+            (unsigned char) data[pos + checksum_offset];
+          checksum_computed_512s += (signed char) data[pos + checksum_offset];
+        }
+
+      /*
+       * Suggestion: use signed checksum matches to refine
+       * TAR format detection.
+       */
+      if ((checksum_stored != (unsigned long long) checksum_computed_512u)
+          && (checksum_stored != (unsigned long long) checksum_computed_512s)
+          && (checksum_stored != (unsigned long long) checksum_computed_500s)
+          && (checksum_stored != (unsigned long long) checksum_computed_500u))
+        break;
+
+      tar = (const TarHeader *) &data[pos];
+      typeFlag = tar->link;
+      pos += TAR_HEADER_SIZE;
+
+      /*
+       * Checking all octal fields helps reduce
+       * the possibility of false positives ;
+       * only the file size, time and mode are used for now.
+       *
+       * This will fail over GNU and Schilling TAR huge size fields
+       * using non-octal encodings used for very large file lengths (> 8 GB).
+       */
+      if ((12 > tar_octalvalue (tar->fileSize, 12,
+                                &fsize))
+          || (12 > tar_octalvalue (tar->lastModTime, 12,
+                                   (unsigned long long *) &ftime))
+          || (8 > tar_octalvalue (tar->mode, 8,
+                                  (unsigned long long *) &fmode))
+          || (8 > tar_octalvalue (tar->userId, 8, NULL))
+          || (8 > tar_octalvalue (tar->groupId, 8, NULL)))
+        break;
+
+      /*
+       * Find out which TAR variant is here.
+       *
+       * The first comparison covers 7 octets and therefore also looks
+       * at the first octet of the version field that follows ustarMagic.
+       */
+      if (0 == memcmp (tar->ustarMagic, "ustar  ", 7))
+        {
+
+          if (' ' == tar->mode[6])
+            format_member = TAR_GNU1991_FORMAT;
+          else if (('K' == typeFlag) || ('L' == typeFlag))
+            {
+              format_member = TAR_GNU1997_FORMAT;
+              ftime = TAR_TIME_FENCE;
+            }
+          else
+            format_member =
+              (((unsigned) fmode) !=
+               (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT :
+              TAR_GNU2004_FORMAT;
+
+        }
+      else if (0 == memcmp (tar->ustarMagic, "ustar", 6))
+        {
+
+          /*
+           * It is important to perform test for SCHILLING1994 before GNU1997
+           * because certain extension type flags ('L' and 'S' for instance)
+           * are used by both.
+           */
+          if ((0 == tar->prefix[130])
+              && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL))
+              && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL))
+              && (0 == tar_isnonzero (tar->filler, 8))
+              && (0 == memcmp (tar->filler + 8, "tar", 4)))
+            {
+
+              format_member = TAR_SCHILLING1994_FORMAT;
+
+            }
+          else if (('D' == typeFlag) || ('K' == typeFlag)
+                   || ('L' == typeFlag) || ('M' == typeFlag)
+                   || ('N' == typeFlag) || ('S' == typeFlag)
+                   || ('V' == typeFlag))
+            {
+
+              format_member = TAR_GNU1997_FORMAT;
+
+            }
+          else if (('g' == typeFlag)
+                   || ('x' == typeFlag) || ('X' == typeFlag))
+            {
+
+              format_member = TAR_POSIX2001_FORMAT;
+              ftime = TAR_TIME_FENCE;
+
+            }
+          else
+            {
+
+              format_member = TAR_POSIX1988_FORMAT;
+
+            }
+        }
+      else if ((0 == memcmp (tar->filler + 8, "tar", 4))
+               && (0 == tar_isnonzero (tar->filler, 8)))
+        {
+
+          format_member = TAR_SCHILLING1985_FORMAT;
+
+        }
+      else if (('0' <= typeFlag) && (typeFlag <= '2'))
+        {
+
+          format_member = TAR_V7ORIGINAL_FORMAT;
+
+        }
+      else
+        {
+
+          format_member = TAR_V7EXTENDED_FORMAT;
+
+        }
+
+      /*
+       * Locate the file names.
+       *
+       * Three sources are considered: a POSIX.1-2001 extended header
+       * ('x' / 'X') carrying a "path=" record, an 'L' member whose data
+       * is the long name, or the fileName / prefix fields of the header
+       * itself.  The first two keep the name in fname and `continue`,
+       * so that it gets attached to the member that follows.
+       */
+      if ((0 != (format_member & TAR_POSIX2001_FORMAT))
+          && (('x' == typeFlag) || ('X' == typeFlag)))
+        {
+
+          if (size <= pos)
+            break;
+
+          else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos))
+            {
+              const char *keyptr = data + pos;
+              const char *valptr = NULL;
+              const char *nameptr = NULL;
+              unsigned int keylength = 0;
+              unsigned int namelength = 0;
+
+              /*
+               * Each record starts with a decimal number which is taken
+               * as the length of the whole record and used to step to
+               * the next one.
+               * NOTE(review): strtoul and the memcmp calls below are not
+               * bounded by fsize or size -- confirm that they cannot
+               * read past the end of data.
+               */
+              while (keyptr < data + pos + (size_t) fsize)
+                {
+                  if (('0' > *keyptr) || ('9' < *keyptr))
+                    {
+                      keyptr += 1;
+                      continue;
+                    }
+
+                  keylength =
+                    (unsigned int) strtoul (keyptr, (char **) &valptr, 10);
+                  if ((0 < keylength) && (NULL != valptr)
+                      && (keyptr != valptr))
+                    {
+                      unsigned int difflength = 0;
+
+                      while ((valptr < data + pos + (size_t) fsize)
+                             && (' ' == *valptr))
+                        valptr += 1;
+
+                      difflength = (valptr - keyptr);
+
+                      if (0 == memcmp (valptr, "path=", 5))
+                        {
+                          nameptr = valptr + 5;
+                          namelength = keylength - (nameptr - keyptr);
+                        }
+                      else
+                        {
+
+                          if ((keylength > (valptr - keyptr) + 4 + 2)
+                              && (0 == memcmp (valptr, "GNU.", 4)))
+                            format_archive |= TAR_GNU2004_FORMAT;
+
+                          else if ((keylength > (valptr - keyptr) + 7 + 2)
+                                   && (0 == memcmp (valptr, "SCHILY.", 7)))
+                            format_archive |= TAR_SCHILLING2001_FORMAT;
+
+                          else if ((keylength > (valptr - keyptr) + 4 + 2)
+                                   && (0 == memcmp (valptr, "SUN.", 4)))
+                            format_archive |= TAR_SOLARIS2001_FORMAT;
+                        }
+
+                      keyptr += keylength;
+                    }
+                  else
+                    {
+                      nameptr = NULL;
+                      break;
+                    }
+                }
+
+              if ((NULL != nameptr) && (0 != *nameptr)
+                  && ((size - (nameptr - data)) >= namelength)
+                  && (1 < namelength))
+                {
+                  if (NULL != fname)
+                    free (fname);
+                  /*
+                   * There is an 1-offset because POSIX.1-2001
+                   * field separator is counted in field length.
+                   */
+                  fname = malloc (namelength);
+                  if (NULL != fname)
+                    {
+                      memcpy (fname, nameptr, namelength - 1);
+                      fname[namelength - 1] = '\0';
+
+                      pos += tar_roundup ((size_t) fsize);
+                      format_archive |= format_member;
+                      continue;
+                    }
+                }
+            }
+        }
+
+      else if ((0 != (format_member
+                      & (TAR_SCHILLING1994_FORMAT
+                         | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
+               && ('L' == typeFlag))
+        {
+
+          if (size <= pos)
+            break;
+
+          else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos))
+            {
+
+              size_t length = (size_t) fsize;
+
+              nul_pos = memchr (data + pos, 0, length);
+              if (NULL != nul_pos)
+                length = (nul_pos - (data + pos));
+
+              if (0 < length)
+                {
+                  if (NULL != fname)
+                    free (fname);
+                  fname = malloc (1 + length);
+                  if (NULL != fname)
+                    {
+                      memcpy (fname, data + pos, length);
+                      fname[length] = '\0';
+                    }
+
+                  pos += tar_roundup ((size_t) fsize);
+                  format_archive |= format_member;
+                  continue;
+                }
+            }
+        }
+      else
+        {
+
+          nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName);
+          tar_name_length = (0 == nul_pos)
+            ? sizeof (tar->fileName) : (nul_pos - tar->fileName);
+
+          if ((0 !=
+               (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
+              && ('S' == typeFlag))
+            {
+
+              if ((0 == tar->prefix[40])
+                  && (0 != tar->prefix[137])
+                  && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL))
+                  && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL)))
+                {
+                  /*
+                   * fsize needs adjustment when there are more than 4 sparse
+                   * blocks
+                   */
+                  size_t diffpos = 0;
+                  fsize += TAR_HEADER_SIZE;
+
+                  while ((pos + diffpos + TAR_HEADER_SIZE < size)
+                         && (0 != *(data + pos + diffpos + 504)))
+                    {
+                      diffpos += TAR_HEADER_SIZE;
+                      fsize += TAR_HEADER_SIZE;
+                    }
+                }
+
+              typeFlag = '0';
+
+            }
+          else if (0 != (format_member & TAR_SCHILLING1994_FORMAT))
+            {
+
+              nul_pos = memchr (tar->prefix, 0, 130);
+              tar_prefix_length = (0 == nul_pos)
+                ? 130 : (nul_pos - tar->prefix);
+
+              if ('S' == typeFlag)
+                typeFlag = '0';
+
+            }
+          else if (0 != (format_member & TAR_SCHILLING1985_FORMAT))
+            {
+
+              nul_pos = memchr (tar->prefix, 0, 155);
+              tar_prefix_length = (0 == nul_pos)
+                ? 155 : (nul_pos - tar->prefix);
+
+
+              if ('S' == typeFlag)
+                typeFlag = '0';
+
+            }
+          else if (0 != (format_member & TAR_POSIX1988_FORMAT))
+            {
+
+              nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix);
+              tar_prefix_length = (0 == nul_pos)
+                ? sizeof tar->prefix : nul_pos - tar->prefix;
+
+            }
+        }
+
+      /*
+       * Update position so that next loop iteration will find
+       * either a TAR header or TAR EOF mark or just EOF.
+       *
+       * Consider archive member size to be zero
+       * with no data following the header in the following cases :
+       * '1' : hard link, '2' : soft link,
+       * '3' : character device, '4' : block device,
+       * '5' : directory, '6' : named pipe.
+       */
+      if ('1' != typeFlag && '2' != typeFlag
+          && '3' != typeFlag && '4' != typeFlag
+          && '5' != typeFlag && '6' != typeFlag)
+        {
+          if ((fsize > (unsigned long long) size)
+              || (fsize + (unsigned long long) pos >
+                  (unsigned long long) size))
+            break;
+
+          pos += tar_roundup ((size_t) fsize);
+        }
+      if (pos - 1 > size)
+        break;
+
+      format_archive |= format_member;
+
+      /*
+       * Store the file name in libextractor list.
+       *
+       * For the time being, only file types listed in POSIX.1-1988 ('0'..'7')
+       * are retained, leaving out labels, access control lists, etc.
+       *
+       * A name left in fname by a preceding long-name member takes
+       * precedence over the prefix / fileName fields of this header.
+       */
+      if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7')))
+        {
+          if (NULL == fname)
+            {
+              if (0 < tar_prefix_length + tar_name_length)
+                {
+                  fname = malloc (2 + tar_prefix_length + tar_name_length);
+
+                  if (NULL != fname)
+                    {
+                      if (0 < tar_prefix_length)
+                        {
+                          memcpy (fname, tar->prefix, tar_prefix_length);
+
+                          if (('/' != tar->prefix[tar_prefix_length - 1])
+                              && (0 < tar_name_length)
+                              && ('/' != tar->fileName[0]))
+                            {
+                              fname[tar_prefix_length] = '/';
+                              tar_prefix_length += 1;
+                            }
+                        }
+
+                      if (0 < tar_name_length)
+                        memcpy (fname + tar_prefix_length, tar->fileName,
+                                tar_name_length);
+
+                      fname[tar_prefix_length + tar_name_length] = '\0';
+                    }
+                }
+            }
+
+          if ((NULL != fname) && (0 != *fname))
+            {
+#if 0
+              fprintf (stdout,
+                       "(%u) flag = %c, size = %u, tname = (%s), fname = 
(%s)\n",
+                       __LINE__, typeFlag, (unsigned int) fsize,
+                       (NULL == tar->fileName) ? "" : tar->fileName,
+                       (NULL == fname) ? "" : fname);
+#endif
+
+              ADDF (EXTRACTOR_METATYPE_FILENAME, fname);
+              fname = NULL;
+              if (ftime > maxftime)
+                maxftime = ftime;
+              contents_are_empty = 0;
+            }
+        }
+
+      if (NULL != fname)
+        {
+          free (fname);
+          fname = NULL;
+        }
+    }
+
+  if (NULL != fname)
+    {
+      free (fname);
+      fname = NULL;
+    }
+
+  /*
+   * Report mimetype; report also format(s) and most recent date
+   * when at least one archive member was found.
+   */
+  if (0 != format_archive)
+    {
+      if (0 == contents_are_empty)
+        {
+
+          const char *formats[5] = { NULL, NULL, NULL, NULL, NULL };
+          unsigned int formats_count = 0;
+          unsigned int formats_u = 0;
+          unsigned int format_length = 0;
+          char *format = NULL;
+
+          if (TAR_TIME_FENCE < maxftime)
+            {
+              char iso8601_time[24];
+
+              if (0 == tar_time (maxftime, iso8601_time, sizeof(iso8601_time)))
+                ADD (EXTRACTOR_METATYPE_CREATION_DATE, iso8601_time);
+            }
+
+          /*
+           * We only keep the most recent POSIX format.
+           */
+          if (0 != (format_archive & TAR_POSIX2001_FORMAT))
+            formats[formats_count++] = "POSIX 2001";
+
+          else if (0 != (format_archive & TAR_POSIX1988_FORMAT))
+            formats[formats_count++] = "POSIX 1988";
+
+          /*
+           * We only keep the most recent GNU format.
+           */
+          if (0 != (format_archive & TAR_GNU2004_FORMAT))
+            formats[formats_count++] = "GNU 2004";
+
+          else if (0 != (format_archive & TAR_GNU1997_FORMAT))
+            formats[formats_count++] = "GNU 1997";
+
+          else if (0 != (format_archive & TAR_GNU1991_FORMAT))
+            formats[formats_count++] = "GNU 1991";
+
+          /*
+           * We only keep the most recent Schilling format.
+           */
+          if (0 != (format_archive & TAR_SCHILLING2001_FORMAT))
+            formats[formats_count++] = "Schilling 2001";
+
+          else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT))
+            formats[formats_count++] = "Schilling 1994";
+
+          else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT))
+            formats[formats_count++] = "Schilling 1985";
+
+          /*
+           * We only keep the most recent Solaris format.
+           */
+          if (0 != (format_archive & TAR_SOLARIS2001_FORMAT))
+            formats[formats_count++] = "Solaris 2001";
+
+          /*
+           * We only keep the (supposedly) most recent UNIX V7 format.
+           */
+          if (0 != (format_archive & TAR_V7EXTENDED_FORMAT))
+            formats[formats_count++] = "UNIX extended V7";
+
+          else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT))
+            formats[formats_count++] = "UNIX original V7";
+
+          /*
+           * Build the format string
+           *
+           * First pass: add up the length of the names and of the
+           * " + " separators between them.  Second pass: copy them.
+           * The 5 extra octets hold the trailing " TAR" and the NUL.
+           */
+          for (formats_u = 0; formats_u < formats_count; formats_u += 1)
+            {
+              if ((NULL != formats[formats_u]) && (0 != *formats[formats_u]))
+                {
+                  if (0 < format_length)
+                    format_length += 3;
+                  format_length += strlen (formats[formats_u]);
+                }
+            }
+
+          if (0 < format_length)
+            {
+              format = malloc (format_length + 5);
+
+              if (NULL != format)
+                {
+
+                  format_length = 0;
+
+                  for (formats_u = 0; formats_u < formats_count;
+                       formats_u += 1)
+                    {
+                      if ((NULL != formats[formats_u])
+                          && (0 != *formats[formats_u]))
+                        {
+                          if (0 < format_length)
+                            {
+                              strcpy (format + format_length, " + ");
+                              format_length += 3;
+                            }
+                          strcpy (format + format_length, formats[formats_u]);
+                          format_length += strlen (formats[formats_u]);
+                        }
+                    }
+
+                  if (0 < format_length)
+                    {
+                      strcpy (format + format_length, " TAR");
+                      ADDF (EXTRACTOR_METATYPE_FORMAT_VERSION, format);
+                    }
+                  else
+                    {
+                      free (format);
+                    }
+                }
+            }
+        }
+
+      ADD (EXTRACTOR_METATYPE_MIMETYPE, "application/x-tar");
+    }
+ FINISH:
+  return ret;
+}

Deleted: Extractor/src/plugins/tarextractor.c
===================================================================
--- Extractor/src/plugins/tarextractor.c        2009-12-18 18:18:01 UTC (rev 9797)
+++ Extractor/src/plugins/tarextractor.c        2009-12-18 18:43:20 UTC (rev 9798)
@@ -1,930 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
- */
-
-#include "platform.h"
-#include "extractor.h"
-
-/*
- * Note that this code is not complete!
- *
- * References:
- *
- * http://www.mkssoftware.com/docs/man4/tar.4.asp
- * (does document USTAR format common nowadays,
- *  but not other extended formats such as the one produced
- *  by GNU tar 1.13 when very long filenames are met.)
- *
- * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs
- * (J. Schilling's remarks on TAR formats compatibility issues.)
- */
-/*
- * Prepend a keyword node to a keyword list.
- *
- * type    - keyword type recorded in the new node
- * keyword - string stored in the node by pointer (not copied);
- *           passed to free() here if it is empty or if the node
- *           cannot be allocated
- * next    - current head of the list; only stored, never dereferenced
- *
- * Returns the new head on success.  Returns next unchanged when keyword
- * is NULL or empty, or when malloc fails.
- */
-static EXTRACTOR_KeywordList *
-addKeyword (EXTRACTOR_KeywordType type,
-            char *keyword, EXTRACTOR_KeywordList * next)
-{
-  EXTRACTOR_KeywordList *result = next;
-
-  if (NULL != keyword)
-    {
-      if (0 == *keyword)
-        {
-          free (keyword);
-        }
-      else
-        {
-          result = malloc (sizeof (EXTRACTOR_KeywordList));
-          if (NULL == result)
-            {
-              free (keyword);
-            }
-          else
-            {
-              result->next = next;
-              result->keyword = keyword;
-              result->keywordType = type;
-            }
-        }
-    }
-
-  return result;
-}
-/*
- * Append a keyword node after the last node of a keyword list.
- *
- * type    - keyword type recorded in the new node
- * keyword - string stored in the node by pointer (not copied);
- *           passed to free() here if it is empty or if the node
- *           cannot be allocated
- * last    - current tail of the list, or NULL for an empty list;
- *           the process is aborted if last->next is not NULL
- *
- * Returns the new tail on success.  Returns last unchanged when keyword
- * is NULL or empty.  When malloc fails the keyword is freed and NULL is
- * returned, even if last was not NULL.
- */
-static EXTRACTOR_KeywordList *
-appendKeyword (EXTRACTOR_KeywordType type,
-               char *keyword, EXTRACTOR_KeywordList * last)
-{
-  EXTRACTOR_KeywordList *result = last;
-
-  if (NULL != keyword)
-    {
-      if (0 == *keyword)
-        {
-          free (keyword);
-        }
-      else
-        {
-          if ((NULL != last) && (NULL != last->next))
-            abort ();
-          result = malloc (sizeof (EXTRACTOR_KeywordList));
-          if (NULL == result)
-            {
-              free (keyword);
-            }
-          else
-            {
-              result->next = NULL;
-              result->keywordType = type;
-              result->keyword = keyword;
-              if (NULL != last)
-                last->next = result;
-            }
-        }
-    }
-
-  return result;
-}
-
-/*
- * Define known TAR archive member variants.
- * In theory different variants
- * can coexist within a single TAR archive file
- * although this will be uncommon.
- *
- * Each variant is a distinct single bit, so the variants met
- * in one archive can be accumulated in a single bit mask.
- */
-#define TAR_V7ORIGINAL_FORMAT    (1)
-#define TAR_V7EXTENDED_FORMAT    (1 << 1)
-#define TAR_SCHILLING1985_FORMAT (1 << 2)
-#define TAR_POSIX1988_FORMAT     (1 << 3)
-#define TAR_GNU1991_FORMAT       (1 << 4)
-#define TAR_SCHILLING1994_FORMAT (1 << 5)
-#define TAR_GNU1997_FORMAT       (1 << 6)
-#define TAR_POSIX2001_FORMAT     (1 << 7)
-#define TAR_SCHILLING2001_FORMAT (1 << 8)
-#define TAR_SOLARIS2001_FORMAT   (1 << 9)
-#define TAR_GNU2004_FORMAT       (1 << 10)
-
-/*
- * TAR header structure, modelled after POSIX.1-1988
- *
- * The member sizes add up to 512 octets; the extractor compares
- * sizeof (TarHeader) with 512 before parsing anything.
- */
-typedef struct
-{
-  char fileName[100];
-  char mode[8];
-  char userId[8];
-  char groupId[8];
-  char fileSize[12];
-  char lastModTime[12];
-  char chksum[8];
-  char link;                    /* read by the extractor as the member type flag */
-  char linkName[100];
-  /*
-   * All fields below are
-   * either zero-filled or undefined
-   * for UNIX V7 TAR archive members ;
-   * their header is always 512 octets long nevertheless.
-   */
-  char ustarMagic[6];
-  char version[2];
-  char userName[32];
-  char groupName[32];
-  char devMajor[8];
-  char devMinor[8];
-  char prefix[155];
-  char filler[12];
-} TarHeader;
-/*
- * TAR_HEADER_SIZE - size of one header block, as laid out by TarHeader.
- * TAR_TIME_FENCE  - the value -(2^62); the extractor uses it as the
- *                   "no modification time recorded" marker.
- */
-#define TAR_HEADER_SIZE (sizeof(TarHeader))
-#define TAR_TIME_FENCE  ((long long) (-(1LL << 62)))
-/*
- * Round size up to the next multiple of TAR_HEADER_SIZE.
- *
- * Returns size unchanged when it is already a multiple.
- * NOTE(review): the addition is not checked for size_t wrap-around;
- * confirm callers never pass values close to the size_t maximum.
- */
-static size_t
-tar_roundup (size_t size)
-{
-  size_t diff = (size % TAR_HEADER_SIZE);       /* octets past the last full block */
-
-  return (0 == diff) ? size : (size + (TAR_HEADER_SIZE - diff));
-}
-/*
- * Tell whether a buffer contains at least one non-zero octet.
- *
- * data   - buffer to inspect
- * length - number of octets to inspect
- *
- * Returns 1 as soon as a non-zero octet is found, 0 when all length
- * octets are zero (including when length is 0).
- */
-static int
-tar_isnonzero (const char *data, unsigned int length)
-{
-  unsigned int total = 0;
-
-  while (total < length)
-    {
-      if (0 != data[total])
-        return 1;
-      total++;
-    }
-
-  return 0;
-}
-/*
- * Parse an octal number stored as text in a fixed-width field.
- *
- * data     - start of the field (need not be NUL-terminated)
- * size     - width of the field in octets
- * valueptr - receives the parsed value; may be NULL
- *
- * Accepted layout: optional leading spaces, one or more octal digits,
- * then any run of NUL or space octets.
- *
- * Returns the number of octets consumed from the start of the field,
- * or 0 when data is NULL, size is 0 or no octal digit was found.
- * *valueptr is written only when the result is non-zero.
- * Callers compare the result with the field width to require that
- * the whole field was consumed.
- * NOTE(review): the accumulated value is not checked for overflow.
- */
-static unsigned int
-tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr)
-{
-  unsigned int result = 0;
-
-  if (NULL != data && 0 < size)
-    {
-      const char *p = data;
-      int found = 0;
-      unsigned long long value = 0;
-
-      while ((p < data + size) && (' ' == *p))
-        p += 1;
-
-      while ((p < data + size) && ('0' <= *p) && (*p < '8'))
-        {
-          found = 1;
-          value *= 8;
-          value += (*p - '0');
-          p += 1;
-        }
-
-      if (0 != found)
-        {
-          while ((p < data + size) && ((0 == *p) || (' ' == *p)))
-            p += 1;
-
-          result = (p - data);
-        }
-
-      if ((0 < result) && (NULL != valueptr))
-        *valueptr = value;
-    }
-
-  return result;
-}
-/*
- * Fallback definition for platforms that do not provide EOVERFLOW;
- * tar_time returns it when the formatted time does not fit.
- */
-#ifndef EOVERFLOW
-#define EOVERFLOW -1
-#endif
-/*
- * Format a time in seconds since 1970-01-01T00:00:00Z as
- * "YYYY-MM-DDThh:mm:ssZ".
- *
- * timeval - seconds relative to the Unix epoch
- * rtime   - output buffer
- * rsize   - size of the output buffer in octets
- *
- * Returns 0 on success, EDOM when the time lies before the shifted
- * epoch (1 January of year 0), EOVERFLOW when the text did not fit
- * into rsize octets.
- *
- * NOTE(review): the 100-year and 4-year steps assume blocks of uniform
- * length; dates in the first years of a century that is not a leap
- * year (e.g. 2100) look one day off - confirm.
- * NOTE(review): retval (int) is compared with rsize (unsigned int);
- * a negative snprintf result would be reported as EOVERFLOW - confirm
- * this is intended.
- */
-static int
-tar_time (long long timeval, char *rtime, unsigned int rsize)
-{
-  int retval = 0;
-
-  /*
-   * shift epoch to proleptic times
-   * to make subsequent modulo operations safer.
-   */
-  long long my_timeval = timeval
-    + ((long long) ((1970 * 365) + 478) * (long long) 86400);
-
-  unsigned int seconds = (unsigned int) (my_timeval % 60);
-  unsigned int minutes = (unsigned int) ((my_timeval / 60) % 60);
-  unsigned int hours = (unsigned int) ((my_timeval / 3600) % 24);
-
-  unsigned int year = 0;
-  unsigned int month = 1;
-
-  unsigned int days = (unsigned int) (my_timeval / (24 * 3600));
-
-  unsigned int days_in_month[] =
-    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
-  unsigned int diff = 0;
-
-  if ((long long) 0 > my_timeval)
-    return EDOM;
-
-  /*
-   * 400-year periods
-   */
-  year += (400 * (days / ((365 * 400) + 97)));
-  days %= ((365 * 400) + 97);
-
-  /*
-   * 100-year periods
-   */
-  diff = (days / ((365 * 100) + 24));
-  if (4 <= diff)
-    {
-      year += 399;              /* only reached on the last day of a 400-year period */
-      days = 364;
-    }
-  else
-    {
-      year += (100 * diff);
-      days %= ((365 * 100) + 24);
-    }
-
-  /*
-   * remaining leap years
-   */
-  year += (4 * (days / ((365 * 4) + 1)));
-  days %= ((365 * 4) + 1);
-
-  while (1)
-    {
-      if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100))))
-        {
-          if (366 > days)
-            {
-              break;
-            }
-          else
-            {
-              days -= 366;
-              year++;
-            }
-        }
-      else
-        {
-          if (365 > days)
-            {
-              break;
-            }
-          else
-            {
-              days -= 365;
-              year++;
-            }
-        }
-    }
-
-  if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100))))
-    days_in_month[1] = 29;
-
-  for (month = 0; (month < 12) && (days >= days_in_month[month]); month += 1)
-    days -= days_in_month[month];
-
-  retval = snprintf (rtime, rsize, "%04u-%02u-%02uT%02u:%02u:%02uZ",
-                     year, month + 1, days + 1, hours, minutes, seconds);
-
-  return (retval < rsize) ? 0 : EOVERFLOW;
-}
-/*
- * Scan an in-memory TAR archive and add metadata keywords to a list.
- *
- * filename - not used by this function
- * data     - archive octets
- * size     - number of octets in data; anything that is not a
- *            non-zero multiple of 512 is rejected
- * prev     - existing keyword list, may be NULL
- *
- * Headers are read one after another until one fails the checksum or
- * octal field checks.  For members whose type flag is NUL or '0'..'7'
- * the member name is appended as EXTRACTOR_FILENAME.  When at least one
- * name was stored, EXTRACTOR_DATE (latest modification time met, if
- * any) and EXTRACTOR_FORMAT (detected variants joined with " + " and
- * followed by " TAR") are prepended.  EXTRACTOR_MIMETYPE
- * "application/x-tar" is prepended whenever at least one header was
- * accepted.
- *
- * Returns the head of the resulting keyword list (prev itself when
- * nothing was added in front of it).
- */
-struct EXTRACTOR_Keywords *
-libextractor_tar_extract (const char *filename,
-                          const char *data,
-                          size_t size, struct EXTRACTOR_Keywords *prev)
-{
-  char *fname = NULL;
-  size_t pos = 0;
-  int contents_are_empty = 1;
-  long long maxftime = TAR_TIME_FENCE;
-  unsigned int format_archive = 0;
-  struct EXTRACTOR_Keywords *last;
-
-  if (512 != TAR_HEADER_SIZE)
-    return prev;                /* compiler should remove this when optimising */
-  if (0 != (size % TAR_HEADER_SIZE))
-    return prev;                /* cannot be tar! */
-  if (size < TAR_HEADER_SIZE)
-    return prev;                /* too short, or somehow truncated */
-
-  last = prev;
-  if (last != NULL)
-    while (last->next != NULL)
-      last = last->next;
-
-  pos = 0;
-  while ((pos + TAR_HEADER_SIZE) <= size)
-    {
-      const TarHeader *tar = NULL;
-      unsigned format_member = 0;
-      unsigned long long fmode;
-      unsigned long long fsize;
-      long long ftime = TAR_TIME_FENCE;
-      char typeFlag = -1;
-      const char *nul_pos;
-      unsigned int tar_prefix_length = 0;
-      unsigned int tar_name_length = 0;
-      unsigned int checksum_offset;
-      int checksum_computed_500s = 0;
-      int checksum_computed_512s = 0;
-      unsigned int checksum_computed_500u = 0;
-      unsigned int checksum_computed_512u = 0;
-      unsigned long long checksum_stored = 0;
-
-      /*
-       * Compute TAR header checksum and compare with stored value.
-       * Allow for non-conformant checksums computed with signed values,
-       * such as those produced by early Solaris tar.
-       * Allow for non-conformant checksums computed on first 500 octets,
-       * such as those produced by SunOS 4.x tar according to J. Schilling.
-       * This will also detect EOF marks, since a zero-filled block
-       * cannot possibly hold octal values.
-       */
-      for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1)
-        {
-          checksum_computed_500u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_500s += (signed char) data[pos + checksum_offset];
-        }
-      if (8 >
-          tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored))
-        break;
-      for (; checksum_offset < 156; checksum_offset += 1)
-        {
-          checksum_computed_500u += (unsigned char) ' ';
-          checksum_computed_500s += (signed char) ' ';
-        }
-      for (; checksum_offset < 500; checksum_offset += 1)
-        {
-          checksum_computed_500u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_500s += (signed char) data[pos + checksum_offset];
-        }
-
-      checksum_computed_512u = checksum_computed_500u;
-      checksum_computed_512s = checksum_computed_500s;
-      for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1)
-        {
-          checksum_computed_512u +=
-            (unsigned char) data[pos + checksum_offset];
-          checksum_computed_512s += (signed char) data[pos + checksum_offset];
-        }
-
-      /*
-       * Suggestion: use signed checksum matches to refine
-       * TAR format detection.
-       */
-      if ((checksum_stored != (unsigned long long) checksum_computed_512u)
-          && (checksum_stored != (unsigned long long) checksum_computed_512s)
-          && (checksum_stored != (unsigned long long) checksum_computed_500s)
-          && (checksum_stored != (unsigned long long) checksum_computed_500u))
-        break;
-
-      tar = (const TarHeader *) &data[pos];
-      typeFlag = tar->link;
-      pos += TAR_HEADER_SIZE;
-
-      /*
-       * Checking all octal fields helps reduce
-       * the possibility of false positives ;
-       * only the file size, time and mode are used for now.
-       *
-       * This will fail over GNU and Schilling TAR huge size fields
-       * using non-octal encodings used for very large file lengths (> 8 GB).
-       */
-      if ((12 > tar_octalvalue (tar->fileSize, 12,
-                                &fsize))
-          || (12 > tar_octalvalue (tar->lastModTime, 12,
-                                   (unsigned long long *) &ftime))
-          || (8 > tar_octalvalue (tar->mode, 8,
-                                  (unsigned long long *) &fmode))
-          || (8 > tar_octalvalue (tar->userId, 8, NULL))
-          || (8 > tar_octalvalue (tar->groupId, 8, NULL)))
-        break;
-
-      /*
-       * Find out which TAR variant is here.
-       */
-      if (0 == memcmp (tar->ustarMagic, "ustar  ", 7))
-        {
-
-          if (' ' == tar->mode[6])
-            format_member = TAR_GNU1991_FORMAT;
-          else if (('K' == typeFlag) || ('L' == typeFlag))
-            {
-              format_member = TAR_GNU1997_FORMAT;
-              ftime = TAR_TIME_FENCE;
-            }
-          else
-            format_member =
-              (((unsigned) fmode) !=
-               (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT :
-              TAR_GNU2004_FORMAT;
-
-        }
-      else if (0 == memcmp (tar->ustarMagic, "ustar", 6))
-        {
-
-          /*
-           * It is important to perform test for SCHILLING1994 before GNU1997
-           * because certain extension type flags ('L' and 'S' for instance)
-           * are used by both.
-           */
-          if ((0 == tar->prefix[130])
-              && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL))
-              && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL))
-              && (0 == tar_isnonzero (tar->filler, 8))
-              && (0 == memcmp (tar->filler + 8, "tar", 4)))
-            {
-
-              format_member = TAR_SCHILLING1994_FORMAT;
-
-            }
-          else if (('D' == typeFlag) || ('K' == typeFlag)
-                   || ('L' == typeFlag) || ('M' == typeFlag)
-                   || ('N' == typeFlag) || ('S' == typeFlag)
-                   || ('V' == typeFlag))
-            {
-
-              format_member = TAR_GNU1997_FORMAT;
-
-            }
-          else if (('g' == typeFlag)
-                   || ('x' == typeFlag) || ('X' == typeFlag))
-            {
-
-              format_member = TAR_POSIX2001_FORMAT;
-              ftime = TAR_TIME_FENCE;
-
-            }
-          else
-            {
-
-              format_member = TAR_POSIX1988_FORMAT;
-
-            }
-        }
-      else if ((0 == memcmp (tar->filler + 8, "tar", 4))
-               && (0 == tar_isnonzero (tar->filler, 8)))
-        {
-
-          format_member = TAR_SCHILLING1985_FORMAT;
-
-        }
-      else if (('0' <= typeFlag) && (typeFlag <= '2'))
-        {
-
-          format_member = TAR_V7ORIGINAL_FORMAT;
-
-        }
-      else
-        {
-
-          format_member = TAR_V7EXTENDED_FORMAT;
-
-        }
-
-      /*
-       * Locate the file names.
-       */
-      if ((0 != (format_member & TAR_POSIX2001_FORMAT))
-          && (('x' == typeFlag) || ('X' == typeFlag)))
-        {
-
-          if (size <= pos)
-            break;
-
-          else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos))
-            {
-              const char *keyptr = data + pos;
-              const char *valptr = NULL;
-              const char *nameptr = NULL;
-              unsigned int keylength = 0;
-              unsigned int namelength = 0;
-
-              while (keyptr < data + pos + (size_t) fsize)
-                {
-                  if (('0' > *keyptr) || ('9' < *keyptr))
-                    {
-                      keyptr += 1;
-                      continue;
-                    }
-
-                  keylength =
-                    (unsigned int) strtoul (keyptr, (char **) &valptr, 10);
-                  if ((0 < keylength) && (NULL != valptr)
-                      && (keyptr != valptr))
-                    {
-                      unsigned int difflength = 0;
-
-                      while ((valptr < data + pos + (size_t) fsize)
-                             && (' ' == *valptr))
-                        valptr += 1;
-
-                      difflength = (valptr - keyptr);
-                      /*
-                       * NOTE(review): the memcmp calls below do not check
-                       * that enough octets remain in data - confirm.
-                       */
-                      if (0 == memcmp (valptr, "path=", 5))
-                        {
-                          nameptr = valptr + 5;
-                          namelength = keylength - (nameptr - keyptr);
-                        }
-                      else
-                        {
-
-                          if ((keylength > (valptr - keyptr) + 4 + 2)
-                              && (0 == memcmp (valptr, "GNU.", 4)))
-                            format_archive |= TAR_GNU2004_FORMAT;
-
-                          else if ((keylength > (valptr - keyptr) + 7 + 2)
-                                   && (0 == memcmp (valptr, "SCHILY.", 7)))
-                            format_archive |= TAR_SCHILLING2001_FORMAT;
-
-                          else if ((keylength > (valptr - keyptr) + 4 + 2)
-                                   && (0 == memcmp (valptr, "SUN.", 4)))
-                            format_archive |= TAR_SOLARIS2001_FORMAT;
-                        }
-
-                      keyptr += keylength;
-                    }
-                  else
-                    {
-                      nameptr = NULL;
-                      break;
-                    }
-                }
-
-              if ((NULL != nameptr) && (0 != *nameptr)
-                  && ((size - (nameptr - data)) >= namelength)
-                  && (1 < namelength))
-                {
-                  if (NULL != fname)
-                    free (fname);
-                  /*
-                   * There is an 1-offset because POSIX.1-2001
-                   * field separator is counted in field length.
-                   */
-                  fname = malloc (namelength);
-                  if (NULL != fname)
-                    {
-                      memcpy (fname, nameptr, namelength - 1);
-                      fname[namelength - 1] = '\0';
-
-                      pos += tar_roundup ((size_t) fsize);
-                      format_archive |= format_member;
-                      continue;
-                    }
-                }
-            }
-        }
-
-      else if ((0 != (format_member
-                      & (TAR_SCHILLING1994_FORMAT
-                         | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
-               && ('L' == typeFlag))
-        {
-
-          if (size <= pos)
-            break;
-
-          else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos))
-            {
-
-              size_t length = (size_t) fsize;
-
-              nul_pos = memchr (data + pos, 0, length);
-              if (NULL != nul_pos)
-                length = (nul_pos - (data + pos));
-
-              if (0 < length)
-                {
-                  if (NULL != fname)
-                    free (fname);
-                  fname = malloc (1 + length);
-                  if (NULL != fname)
-                    {
-                      memcpy (fname, data + pos, length);
-                      fname[length] = '\0';
-                    }
-
-                  pos += tar_roundup ((size_t) fsize);
-                  format_archive |= format_member;
-                  continue;
-                }
-            }
-        }
-      else
-        {
-
-          nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName);
-          tar_name_length = (0 == nul_pos)
-            ? sizeof (tar->fileName) : (nul_pos - tar->fileName);
-
-          if ((0 !=
-               (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
-              && ('S' == typeFlag))
-            {
-
-              if ((0 == tar->prefix[40])
-                  && (0 != tar->prefix[137])
-                  && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL))
-                  && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL)))
-                {
-                  /*
-                   * fsize needs adjustment when there are more than 4 sparse blocks
-                   */
-                  size_t diffpos = 0;
-                  fsize += TAR_HEADER_SIZE;
-
-                  while ((pos + diffpos + TAR_HEADER_SIZE < size)
-                         && (0 != *(data + pos + diffpos + 504)))
-                    {
-                      diffpos += TAR_HEADER_SIZE;
-                      fsize += TAR_HEADER_SIZE;
-                    }
-                }
-
-              typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_SCHILLING1994_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, 130);
-              tar_prefix_length = (0 == nul_pos)
-                ? 130 : (nul_pos - tar->prefix);
-
-              if ('S' == typeFlag)
-                typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_SCHILLING1985_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, 155);
-              tar_prefix_length = (0 == nul_pos)
-                ? 155 : (nul_pos - tar->prefix);
-
-
-              if ('S' == typeFlag)
-                typeFlag = '0';
-
-            }
-          else if (0 != (format_member & TAR_POSIX1988_FORMAT))
-            {
-
-              nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix);
-              tar_prefix_length = (0 == nul_pos)
-                ? sizeof tar->prefix : nul_pos - tar->prefix;
-
-            }
-        }
-
-      /*
-       * Update position so that next loop iteration will find
-       * either a TAR header or TAR EOF mark or just EOF.
-       *
-       * Consider archive member size to be zero
-       * with no data following the header in the following cases :
-       * '1' : hard link, '2' : soft link,
-       * '3' : character device, '4' : block device,
-       * '5' : directory, '6' : named pipe.
-       */
-      if ('1' != typeFlag && '2' != typeFlag
-          && '3' != typeFlag && '4' != typeFlag
-          && '5' != typeFlag && '6' != typeFlag)
-        {
-          if ((fsize > (unsigned long long) size)
-              || (fsize + (unsigned long long) pos >
-                  (unsigned long long) size))
-            break;
-
-          pos += tar_roundup ((size_t) fsize);
-        }
-      if (pos - 1 > size)
-        break;
-
-      format_archive |= format_member;
-
-      /*
-       * Store the file name in libextractor list.
-       *
-       * For the time being, only file types listed in POSIX.1-1988 ('0'..'7')
-       * are retained, leaving out labels, access control lists, etc.
-       */
-      if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7')))
-        {
-          if (NULL == fname)
-            {
-              if (0 < tar_prefix_length + tar_name_length)
-                {
-                  fname = malloc (2 + tar_prefix_length + tar_name_length);
-
-                  if (NULL != fname)
-                    {
-                      if (0 < tar_prefix_length)
-                        {
-                          memcpy (fname, tar->prefix, tar_prefix_length);
-
-                          if (('/' != tar->prefix[tar_prefix_length - 1])
-                              && (0 < tar_name_length)
-                              && ('/' != tar->fileName[0]))
-                            {
-                              fname[tar_prefix_length] = '/';
-                              tar_prefix_length += 1;
-                            }
-                        }
-
-                      if (0 < tar_name_length)
-                        memcpy (fname + tar_prefix_length, tar->fileName,
-                                tar_name_length);
-
-                      fname[tar_prefix_length + tar_name_length] = '\0';
-                    }
-                }
-            }
-
-          if ((NULL != fname) && (0 != *fname))
-            {
-#if 0
-              fprintf (stdout,
-                       "(%u) flag = %c, size = %u, tname = (%s), fname = 
(%s)\n",
-                       __LINE__, typeFlag, (unsigned int) fsize,
-                       (NULL == tar->fileName) ? "" : tar->fileName,
-                       (NULL == fname) ? "" : fname);
-#endif
-
-              last = appendKeyword (EXTRACTOR_FILENAME, fname, last);
-              fname = NULL;
-              if (prev == NULL)
-                prev = last;
-              if (ftime > maxftime)
-                maxftime = ftime;
-              contents_are_empty = 0;
-            }
-        }
-
-      if (NULL != fname)
-        {
-          free (fname);
-          fname = NULL;
-        }
-    }
-
-  if (NULL != fname)
-    {
-      free (fname);
-      fname = NULL;
-    }
-
-  /*
-   * Report mimetype; report also format(s) and most recent date
-   * when at least one archive member was found.
-   */
-  if (0 != format_archive)
-    {
-      if (0 == contents_are_empty)
-        {
-
-          const char *formats[5] = { NULL, NULL, NULL, NULL, NULL };
-          unsigned int formats_count = 0;
-          unsigned int formats_u = 0;
-          unsigned int format_length = 0;
-          char *format = NULL;
-
-          if (TAR_TIME_FENCE < maxftime)
-            {
-              char iso8601_time[24];
-
-              if (0 == tar_time (maxftime, iso8601_time, sizeof iso8601_time))
-                prev =
-                  addKeyword (EXTRACTOR_DATE, strdup (iso8601_time), prev);
-            }
-
-          /*
-           * We only keep the most recent POSIX format.
-           */
-          if (0 != (format_archive & TAR_POSIX2001_FORMAT))
-            formats[formats_count++] = "POSIX 2001";
-
-          else if (0 != (format_archive & TAR_POSIX1988_FORMAT))
-            formats[formats_count++] = "POSIX 1988";
-
-          /*
-           * We only keep the most recent GNU format.
-           */
-          if (0 != (format_archive & TAR_GNU2004_FORMAT))
-            formats[formats_count++] = "GNU 2004";
-
-          else if (0 != (format_archive & TAR_GNU1997_FORMAT))
-            formats[formats_count++] = "GNU 1997";
-
-          else if (0 != (format_archive & TAR_GNU1991_FORMAT))
-            formats[formats_count++] = "GNU 1991";
-
-          /*
-           * We only keep the most recent Schilling format.
-           */
-          if (0 != (format_archive & TAR_SCHILLING2001_FORMAT))
-            formats[formats_count++] = "Schilling 2001";
-
-          else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT))
-            formats[formats_count++] = "Schilling 1994";
-
-          else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT))
-            formats[formats_count++] = "Schilling 1985";
-
-          /*
-           * We only keep the most recent Solaris format.
-           */
-          if (0 != (format_archive & TAR_SOLARIS2001_FORMAT))
-            formats[formats_count++] = "Solaris 2001";
-
-          /*
-           * We only keep the (supposedly) most recent UNIX V7 format.
-           */
-          if (0 != (format_archive & TAR_V7EXTENDED_FORMAT))
-            formats[formats_count++] = "UNIX extended V7";
-
-          else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT))
-            formats[formats_count++] = "UNIX original V7";
-
-          /*
-           * Build the format string
-           */
-          for (formats_u = 0; formats_u < formats_count; formats_u += 1)
-            {
-              if ((NULL != formats[formats_u]) && (0 != *formats[formats_u]))
-                {
-                  if (0 < format_length)
-                    format_length += 3;
-                  format_length += strlen (formats[formats_u]);
-                }
-            }
-
-          if (0 < format_length)
-            {
-              format = malloc (format_length + 5);
-
-              if (NULL != format)
-                {
-
-                  format_length = 0;
-
-                  for (formats_u = 0; formats_u < formats_count;
-                       formats_u += 1)
-                    {
-                      if ((NULL != formats[formats_u])
-                          && (0 != *formats[formats_u]))
-                        {
-                          if (0 < format_length)
-                            {
-                              strcpy (format + format_length, " + ");
-                              format_length += 3;
-                            }
-                          strcpy (format + format_length, formats[formats_u]);
-                          format_length += strlen (formats[formats_u]);
-                        }
-                    }
-
-                  if (0 < format_length)
-                    {
-                      strcpy (format + format_length, " TAR");
-                      prev = addKeyword (EXTRACTOR_FORMAT, format, prev);
-                    }
-                  else
-                    {
-                      free (format);
-                    }
-                }
-            }
-        }
-
-      prev =
-        addKeyword (EXTRACTOR_MIMETYPE, strdup ("application/x-tar"), prev);
-    }
-
-  return prev;
-}





reply via email to

[Prev in Thread] Current Thread [Next in Thread]