[GNUnet-SVN] r9817 - in Extractor: . src/include src/main src/plugins

gnunet-svn
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r9817 - in Extractor: . src/include src/main src/plugins

From:	gnunet
Subject:	[GNUnet-SVN] r9817 - in Extractor: . src/include src/main src/plugins
Date:	Sun, 20 Dec 2009 01:06:45 +0100
Author: grothoff
Date: 2009-12-20 01:06:45 +0100 (Sun, 20 Dec 2009)
New Revision: 9817

Modified:
   Extractor/TODO
   Extractor/src/include/extractor.h
   Extractor/src/main/extractor_metatypes.c
   Extractor/src/plugins/id3v23_extractor.c
   Extractor/src/plugins/id3v24_extractor.c
   Extractor/src/plugins/id3v2_extractor.c
   Extractor/src/plugins/odf_extractor.c
   Extractor/src/plugins/ole2_extractor.c
   Extractor/src/plugins/png_extractor.c
   Extractor/src/plugins/qt_extractor.c
Log:
id3vx

Modified: Extractor/TODO
===================================================================
--- Extractor/TODO      2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/TODO      2009-12-20 00:06:45 UTC (rev 9817)
@@ -17,12 +17,12 @@
 
 Incomplete code (missing features):
 * RIFF (idx1 attribute)
-* IDv2{3,4} (some attributes, make testcases in test/id3v2/ work)
 * StarOffice sdw (some attributes, see doc/)
 * man pages (interpret sections for authors, brief description)
 * pdf: full-text extraction!
 * EXIV2
 * ELF: 64-bit support, lists of architectures, OSes, etc. are incomplete
+* ID3v2x: unsynchronization support, (de)compression support, footer support (24)
 
 Desirable missing formats:
 * mbox / various e-mail formats

Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h   2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/include/extractor.h   2009-12-20 00:06:45 UTC (rev 9817)
@@ -280,8 +280,8 @@
     EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
     EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
     EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
-    EXTRACTOR_METATYPE_TV_NETWORK_NAME = 151,
-    EXTRACTOR_METATYPE_TV_SHOW_NAME = 152,
+    EXTRACTOR_METATYPE_NETWORK_NAME = 151,
+    EXTRACTOR_METATYPE_SHOW_NAME = 152,
     EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
     EXTRACTOR_METATYPE_SONG_COUNT = 154,
     EXTRACTOR_METATYPE_STARTING_SONG = 155,
@@ -295,13 +295,17 @@
     EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
     EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
     EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
-    EXTRACTOR_METATYPE_LYRICS = 166,
-    EXTRACTOR_METATYPE_POPULARITY_METER = 167,
+    EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
+    EXTRACTOR_METATYPE_LYRICS = 167,
+    EXTRACTOR_METATYPE_POPULARITY_METER = 168,
+    EXTRACTOR_METATYPE_LICENSEE = 169,
+    EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
+    EXTRACTOR_METATYPE_MOOD = 171, 
+    EXTRACTOR_METATYPE_SUBTITLE = 172, 
 
     /* fixme: used up to here! */
 
     EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117,
-    EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123,
 
     
     EXTRACTOR_METATYPE_SCALE = 108,
@@ -342,7 +346,6 @@
     EXTRACTOR_METATYPE_FULL_NAME = 113,
     EXTRACTOR_METATYPE_LINK = 116,
     EXTRACTOR_METATYPE_TIME = 122,
-    EXTRACTOR_METATYPE_MOOD = 124, 
     EXTRACTOR_METATYPE_TELEVISION_SYSTEM = 126,
     EXTRACTOR_METATYPE_HARDWARE_DEPENDENCY = 129,
     EXTRACTOR_METATYPE_RIPPER = 130,

Modified: Extractor/src/main/extractor_metatypes.c
===================================================================
--- Extractor/src/main/extractor_metatypes.c    2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/main/extractor_metatypes.c    2009-12-20 00:06:45 UTC (rev 9817)
@@ -369,10 +369,10 @@
   /* 150 */
   { gettext_noop ("movie director"),
     gettext_noop ("name of the director") }, 
-  { gettext_noop ("TV network"),
-    gettext_noop ("name of the broadcasting TV network") }, 
-  { gettext_noop ("TV show"),
-    gettext_noop ("name of the TV show") }, 
+  { gettext_noop ("network"),
+    gettext_noop ("name of the broadcasting network or station") }, 
+  { gettext_noop ("show"),
+    gettext_noop ("name of the show") }, 
   { gettext_noop ("chapter name"),
     gettext_noop ("name of the chapter") }, 
   { gettext_noop ("song count"),
@@ -402,12 +402,25 @@
   /* 165 */
   { gettext_noop ("original release year"),
     gettext_noop ("year of the original release") }, 
+  { gettext_noop ("original performer"),
+    gettext_noop ("name of the original performer") }, 
   { gettext_noop ("lyrics"),
     gettext_noop ("lyrics of the song or text description of vocal activities") }, 
   { gettext_noop ("popularity"),
     gettext_noop ("information about the file's popularity") }, 
+  { gettext_noop ("licensee"),
+    gettext_noop ("name of the owner or licensee of the file") }, 
+  /* 170 */
+  { gettext_noop ("musician credit list"),
+    gettext_noop ("names of contributing musicians") }, 
+  { gettext_noop ("mood"),
+    gettext_noop ("keywords reflecting the mood of the piece") }, 
+  { gettext_noop ("subtitle"),
+    gettext_noop ("subtitle of this part") }, 
   { gettext_noop (""),
     gettext_noop ("") }, 
+  { gettext_noop (""),
+    gettext_noop ("") }, 
 #if 0
   
   gettext_noop("author"),

Modified: Extractor/src/plugins/id3v23_extractor.c
===================================================================
--- Extractor/src/plugins/id3v23_extractor.c    2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/id3v23_extractor.c    2009-12-20 00:06:45 UTC (rev 9817)
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2006, 2007 Vidyut Samanta and Christian Grothoff
+     (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -35,54 +35,83 @@
 
 #include "convert.h"
 
+enum Id3v23Fmt
+  {
+    T, /* simple, 0-terminated string, prefixed by encoding */
+    U, /* 0-terminated ASCII string, no encoding */
+    UL, /* unsync'ed lyrics */
+    SL, /* sync'ed lyrics */
+    L, /* string with language prefix */
+    I /* image */
+  };
+
 typedef struct
 {
   const char *text;
   enum EXTRACTOR_MetaType type;
+  enum Id3v23Fmt fmt;
 } Matches;
 
 static Matches tmap[] = {
-  {"COMM", EXTRACTOR_METATYPE_COMMENT},
-  {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"LINK", EXTRACTOR_METATYPE_LINK},
-  {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER},
-  {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER},
-  {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER},
-  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT},
-  {"TDAT", EXTRACTOR_METATYPE_DATE},
-  {"TCON", EXTRACTOR_METATYPE_CONTENT_TYPE},
-  {"TIT1", EXTRACTOR_METATYPE_GENRE},
-  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY},
-  {"TEXT", EXTRACTOR_METATYPE_LYRICS},
-  {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TOWN", EXTRACTOR_METATYPE_OWNER},
-  {"TPE1", EXTRACTOR_METATYPE_ARTIST},
-  {"TPE2", EXTRACTOR_METATYPE_ARTIST},
-  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR},
-  {"TPE4", EXTRACTOR_METATYPE_INTERPRET},
-  {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE},
-  {"TCOM", EXTRACTOR_METATYPE_CREATOR},
-  {"TIME", EXTRACTOR_METATYPE_TIME},
-  {"TOFN", EXTRACTOR_METATYPE_FILENAME},
-  {"TOPE", EXTRACTOR_METATYPE_ARTIST},
-  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER},
-  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER},
-  {"TRSC", EXTRACTOR_METATYPE_ISRC},
-  {"TRSN", EXTRACTOR_METATYPE_SOURCE},
-  {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR},
-  {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER},
-  {"TOAL", EXTRACTOR_METATYPE_ALBUM},
-  {"TALB", EXTRACTOR_METATYPE_ALBUM},
-  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE},
-  {"TYER", EXTRACTOR_METATYPE_YEAR},
-  {"TLEN", EXTRACTOR_METATYPE_DURATION},
-  {"TIT2", EXTRACTOR_METATYPE_TITLE},
-  {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION},
-  {"WCOM", EXTRACTOR_METATYPE_RELEASE},
-  {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER},
-  {"", EXTRACTOR_METATYPE_KEYWORDS},
-  {NULL, 0}
+  {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+  {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
+  {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
+  /* TDLY */
+  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
+  {"TEXT", EXTRACTOR_METATYPE_WRITER, T},  
+  {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+  /* TIME */
+  {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
+  {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
+  {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  /* TKEY */
+  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
+  {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
+  {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, 
+  {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+  {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+  {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+  {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
+  {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
+  {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
+  {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
+  {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
+  {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 
+  {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
+  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+  /* TRDA */
+  {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
+  /* TRSO */
+  {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
+  {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
+  /* TSSE */
+  {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
+  {"WCOM", EXTRACTOR_METATYPE_URL, U},
+  {"WCOP", EXTRACTOR_METATYPE_URL, U},
+  {"WOAF", EXTRACTOR_METATYPE_URL, U},
+  {"WOAS", EXTRACTOR_METATYPE_URL, U},
+  {"WORS", EXTRACTOR_METATYPE_URL, U},
+  {"WPAY", EXTRACTOR_METATYPE_URL, U},
+  {"WPUB", EXTRACTOR_METATYPE_URL, U},
+  {"WXXX", EXTRACTOR_METATYPE_URL, T},
+  {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+  /* ... */
+  {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
+  {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
+  {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
+  /* ... */
+  {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
+  /* ... */
+  {"LINK", EXTRACTOR_METATYPE_URL, U},
+  /* ... */
+  {"USER", EXTRACTOR_METATYPE_LICENSE, T},
+  /* ... */
+  {NULL, 0, T}
 };
 
 
@@ -104,6 +133,9 @@
   uint32_t csize;
   int i;
   uint16_t flags;
+  char *mime;
+  enum EXTRACTOR_MetaType type;
+  size_t off;
 
   if ((size < 16) ||
       (data[0] != 0x49) ||
@@ -111,12 +143,16 @@
       (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
     return 0;
   unsync = (data[5] & 0x80) > 0;
+  if (unsync)
+    return 0; /* not supported */
   extendedHdr = (data[5] & 0x40) > 0;
   experimental = (data[5] & 0x20) > 0;
+  if (experimental)
+    return 0;
   tsize = (((data[6] & 0x7F) << 21) |
            ((data[7] & 0x7F) << 14) |
            ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
-  if ((tsize + 10 > size) || (experimental))
+  if (tsize + 10 > size)
     return 0;
   pos = 10;
   padding = 0;
@@ -142,7 +178,8 @@
       csize =
         (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
         data[pos + 7];
-      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
+      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
+         (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
         break;
       flags = (data[pos + 8] << 8) + data[pos + 9];
       if (((flags & 0x80) > 0) /* compressed, not yet supported */  ||
@@ -163,32 +200,191 @@
                   pos++;
                   csize--;
                 }
-              csize--;
-              /* this byte describes the encoding
-                 try to convert strings to UTF-8
-                 if it fails, then forget it */
-              switch (data[pos + 10])
-                {
-                case 0x00:
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
-                                        csize, "ISO-8859-1");
-                  break;
-                case 0x01:
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
-                                        csize, "UCS-2");
-                  break;
-                default:
-                  /* bad encoding byte,
-                     try to convert from iso-8859-1 */
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
-                                        csize, "ISO-8859-1");
-                  break;
-                }
-              pos++;
+             switch (tmap[i].fmt)
+               {
+               case T:
+                 /* this byte describes the encoding
+                    try to convert strings to UTF-8
+                    if it fails, then forget it */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
+                                                              csize - 1, "ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
+                                                              csize - 1, "UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
+                                                              csize - 1, "ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case U:
+                 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
+                                                          csize, "ISO-8859-1");
+                 break;
+               case UL:
+                 if (csize < 6)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 off = 14;
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+                                                              csize - off, "ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+                                                              csize - off, "UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+                                                              csize - off, "ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case SL:
+                 if (csize < 7)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
+                                                              csize - 6, "ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
+                                                              csize - 6, "UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
+                                                              csize - 6, "ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case L:
+                 if (csize < 5)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
+                                                              csize - 4, "ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
+                                                              csize - 4, "UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
+                                                              csize - 4, "ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case I:
+                 if (csize < 2)
+                   return 0; /* malformed */
+                 /* find end of mime type */
+                 off = 11;
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 mime = strdup ((const char*) &data[pos + 11]);
+                 
+                 switch (data[pos+off])
+                   {
+                   case 0x03:
+                   case 0x04:
+                     type = EXTRACTOR_METATYPE_COVER_PICTURE;
+                     break;
+                   case 0x07:
+                   case 0x08:
+                   case 0x09:
+                   case 0x0A:
+                   case 0x0B:
+                   case 0x0C:
+                     type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
+                     break;
+                   case 0x0D:
+                   case 0x0E:
+                   case 0x0F:
+                     type = EXTRACTOR_METATYPE_EVENT_PICTURE;
+                     break;
+                   case 0x14:
+                     type = EXTRACTOR_METATYPE_LOGO;
+                     type = EXTRACTOR_METATYPE_LOGO;
+                     break;
+                   default:
+                     type = EXTRACTOR_METATYPE_PICTURE;
+                     break;
+                   }
+                 off++;
+
+                 /* find end of description */
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 if (0 == strcasecmp ("-->",
+                                      mime))
+                   {
+                     /* not supported */
+                   }
+                 else
+                   {
+                     if (0 != proc (proc_cls,
+                                    "id3v23",
+                                    type,
+                                    EXTRACTOR_METAFORMAT_BINARY,
+                                    mime,
+                                    (const char*) &data[pos + off],
+                                    csize + 6 - off))                  
+                       {
+                         free (mime);
+                         return 1;
+                       }
+                   }
+                 free (mime);
+                 word = NULL;
+                 break;
+               default:
+                 return 0;
+               }             
               if ((word != NULL) && (strlen (word) > 0))
                 {
                  if (0 != proc (proc_cls,
-                                "id3v2",
+                                "id3v23",
                                 tmap[i].type,
                                 EXTRACTOR_METAFORMAT_UTF8,
                                 "text/plain",

Modified: Extractor/src/plugins/id3v24_extractor.c
===================================================================
--- Extractor/src/plugins/id3v24_extractor.c    2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/id3v24_extractor.c    2009-12-20 00:06:45 UTC (rev 9817)
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff
+     (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -18,7 +18,6 @@
      Boston, MA 02111-1307, USA.
 
  */
-
 #define DEBUG_EXTRACT_ID3v24 0
 
 #include "platform.h"
@@ -33,72 +32,98 @@
 #ifndef MINGW
 #include <sys/mman.h>
 #endif
+
 #include "convert.h"
 
+enum Id3v24Fmt
+  {
+    T, /* simple, 0-terminated string, prefixed by encoding */
+    U, /* 0-terminated ASCII string, no encoding */
+    UL, /* unsync'ed lyrics */
+    SL, /* sync'ed lyrics */
+    L, /* string with language prefix */
+    I /* image */
+  };
 
-static struct EXTRACTOR_Keywords *
-addKeyword (EXTRACTOR_KeywordList * oldhead,
-            char *phrase, EXTRACTOR_KeywordType type)
-{
-  EXTRACTOR_KeywordList *keyword;
-
-  keyword = malloc (sizeof (EXTRACTOR_KeywordList));
-  keyword->next = oldhead;
-  keyword->keyword = phrase;
-  keyword->keywordType = type;
-  return keyword;
-}
-
 typedef struct
 {
-  char *text;
+  const char *text;
   enum EXTRACTOR_MetaType type;
+  enum Id3v24Fmt fmt;
 } Matches;
 
 static Matches tmap[] = {
-  {"COMM", EXTRACTOR_METATYPE_COMMENT},
-  {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TMOO", EXTRACTOR_METATYPE_MOOD},
-  {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST},
-  {"LINK", EXTRACTOR_METATYPE_LINK},
-  {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER},
-  {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER},
-  {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER},
-  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT},
-  {"TDRC", EXTRACTOR_METATYPE_DATE},
-  {"TCON", EXTRACTOR_METATYPE_GENRE},
-  {"TIT1", EXTRACTOR_METATYPE_GENRE},
-  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY},
-  {"TEXT", EXTRACTOR_METATYPE_LYRICS},
-  {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR},
-  {"TOWN", EXTRACTOR_METATYPE_OWNER},
-  {"TPE1", EXTRACTOR_METATYPE_ARTIST},
-  {"TPE2", EXTRACTOR_METATYPE_ARTIST},
-  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR},
-  {"TPE4", EXTRACTOR_METATYPE_INTERPRET},
-  {"TIME", EXTRACTOR_METATYPE_TIME},
-  {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE},
-  {"TCOM", EXTRACTOR_METATYPE_CREATOR},
-  {"TOFN", EXTRACTOR_METATYPE_FILENAME},
-  {"TOPE", EXTRACTOR_METATYPE_ARTIST},
-  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER},
-  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER},
-  {"TRSC", EXTRACTOR_METATYPE_ISRC},
-  {"TRSN", EXTRACTOR_METATYPE_SOURCE},
-  {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR},
-  {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER},
-  {"TYER", EXTRACTOR_METATYPE_YEAR},
-  {"TOAL", EXTRACTOR_METATYPE_ALBUM},
-  {"TALB", EXTRACTOR_METATYPE_ALBUM},
-  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE},
-  {"TIT2", EXTRACTOR_METATYPE_TITLE},
-  {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION},
-  {"WCOM", EXTRACTOR_METATYPE_RELEASE},
-  {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER},
-  {"", EXTRACTOR_METATYPE_KEYWORDS},
-  {NULL, 0}
+  {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+  {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
+  {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
+  /* TDLY */
+  {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
+  {"TEXT", EXTRACTOR_METATYPE_WRITER, T},  
+  {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+  /* TIME, deprecated in 24 */
+  {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
+  {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
+  {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
+  /* TKEY */
+  {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
+  {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
+  {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, 
+  {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+  {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+  {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+  {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
+  /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
+  {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
+  {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
+  {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
+  {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 
+  {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+  {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
+  {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+  /* TRDA, deprecated in 24 */
+  {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
+  /* TRSO */
+  /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
+  {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
+  /* TSSE */
+  /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
+  {"WCOM", EXTRACTOR_METATYPE_URL, U},
+  {"WCOP", EXTRACTOR_METATYPE_URL, U},
+  {"WOAF", EXTRACTOR_METATYPE_URL, U},
+  {"WOAS", EXTRACTOR_METATYPE_URL, U},
+  {"WORS", EXTRACTOR_METATYPE_URL, U},
+  {"WPAY", EXTRACTOR_METATYPE_URL, U},
+  {"WPUB", EXTRACTOR_METATYPE_URL, U},
+  {"WXXX", EXTRACTOR_METATYPE_URL, T},
+  /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
+  /* ... */
+  {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
+  {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
+  {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
+  /* ... */
+  {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
+  /* ... */
+  {"LINK", EXTRACTOR_METATYPE_URL, U},
+  /* ... */
+  {"USER", EXTRACTOR_METATYPE_LICENSE, T},
+  /* ... */
+  /* new frames in 24 */
+  /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
+  {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
+  /* TDRC, TDRL, TDTG */
+  {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+  {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
+  {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
+  {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
+  {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
+  {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
+  {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
+  {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
+  {NULL, 0, T}
 };
 
 
@@ -114,54 +139,60 @@
   int extendedHdr;
   int experimental;
   int footer;
-  unsigned int tsize;
-  unsigned int pos;
-  unsigned int ehdrSize;
-  unsigned int padding;
+  uint32_t tsize;
+  uint32_t pos;
+  uint32_t ehdrSize;
+  uint32_t padding;
+  uint32_t csize;
+  int i;
+  uint16_t flags;
+  char *mime;
+  enum EXTRACTOR_MetaType type;
+  size_t off;
 
   if ((size < 16) ||
       (data[0] != 0x49) ||
       (data[1] != 0x44) ||
       (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
-    return prev;
+    return 0;
   unsync = (data[5] & 0x80) > 0;
+  if (unsync)
+    return 0; /* not supported */
   extendedHdr = (data[5] & 0x40) > 0;
   experimental = (data[5] & 0x20) > 0;
+  if (experimental)
+    return 0;
   footer = (data[5] & 0x10) > 0;
   tsize = (((data[6] & 0x7F) << 21) |
            ((data[7] & 0x7F) << 14) |
            ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
-  if ((tsize + 10 > size) || (experimental))
-    return prev;
+  if (tsize + 10 > size)
+    return 0;
   pos = 10;
   padding = 0;
   if (extendedHdr)
     {
       ehdrSize = (((data[10] & 0x7F) << 21) |
-                  ((data[11] & 0x7F) << 14) |
-                  ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
-      pos += ehdrSize;
+                 ((data[11] & 0x7F) << 14) |
+                 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
+      pos += 4 + ehdrSize;
+      if (ehdrSize > tsize)
+       return 0;
     }
-
-
   while (pos < tsize)
     {
-      size_t csize;
-      int i;
-      unsigned short flags;
-
       if (pos + 10 > tsize)
-        return prev;
-
-      csize = (((data[pos + 4] & 0x7F) << 21) |
-               ((data[pos + 5] & 0x7F) << 14) |
-               ((data[pos + 6] & 0x7F) << 7) | ((data[pos + 7] & 0x7F) << 0));
-
-      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
+        return 0;
+      csize =
+        (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
+        data[pos + 7];
+      if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
+         (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
         break;
       flags = (data[pos + 8] << 8) + data[pos + 9];
-      if (((flags & 0x80) > 0) /* compressed, not yet supported */  ||
-          ((flags & 0x40) > 0) /* encrypted, not supported */ )
+      if (((flags & 0x08) > 0) /* compressed, not yet supported */  ||
+          ((flags & 0x04) > 0) /* encrypted, not supported */ ||
+          ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
         {
           pos += 10 + csize;
           continue;
@@ -172,59 +203,216 @@
           if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
             {
               char *word;
-              if ((flags & 0x20) > 0)
+              if ((flags & 0x40) > 0)
                 {
                   /* "group" identifier, skip a byte */
                   pos++;
                   csize--;
                 }
 
-              /* this byte describes the encoding
-                 try to convert strings to UTF-8
-                 if it fails, then forget it */
-              csize--;
-              switch (data[pos + 10])
-                {
-                case 0x00:
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                        csize, "ISO-8859-1");
-                  break;
-                case 0x01:
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                        csize, "UTF-16");
-                  break;
-                case 0x02:
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                        csize, "UTF-16BE");
-                  break;
-                case 0x03:
-                  word = malloc (csize + 1);
-                  memcpy (word, &data[pos + 11], csize);
-                  word[csize] = '\0';
-                  break;
-                default:
-                  /* bad encoding byte,
-                     try to convert from iso-8859-1 */
-                  word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
-                                        csize, "ISO-8859-1");
-                  break;
-                }
-              pos++;
+             switch (tmap[i].fmt)
+               {
+               case T:
+                 /* this byte describes the encoding
+                    try to convert strings to UTF-8
+                    if it fails, then forget it */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
+                                                              csize - 1, 
"ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
+                                                              csize - 1, 
"UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 11],
+                                                              csize - 1, 
"ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case U:
+                 word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 10],
+                                                          csize, "ISO-8859-1");
+                 break;
+               case UL:
+                 if (csize < 6)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 off = 14;
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
+                                                              csize - off, 
"ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
+                                                              csize - off, 
"UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + off],
+                                                              csize - off, 
"ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case SL:
+                 if (csize < 7)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
+                                                              csize - 6, 
"ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
+                                                              csize - 6, 
"UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 16],
+                                                              csize - 6, 
"ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case L:
+                 if (csize < 5)
+                   return 0; /* malformed */
+                 /* find end of description */
+                 switch (data[pos + 10])
+                   {
+                   case 0x00:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
+                                                              csize - 4, 
"ISO-8859-1");
+                     break;
+                   case 0x01:
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
+                                                              csize - 4, 
"UCS-2");
+                     break;
+                   default:
+                     /* bad encoding byte,
+                        try to convert from iso-8859-1 */
+                     word = EXTRACTOR_common_convert_to_utf8 ((const char *) 
&data[pos + 14],
+                                                              csize - 4, 
"ISO-8859-1");
+                     break;
+                   }
+                 break;
+               case I:
+                 if (csize < 2)
+                   return 0; /* malformed */
+                 /* find end of mime type */
+                 off = 11;
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 mime = strdup ((const char*) &data[pos + 11]);
+                 
+                 switch (data[pos+off])
+                   {
+                   case 0x03:
+                   case 0x04:
+                     type = EXTRACTOR_METATYPE_COVER_PICTURE;
+                     break;
+                   case 0x07:
+                   case 0x08:
+                   case 0x09:
+                   case 0x0A:
+                   case 0x0B:
+                   case 0x0C:
+                     type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
+                     break;
+                   case 0x0D:
+                   case 0x0E:
+                   case 0x0F:
+                     type = EXTRACTOR_METATYPE_EVENT_PICTURE;
+                     break;
+                   case 0x14:
+                     type = EXTRACTOR_METATYPE_LOGO;
+                     type = EXTRACTOR_METATYPE_LOGO;
+                     break;
+                   default:
+                     type = EXTRACTOR_METATYPE_PICTURE;
+                     break;
+                   }
+                 off++;
+
+                 /* find end of description */
+                 while ( (off < size) &&
+                         (off - pos < csize) &&
+                         (data[pos + off] == '\0') )
+                   off++;
+                 if ( (off >= csize) ||
+                      (data[pos+off] != '\0') )
+                   return 0; /* malformed */
+                 off++;
+                 if (0 == strcasecmp ("-->",
+                                      mime))
+                   {
+                     /* not supported */
+                   }
+                 else
+                   {
+                     if (0 != proc (proc_cls,
+                                    "id3v24",
+                                    type,
+                                    EXTRACTOR_METAFORMAT_BINARY,
+                                    mime,
+                                    (const char*) &data[pos + off],
+                                    csize + 6 - off))                  
+                       {
+                         free (mime);
+                         return 1;
+                       }
+                   }
+                 free (mime);
+                 word = NULL;
+                 break;
+               default:
+                 return 0;
+               }             
               if ((word != NULL) && (strlen (word) > 0))
                 {
-                  prev = addKeyword (prev, word, tmap[i].type);
+                 if (0 != proc (proc_cls,
+                                "id3v24",
+                                tmap[i].type,
+                                EXTRACTOR_METAFORMAT_UTF8,
+                                "text/plain",
+                                word,
+                                strlen(word)+1))
+                   {
+                     free (word);
+                     return 1;
+                   }
                 }
-              else
-                {
-                  free (word);
-                }
+             free (word);
               break;
             }
           i++;
         }
       pos += 10 + csize;
     }
-  return prev;
+  return 0;
 }
 
 /* end of id3v24_extractor.c */

Modified: Extractor/src/plugins/id3v2_extractor.c
===================================================================
--- Extractor/src/plugins/id3v2_extractor.c     2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/id3v2_extractor.c     2009-12-20 00:06:45 UTC (rev 9817)
@@ -113,7 +113,7 @@
   /* skipping CRM */
   /* skipping CRA */
   /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */
-  {NULL, 0},
+  {NULL, 0, T},
 };
 
 

Modified: Extractor/src/plugins/odf_extractor.c
===================================================================
--- Extractor/src/plugins/odf_extractor.c       2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/odf_extractor.c       2009-12-20 00:06:45 UTC (rev 9817)
@@ -44,7 +44,7 @@
   { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE },
   { "dc:date",            EXTRACTOR_METATYPE_UNKNOWN_DATE },
   { "dc:creator",         EXTRACTOR_METATYPE_CREATOR },
-  { "dc:language",        EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE },
+  { "dc:language",        EXTRACTOR_METATYPE_LANGUAGE },
   { "dc:title",           EXTRACTOR_METATYPE_TITLE },
   { "dc:description",     EXTRACTOR_METATYPE_DESCRIPTION },
   { "dc:subject",         EXTRACTOR_METATYPE_SUBJECT },

Modified: Extractor/src/plugins/ole2_extractor.c
===================================================================
--- Extractor/src/plugins/ole2_extractor.c      2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/ole2_extractor.c      2009-12-20 00:06:45 UTC (rev 9817)
@@ -107,7 +107,7 @@
   { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
   { "meta:template", EXTRACTOR_METATYPE_TEMPLATE },
   { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES }, 
-  /* { "Dictionary", EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE },  */
+  /* { "Dictionary", EXTRACTOR_METATYPE_LANGUAGE },  */
   /* { "gsf:security", EXTRACTOR_SECURITY }, */
   /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */
   /* { "meta:editing-duration", EXTRACTOR_METATYPE_TOTAL_EDITING_TIME }, // encoding? */
@@ -544,7 +544,7 @@
   if ( (lang != NULL) && (ret == 0) )
     ret = addKeyword(proc, proc_cls,
                     lang,
-                    EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE);  
+                    EXTRACTOR_METATYPE_LANGUAGE);  
   if (lcb >= 6) {
     for (i=0;i<gsf_infile_num_children(infile);i++) {
       if (ret != 0)

Modified: Extractor/src/plugins/png_extractor.c
===================================================================
--- Extractor/src/plugins/png_extractor.c       2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/png_extractor.c       2009-12-20 00:06:45 UTC (rev 9817)
@@ -141,7 +141,7 @@
   language = &data[pos];
   ret = 0;
   if (stnlen (language, length - pos) > 0)
-    ADDF (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE,
+    ADDF (EXTRACTOR_METATYPE_LANGUAGE,
          stndup (language, length - pos));
   pos += stnlen (language, length - pos) + 1;
   if (pos + 1 >= length)

Modified: Extractor/src/plugins/qt_extractor.c
===================================================================
--- Extractor/src/plugins/qt_extractor.c        2009-12-20 00:01:02 UTC (rev 9816)
+++ Extractor/src/plugins/qt_extractor.c        2009-12-20 00:06:45 UTC (rev 9817)
@@ -395,9 +395,9 @@
   {"catg", EXTRACTOR_METATYPE_SECTION},
   {"keyw", EXTRACTOR_METATYPE_KEYWORDS},
   {"desc", EXTRACTOR_METATYPE_DESCRIPTION},
-  {"tvnn", EXTRACTOR_METATYPE_TV_NETWORK_NAME},
-  {"tvsh", EXTRACTOR_METATYPE_TV_SHOW_NAME}, 
-  {"tven", EXTRACTOR_METATYPE_TV_NETWORK_NAME},
+  {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME},
+  {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME}, 
+  {"tven", EXTRACTOR_METATYPE_NETWORK_NAME},
   {NULL, EXTRACTOR_METATYPE_RESERVED}
 };
 
@@ -850,7 +850,7 @@
   lang = ntohs (txt->language);
   if (lang >= sizeof (languages) / sizeof (char *))
     return 0;                   /* invalid */
-  addKeyword (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE, languages[lang], ec);
+  addKeyword (EXTRACTOR_METATYPE_LANGUAGE, languages[lang], ec);
 
   meta = malloc (len + 1);
   memcpy (meta, &txt[1], len);
[Prev in Thread]
Current Thread
[Next in Thread]
[GNUnet-SVN] r9817 - in Extractor: . src/include src/main src/plugins, gnunet <=
Prev by Date: [GNUnet-SVN] r9816 - GNUnet/src/applications/dv_dht/module
Next by Date: [GNUnet-SVN] r9818 - Extractor/src/main
Previous by thread: [GNUnet-SVN] r9816 - GNUnet/src/applications/dv_dht/module
Next by thread: [GNUnet-SVN] r9818 - Extractor/src/main
Index(es):
- Date
- Thread