gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r6034 - Extractor/src/plugins


From: gnunet
Subject: [GNUnet-SVN] r6034 - Extractor/src/plugins
Date: Mon, 24 Dec 2007 18:26:47 -0700 (MST)

Author: grothoff
Date: 2007-12-24 18:26:46 -0700 (Mon, 24 Dec 2007)
New Revision: 6034

Modified:
   Extractor/src/plugins/qtextractor.c
Log:
Add support for some common iTunes tags to qtextractor.

Heikki Lindholm



Modified: Extractor/src/plugins/qtextractor.c
===================================================================
--- Extractor/src/plugins/qtextractor.c 2007-12-24 06:04:06 UTC (rev 6033)
+++ Extractor/src/plugins/qtextractor.c 2007-12-25 01:26:46 UTC (rev 6034)
@@ -25,6 +25,162 @@
 
 #define DEBUG 0
 
+/* Genre name table, copied verbatim from mp3extractor.
+ * Every string is wrapped in gettext_noop().  The "gnre" handling in
+ * processDataAtom selects an entry with (genre - 1), i.e. the values
+ * stored in the file are 1-based indexes into this table. */
+static const char *const genre_names[] = {
+  gettext_noop ("Blues"),
+  gettext_noop ("Classic Rock"),
+  gettext_noop ("Country"),
+  gettext_noop ("Dance"),
+  gettext_noop ("Disco"),
+  gettext_noop ("Funk"),
+  gettext_noop ("Grunge"),
+  gettext_noop ("Hip-Hop"),
+  gettext_noop ("Jazz"),
+  gettext_noop ("Metal"),
+  gettext_noop ("New Age"),
+  gettext_noop ("Oldies"),
+  gettext_noop ("Other"),
+  gettext_noop ("Pop"),
+  gettext_noop ("R&B"),
+  gettext_noop ("Rap"),
+  gettext_noop ("Reggae"),
+  gettext_noop ("Rock"),
+  gettext_noop ("Techno"),
+  gettext_noop ("Industrial"),
+  gettext_noop ("Alternative"),
+  gettext_noop ("Ska"),
+  gettext_noop ("Death Metal"),
+  gettext_noop ("Pranks"),
+  gettext_noop ("Soundtrack"),
+  gettext_noop ("Euro-Techno"),
+  gettext_noop ("Ambient"),
+  gettext_noop ("Trip-Hop"),
+  gettext_noop ("Vocal"),
+  gettext_noop ("Jazz+Funk"),
+  gettext_noop ("Fusion"),
+  gettext_noop ("Trance"),
+  gettext_noop ("Classical"),
+  gettext_noop ("Instrumental"),
+  gettext_noop ("Acid"),
+  gettext_noop ("House"),
+  gettext_noop ("Game"),
+  gettext_noop ("Sound Clip"),
+  gettext_noop ("Gospel"),
+  gettext_noop ("Noise"),
+  gettext_noop ("Alt. Rock"),
+  gettext_noop ("Bass"),
+  gettext_noop ("Soul"),
+  gettext_noop ("Punk"),
+  gettext_noop ("Space"),
+  gettext_noop ("Meditative"),
+  gettext_noop ("Instrumental Pop"),
+  gettext_noop ("Instrumental Rock"),
+  gettext_noop ("Ethnic"),
+  gettext_noop ("Gothic"),
+  gettext_noop ("Darkwave"),
+  gettext_noop ("Techno-Industrial"),
+  gettext_noop ("Electronic"),
+  gettext_noop ("Pop-Folk"),
+  gettext_noop ("Eurodance"),
+  gettext_noop ("Dream"),
+  gettext_noop ("Southern Rock"),
+  gettext_noop ("Comedy"),
+  gettext_noop ("Cult"),
+  gettext_noop ("Gangsta Rap"),
+  gettext_noop ("Top 40"),
+  gettext_noop ("Christian Rap"),
+  gettext_noop ("Pop/Funk"),
+  gettext_noop ("Jungle"),
+  gettext_noop ("Native American"),
+  gettext_noop ("Cabaret"),
+  gettext_noop ("New Wave"),
+  gettext_noop ("Psychedelic"),
+  gettext_noop ("Rave"),
+  gettext_noop ("Showtunes"),
+  gettext_noop ("Trailer"),
+  gettext_noop ("Lo-Fi"),
+  gettext_noop ("Tribal"),
+  gettext_noop ("Acid Punk"),
+  gettext_noop ("Acid Jazz"),
+  gettext_noop ("Polka"),
+  gettext_noop ("Retro"),
+  gettext_noop ("Musical"),
+  gettext_noop ("Rock & Roll"),
+  gettext_noop ("Hard Rock"),
+  gettext_noop ("Folk"),
+  gettext_noop ("Folk/Rock"),
+  gettext_noop ("National Folk"),
+  gettext_noop ("Swing"),
+  gettext_noop ("Fast-Fusion"),
+  gettext_noop ("Bebob"),
+  gettext_noop ("Latin"),
+  gettext_noop ("Revival"),
+  gettext_noop ("Celtic"),
+  gettext_noop ("Bluegrass"),
+  gettext_noop ("Avantgarde"),
+  gettext_noop ("Gothic Rock"),
+  gettext_noop ("Progressive Rock"),
+  gettext_noop ("Psychedelic Rock"),
+  gettext_noop ("Symphonic Rock"),
+  gettext_noop ("Slow Rock"),
+  gettext_noop ("Big Band"),
+  gettext_noop ("Chorus"),
+  gettext_noop ("Easy Listening"),
+  gettext_noop ("Acoustic"),
+  gettext_noop ("Humour"),
+  gettext_noop ("Speech"),
+  gettext_noop ("Chanson"),
+  gettext_noop ("Opera"),
+  gettext_noop ("Chamber Music"),
+  gettext_noop ("Sonata"),
+  gettext_noop ("Symphony"),
+  gettext_noop ("Booty Bass"),
+  gettext_noop ("Primus"),
+  gettext_noop ("Porn Groove"),
+  gettext_noop ("Satire"),
+  gettext_noop ("Slow Jam"),
+  gettext_noop ("Club"),
+  gettext_noop ("Tango"),
+  gettext_noop ("Samba"),
+  gettext_noop ("Folklore"),
+  gettext_noop ("Ballad"),
+  gettext_noop ("Power Ballad"),
+  gettext_noop ("Rhythmic Soul"),
+  gettext_noop ("Freestyle"),
+  gettext_noop ("Duet"),
+  gettext_noop ("Punk Rock"),
+  gettext_noop ("Drum Solo"),
+  gettext_noop ("A Cappella"),
+  gettext_noop ("Euro-House"),
+  gettext_noop ("Dance Hall"),
+  gettext_noop ("Goa"),
+  gettext_noop ("Drum & Bass"),
+  gettext_noop ("Club-House"),
+  gettext_noop ("Hardcore"),
+  gettext_noop ("Terror"),
+  gettext_noop ("Indie"),
+  gettext_noop ("BritPop"),
+  gettext_noop ("Negerpunk"),
+  gettext_noop ("Polsk Punk"),
+  gettext_noop ("Beat"),
+  gettext_noop ("Christian Gangsta Rap"),
+  gettext_noop ("Heavy Metal"),
+  gettext_noop ("Black Metal"),
+  gettext_noop ("Crossover"),
+  gettext_noop ("Contemporary Christian"),
+  gettext_noop ("Christian Rock"),
+  gettext_noop ("Merengue"),
+  gettext_noop ("Salsa"),
+  gettext_noop ("Thrash Metal"),
+  gettext_noop ("Anime"),
+  gettext_noop ("JPop"),
+  gettext_noop ("Synthpop"),
+};
+
+/* Number of entries in genre_names, as an unsigned int. */
+#define GENRE_NAME_COUNT \
+    ((unsigned int) (sizeof genre_names / sizeof genre_names[0]))
+
+
 typedef struct
 {
   unsigned int size;
@@ -136,22 +292,32 @@
                             size_t size,
                             size_t pos, struct EXTRACTOR_Keywords ** list);
 
+/**
+ * One row of an atom dispatch table: pairs an atom type name with the
+ * function that handles atoms of that type.  The tables built from this
+ * type in this file (all_handlers, ilst_handlers) end with a
+ * {NULL, NULL} row.
+ */
+typedef struct
+{
+  char *name;           /* atom type name, e.g. "moov"; NULL in the last row */
+  AtomHandler handler;  /* function to call for atoms with this name */
+} HandlerEntry;
+
 /**
  * Call the handler for the atom at the given position.
  * Will check validity of the given atom.
  *
  * @return 0 on error, 1 for success, -1 for unknown atom type
  */
-static int handleAtom (const char *input,
+static int handleAtom (HandlerEntry *handlers,
+                       const char *input,
                        size_t size,
                        size_t pos, struct EXTRACTOR_Keywords **list);
 
+static HandlerEntry all_handlers[];
+static HandlerEntry ilst_handlers[];
+
 /**
- * Process all atoms.
+ * Process atoms.
  * @return 0 on error, 1 for success, -1 for unknown atom type
  */
 static int
-processAllAtoms (const char *input,
+processAtoms (HandlerEntry *handlers, const char *input,
                  size_t size, struct EXTRACTOR_Keywords **list)
 {
   size_t pos;
@@ -161,7 +327,7 @@
   pos = 0;
   while (pos < size - sizeof (Atom))
     {
-      if (0 == handleAtom (input, size, pos, list))
+      if (0 == handleAtom (handlers, input, size, pos, list))
         return 0;
       pos += getAtomSize (&input[pos]);
     }
@@ -169,6 +335,17 @@
 }
 
 /**
+ * Process all atoms in the buffer using the top-level handler table
+ * (all_handlers).
+ *
+ * @param input buffer holding the atoms
+ * @param size number of bytes in input
+ * @param list keyword list handed through to the atom handlers
+ * @return 0 on error, 1 for success, -1 for unknown atom type
+ */
+static int
+processAllAtoms (const char *input,
+                 size_t size, struct EXTRACTOR_Keywords **list)
+{
+  /* Hand the result back to the caller: flowing off the end of a
+     non-void function leaves the caller with an indeterminate value. */
+  return processAtoms (all_handlers, input, size, list);
+}
+
+/**
  * Handle the moov atom.
  * @return 0 on error, 1 for success, -1 for unknown atom type
  */
@@ -181,6 +358,7 @@
                           getAtomSize (&input[pos]) - hdr, list);
 }
 
+/* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */
 typedef struct
 {
   Atom header;
@@ -198,9 +376,12 @@
   const char *mime;
 } C2M;
 
+/* see http://www.mp4ra.org/filetype.html 
+ *     http://www.ftyps.com/ */
 static C2M ftMap[] = {
   {"qt  ", "video/quicktime"},
   {"isom", "video/mp4"},        /* ISO Base Media files */
+  {"iso2", "video/mp4"},
   {"mp41", "video/mp4"},        /* MPEG-4 (ISO/IEC 14491-1) version 1 */
   {"mp42", "video/mp4"},        /* MPEG-4 (ISO/IEC 14491-1) version 2 */
   {"3gp1", "video/3gpp"},
@@ -210,9 +391,12 @@
   {"3gp5", "video/3gpp"},
   {"3g2a", "video/3gpp2"},
   {"mmp4", "video/mp4"},        /* Mobile MPEG-4 */
-  {"M4A ", "video/mp4"},
-  {"M4P ", "video/mp4"},
-  {"mjp2", "video/mj2"},        /* Motion JPEG 2000 */
+  {"M4A ", "audio/mp4"},
+  {"M4B ", "audio/mp4"},
+  {"M4P ", "audio/mp4"},
+  {"M4V ", "video/mp4"},
+  {"mj2s", "video/mj2"},        /* Motion JPEG 2000 */
+  {"mjp2", "video/mj2"},
   {NULL, NULL},
 };
 
@@ -223,8 +407,9 @@
   const FileType *ft;
   int i;
 
-  if (getAtomSize (&input[pos]) != sizeof (FileType))
+  if (getAtomSize (&input[pos]) < sizeof (FileType)) {
     return 0;
+  }
   ft = (const FileType *) &input[pos];
 
   i = 0;
@@ -344,7 +529,7 @@
       free (buf);
       return 0;                 /* decode error? */
     }
-  ret = handleAtom (buf, s, 0, list);
+  ret = handleAtom (all_handlers, buf, s, 0, list);
   free (buf);
   return ret;
 }
@@ -645,18 +830,147 @@
                           getAtomSize (&input[pos]) - hdr, list);
 }
 
+/**
+ * Handle the "data" child atom of an iTunes tag atom and add its
+ * payload to the keyword list.
+ *
+ * A payload flagged as text (flags == 1) is added under the given
+ * keyword type with every '\r' replaced by '\n'.  A binary payload
+ * (flags == 0) is only understood when the parent atom is "gnre": a
+ * big-endian 16-bit value then selects an entry of genre_names
+ * (1-based) which is added as EXTRACTOR_GENRE.
+ *
+ * @param input buffer containing the atom
+ * @param size parent atom size (not used here; the bound is re-read
+ *        from patom)
+ * @param pos offset of the data atom inside input
+ * @param patom start of the parent (tag) atom
+ * @param type keyword type used for text payloads
+ * @param list keyword list to extend
+ * @return 0 on error, 1 for success, -1 if the atom is not a "data"
+ *         atom or carries an unsupported version or flags value
+ */
+static int
+processDataAtom (const char *input,
+                size_t size, /* parent atom size */
+                size_t pos,
+                const char *patom,
+                EXTRACTOR_KeywordType type,
+                struct EXTRACTOR_Keywords **list)
+{
+  const char *data;
+  char *meta;
+  unsigned char version;
+  unsigned int flags;
+  unsigned long long asize;
+  unsigned long long payload;
+  unsigned int len;
+  unsigned int hdr;
+  unsigned int i;
+
+  hdr = getAtomHeaderSize (&input[pos]);
+  asize = getAtomSize (&input[pos]);
+  if (memcmp(&input[pos+4], "data", 4) != 0)
+    return -1;
+
+  if (asize < hdr + 8 || /* header + u32 flags + u32 reserved */
+      asize > (getAtomSize(&patom[0]) - 8))
+    return 0;
+
+  /* Refuse payloads whose length does not fit: otherwise the
+     truncating cast (or len + 1 below) could wrap around and the
+     allocation would be smaller than the copy. */
+  payload = asize - (hdr + 8);
+  if (payload >= (unsigned int) -1)
+    return 0;
+  len = (unsigned int) payload;
+
+  /* Version/flags word and reserved word follow the atom header, so
+     address them relative to hdr, consistent with how len is computed. */
+  version = input[pos+hdr];
+  flags = ((unsigned char)input[pos+hdr+1]<<16) |
+          ((unsigned char)input[pos+hdr+2]<<8) | 
+          (unsigned char)input[pos+hdr+3];
+  data = &input[pos+hdr+8];
+#if DEBUG
+  printf("[data] version:%02x flags:%08x txtlen:%u\n", version, flags, len);
+#endif
+
+  if (version != 0)
+    return -1;
+
+  if (flags == 0x0) { /* binary data */
+    if (memcmp(&patom[4], "gnre", 4) == 0) {
+      if (len >= 2) {
+        unsigned int genre = ((unsigned char)data[0] << 8) |
+                             (unsigned char)data[1];
+        /* genre is 1-based (index genre-1), so GENRE_NAME_COUNT itself
+           is the last valid value */
+        if (genre > 0 && genre <= GENRE_NAME_COUNT)
+          addKeyword(EXTRACTOR_GENRE, genre_names[genre-1], list);
+      }
+      return 1;
+    }
+    else {
+      return -1;
+    }
+  }
+  else if (flags == 0x1) { /* text data */
+    meta = malloc (len + 1);
+    if (meta == NULL)
+      return 0;                 /* out of memory */
+    memcpy (meta, data, len);
+    meta[len] = '\0';
+    for (i = 0; i < len; i++)
+      if (meta[i] == '\r')
+        meta[i] = '\n';
+    addKeyword (type, meta, list);
+    free (meta);
+    return 1;
+  }
+
+  return -1;
+}
+
 typedef struct
 {
-  char *name;
-  AtomHandler handler;
-} HandlerEntry;
+  const char *atom_type;
+  EXTRACTOR_KeywordType type;
+} ITTagConversionEntry;
 
-static HandlerEntry handlers[] = {
+/* Conversion table from iTunes tag atom types to EXTRACTOR keyword
+ * types; the row with a NULL atom_type marks the end.
+ * For the tag names see http://atomicparsley.sourceforge.net/mpeg-4files.html */
+static ITTagConversionEntry it_to_extr_table[] = {
+  {"\xa9" "alb", EXTRACTOR_ALBUM},
+  {"\xa9" "ART", EXTRACTOR_ARTIST},
+  {"aART",       EXTRACTOR_ARTIST},
+  {"\xa9" "cmt", EXTRACTOR_COMMENT},
+  {"\xa9" "day", EXTRACTOR_YEAR},
+  {"\xa9" "nam", EXTRACTOR_TITLE},
+  {"\xa9" "gen", EXTRACTOR_GENRE},
+  {"gnre",       EXTRACTOR_GENRE},
+  {"\xa9" "wrt", EXTRACTOR_AUTHOR},
+  {"\xa9" "too", EXTRACTOR_ENCODED_BY},
+  {"cprt",       EXTRACTOR_COPYRIGHT},
+  {"\xa9" "grp", EXTRACTOR_GROUP},
+  {"catg",       EXTRACTOR_CATEGORY},
+  {"keyw",       EXTRACTOR_KEYWORDS},
+  {"desc",       EXTRACTOR_DESCRIPTION},
+  {"tvnn",       EXTRACTOR_PUBLISHER},   /* TV Network Name */
+  {"tvsh",       EXTRACTOR_TITLE},       /* TV Show Name */
+  /* "tven" is dispatched by ilst_handlers but has no keyword type here yet */
+  {NULL,         EXTRACTOR_UNKNOWN},
+};
+
+/**
+ * Handle one iTunes tag atom: look its type up in it_to_extr_table and,
+ * when it is listed there, let processDataAtom read the child atom that
+ * follows the tag atom's header.
+ *
+ * @return 0 if the atom is too small to hold a child atom, -1 if the
+ *         atom type is not in it_to_extr_table, otherwise the result
+ *         of processDataAtom
+ */
+static int
+iTunesTagHandler (const char *input,
+           size_t size, size_t pos, struct EXTRACTOR_Keywords **list)
+{
+  const ITTagConversionEntry *entry;
+  unsigned int hdr = getAtomHeaderSize (&input[pos]);
+  unsigned long long asize = getAtomSize (&input[pos]);
+
+  /* need room for the header plus at least one (child) atom */
+  if (asize < hdr + 8)
+    return 0;
+
+  for (entry = it_to_extr_table; entry->atom_type != NULL; entry++)
+    {
+      if (0 == memcmp (&input[pos+4], entry->atom_type, 4))
+        return processDataAtom(input, asize, pos+hdr, &input[pos],
+                               entry->type, list);
+    }
+
+  return -1;
+}
+
+
+/**
+ * Handle the ilst atom: process the atoms found after its header with
+ * the iTunes tag handler table (ilst_handlers).
+ * @return the result of processAtoms (0 on error, 1 for success,
+ *         -1 for unknown atom type)
+ */
+static int 
+ilstHandler (const char *input,
+             size_t size, size_t pos, struct EXTRACTOR_Keywords **list)
+{
+  unsigned int hdr = getAtomHeaderSize (&input[pos]);
+
+  return processAtoms(ilst_handlers, &input[pos + hdr],
+                      getAtomSize(&input[pos]) - hdr, list);
+}
+
+
+static HandlerEntry all_handlers[] = {
   {"moov", &moovHandler},
   {"cmov", &cmovHandler},
   {"mvhd", &mvhdHandler},
   {"trak", &trakHandler},
   {"tkhd", &tkhdHandler},
+  {"ilst", &ilstHandler},
   {"meta", &metaHandler},
   {"udta", &udtaHandler},
   {"ftyp", &ftypHandler},
@@ -695,12 +1009,49 @@
   {NULL, NULL},
 };
 
+/* Dispatch table for the atoms inside an ilst atom.  Every listed type
+ * goes to iTunesTagHandler, which returns -1 for the types that have no
+ * row in it_to_extr_table.  The {NULL, NULL} row ends the table. */
+static HandlerEntry ilst_handlers[] = {
+  {"\xa9" "alb", iTunesTagHandler},
+  {"\xa9" "ART", iTunesTagHandler},
+  {"aART",       iTunesTagHandler},
+  {"\xa9" "cmt", iTunesTagHandler},
+  {"\xa9" "day", iTunesTagHandler},
+  {"\xa9" "nam", iTunesTagHandler},
+  {"\xa9" "gen", iTunesTagHandler},
+  {"gnre",       iTunesTagHandler},
+  {"trkn",       iTunesTagHandler},
+  {"disk",       iTunesTagHandler},
+  {"\xa9" "wrt", iTunesTagHandler},
+  {"\xa9" "too", iTunesTagHandler},
+  {"tmpo",       iTunesTagHandler},
+  {"cprt",       iTunesTagHandler},
+  {"cpil",       iTunesTagHandler},
+  {"covr",       iTunesTagHandler},
+  {"rtng",       iTunesTagHandler},
+  {"\xa9" "grp", iTunesTagHandler},
+  {"stik",       iTunesTagHandler},
+  {"pcst",       iTunesTagHandler},
+  {"catg",       iTunesTagHandler},
+  {"keyw",       iTunesTagHandler},
+  {"purl",       iTunesTagHandler},
+  {"egid",       iTunesTagHandler},
+  {"desc",       iTunesTagHandler},
+  {"\xa9" "lyr", iTunesTagHandler},
+  {"tvnn",       iTunesTagHandler},
+  {"tvsh",       iTunesTagHandler},
+  {"tven",       iTunesTagHandler},
+  {"tvsn",       iTunesTagHandler},
+  {"tves",       iTunesTagHandler},
+  {"purd",       iTunesTagHandler},
+  {"pgap",       iTunesTagHandler},
+  {NULL, NULL},
+};
+
 /**
  * Call the handler for the atom at the given position.
  * @return 0 on error, 1 for success, -1 for unknown atom type
  */
 static int
-handleAtom (const char *input,
+handleAtom (HandlerEntry *handlers, const char *input,
             size_t size, size_t pos, struct EXTRACTOR_Keywords **list)
 {
   int i;





reply via email to

[Prev in Thread] Current Thread [Next in Thread]