[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/emms dbcc614 24/80: Fix id3v2 bugs
From: |
Stefan Monnier |
Subject: |
[elpa] externals/emms dbcc614 24/80: Fix id3v2 bugs |
Date: |
Wed, 17 Mar 2021 18:42:23 -0400 (EDT) |
branch: externals/emms
commit dbcc6143ce6730669a76c57fd3fb25981df74be5
Author: Petteri Hintsanen <petterih@iki.fi>
Commit: Petteri Hintsanen <petterih@iki.fi>
Fix id3v2 bugs
- Tag-level id3v2 unsynchronization is now supported. Frame-level
unsynchronization is still missing, and likely won’t be implemented at
all.
- Fix frame size decoding between different id3v2 versions.
- Use correct id3v2.2 sizes during decoding.
- Remove a terminating null byte from text strings only if there is
one.
---
emms-info-native.el | 152 +++++++++++++++++++++++++++++++++++-----------------
1 file changed, 102 insertions(+), 50 deletions(-)
diff --git a/emms-info-native.el b/emms-info-native.el
index 4f5555a..93621b4 100644
--- a/emms-info-native.el
+++ b/emms-info-native.el
@@ -42,14 +42,14 @@
;; streams only. Based on RFC 7845, see URL
;; ‘https://tools.ietf.org/html/rfc7845.html’.
;;
-;; - FLAC: FLAC streams in native encapsulation format, filename
-;; extension ‘.flac’. Based on xiph.org’s FLAC format
-;; specification, see URL ‘https://xiph.org/flac/format.html’.
+;; - FLAC streams in native encapsulation format, filename extension
+;; ‘.flac’. Based on xiph.org’s FLAC format specification, see URL
+;; ‘https://xiph.org/flac/format.html’.
;;
-;; - MP3: MP3 files with extension ‘.mp3’ and id3v2 tags. All id3v2
-;; revisions should work, but many features like CRC and
-;; unsynchronization are not supported. Based on id3v2 Informal
-;; Standards, see URL ‘https://id3.org’.
+;; - MP3 files with extension ‘.mp3’ and id3v2 tags. All id3v2
+;; versions should work, but many features like CRC, compression and
+;; encryption are not supported. Based on id3v2 Informal Standards,
+;; see URL ‘https://id3.org’.
;;
;; Format detection is based solely on filename extension, which is
;; matched case-insensitively.
@@ -60,7 +60,7 @@
(require 'cl-lib)
(require 'emms-info)
-(defconst emms-info-native--max-peek-size (* 512 1024)
+(defconst emms-info-native--max-peek-size (* 2048 1024)
"Maximum buffer size for metadata decoding.
Functions called by ‘emms-info-native’ read certain amounts of
data into a temporary buffer while decoding metadata. This
@@ -499,6 +499,11 @@ Return the comment block data in a vector."
;;;; id3v2 (MP3) code
+(defvar emms-info-native--id3v2-version 0
+ "Last decoded id3v2 version.
+This is a kludge; it is needed because bindat spec cannot refer
+outside itself.")
+
(defconst emms-info-native--id3v2-magic-array
[#x49 #x44 #x33]
"id3v2 header magic pattern ‘ID3’.")
@@ -510,23 +515,25 @@ Return the comment block data in a vector."
emms-info-native--id3v2-magic-array
last)))
(version u8)
+ (eval (setq emms-info-native--id3v2-version last))
(revision u8)
(flags bits 1)
(size-bytes vec 4)
- (size eval (emms-info-native--checked-id3v2-size last)))
+ (size eval (emms-info-native--checked-id3v2-size 'tag last)))
"id3v2 header specification.")
(defconst emms-info-native--id3v2-frame-bindat-spec
- '((id str 4)
- (size-bytes vec 4)
- (size eval (emms-info-native--checked-id3v2-size last))
- (flags bits 2)
+ '((id str (eval (if (= emms-info-native--id3v2-version 2) 3 4)))
+ (size-bytes vec (eval (if (= emms-info-native--id3v2-version 2) 3 4)))
+ (size eval (emms-info-native--checked-id3v2-size 'frame last))
+ (flags bits (eval (if (= emms-info-native--id3v2-version 2) 0 2)))
(payload vec (size)))
"id3v2 frame specification.")
(defconst emms-info-native--id3v2-frame-to-info
'(("TP1" . "artist")
("TPE1" . "artist")
+ ("TPE2" . "albumartist")
("TCM" . "composer")
("TCOM" . "composer")
("TIT2" . "title")
@@ -554,20 +561,19 @@ Return the comment block data in a vector."
"Read and decode id3v2 metadata from FILENAME.
Return metadata in a list of (FIELD . VALUE) cons cells. See
‘emms-info-native--decode-id3v2-text-frame’ for details."
- (let* ((header (emms-info-native--decode-id3v2-header filename))
+ (let* (emms-info-native--id3v2-version
+ (header (emms-info-native--decode-id3v2-header filename))
(tag-size (bindat-get-field header 'size))
+ (unsync (memq 7 (bindat-get-field header 'flags)))
(offset 10))
- (when (> tag-size emms-info-native--max-peek-size)
- (error "id3v2 tag size %s is too large" size))
- (when (memq 7 (bindat-get-field header 'flags))
- (error "id3v2 unsynchronisation scheme is not supported"))
(when (memq 6 (bindat-get-field header 'flags))
;; Skip the extended header.
(cl-incf offset
(emms-info-native--decode-id3v2-ext-header-size filename)))
(emms-info-native--decode-id3v2-frames filename
offset
- (+ tag-size 10))))
+ (+ tag-size 10)
+ unsync)))
(defun emms-info-native--decode-id3v2-header (filename)
"Read and decode id3v2 header from FILENAME."
@@ -586,46 +592,88 @@ Return the size. Signal an error if the size exceeds
(insert-file-contents-literally filename nil 10 14)
(emms-info-native--checked-id3v2-size (buffer-string))))
-(defun emms-info-native--checked-id3v2-size (bytes)
- "Calculate id3v2 element size from BYTES and check its validity.
+(defun emms-info-native--checked-id3v2-size (elt bytes)
+ "Calculate id3v2 element ELT size from BYTES.
+ELT must be either 'tag or 'frame. Check the validity of size.
Return the size."
- (let ((size (emms-info-native--decode-id3v2-size bytes)))
+ (let (size)
+ (cond ((eq elt 'tag)
+ (setq size (emms-info-native--decode-id3v2-size bytes t)))
+ ((eq elt 'frame)
+ (if (= emms-info-native--id3v2-version 4)
+ (setq size (emms-info-native--decode-id3v2-size bytes t))
+ (setq size (emms-info-native--decode-id3v2-size bytes nil)))))
(when (or (= size 0) (> size emms-info-native--max-peek-size))
- (error "id3v2 tag/header/frame size %s is invalid" bytes))
+ (error "id3v2 tag or frame size %s is invalid" size))
size))
-(defun emms-info-native--decode-id3v2-size (bytes)
+(defun emms-info-native--decode-id3v2-size (bytes syncsafe)
"Decode id3v2 element size from BYTES.
-BYTES are interpreted as 7-bit bytes, MSB first. Return the
-size."
- (apply '+ (seq-map-indexed (lambda (elt idx)
- (* (expt 2 (* 7 idx)) elt))
- (reverse bytes))))
-
-(defun emms-info-native--decode-id3v2-frames (filename begin end)
+Depending on SYNCSAFE, BYTES are interpreted as 7- or 8-bit
+bytes, MSB first.
+
+Return the decoded size."
+ (let ((num-bits (if syncsafe 7 8)))
+ (apply '+ (seq-map-indexed (lambda (elt idx)
+ (* (expt 2 (* num-bits idx)) elt))
+ (reverse bytes)))))
+
+(defun emms-info-native--decode-id3v2-frames (filename
+ begin
+ end
+ unsync)
"Read and decode id3v2 text frames from FILENAME.
BEGIN should be the offset of first byte after id3v2 header and
extended header (if any), and END should be the offset after the
complete id3v2 tag.
+If UNSYNC is t, the frames are assumed to have gone through
+unsynchronization and decoded as such.
+
Return metadata in a list of (FIELD . VALUE) cons cells. See
‘emms-info-native--decode-id3v2-text-frame’ for details."
(with-temp-buffer
(set-buffer-multibyte nil)
+ (insert (emms-info-native--read-id3v2-frames filename
+ begin
+ end
+ unsync))
+ (emms-info-native--decode-id3v2-text-frames (buffer-string))))
+
+(defun emms-info-native--read-id3v2-frames (filename begin end unsync)
+ "Read id3v2 frames from FILE.
+Start at offset BEGIN and end before offset END. If UNSYNC is t,
+reverse unsynchronization.
+
+Return the frames."
+ (with-temp-buffer
+ (set-buffer-multibyte nil)
(insert-file-contents-literally filename nil begin end)
- (let (comments
- (offset 0))
- (condition-case nil
- (while (< offset end)
- (let* ((frame (bindat-unpack
emms-info-native--id3v2-frame-bindat-spec
- (buffer-string)
- offset))
- (comment (emms-info-native--decode-id3v2-text-frame
- frame)))
- (when comment (push comment comments))
- (cl-incf offset (+ (bindat-get-field frame 'size) 10))))
- (error nil))
- comments)))
+ (when unsync
+ (while (re-search-forward (string 255 0) nil t)
+ (replace-match (string 255))))
+ (buffer-string)))
+
+(defun emms-info-native--decode-id3v2-text-frames (frames)
+ "Decode id3v2 text frames from FRAMES.
+
+Return metadata in a list of (FIELD . VALUE) cons cells. See
+‘emms-info-native--decode-id3v2-text-frame’ for details."
+ (let (comments
+ (offset 0))
+ (condition-case nil
+ (while (< offset (length frames))
+ (let* ((frame (bindat-unpack
emms-info-native--id3v2-frame-bindat-spec
+ frames
+ offset))
+ (comment (emms-info-native--decode-id3v2-text-frame
+ frame)))
+ (when comment (push comment comments))
+ (cl-incf offset
+ (+ (bindat-get-field frame 'size)
+ (if (= emms-info-native--id3v2-version 2) 6 10)))))
+ (error nil))
+ comments))
(defun emms-info-native--decode-id3v2-text-frame (frame)
"Identify and decode id3v2 text frame FRAME.
@@ -645,12 +693,16 @@ If there is no such identifier, return nil."
emms-info-native--id3v2-frame-to-info)))
(defun emms-info-native--decode-id3v2-string (bytes)
- "Decode id3v2 text information.
-Return the text in BYTES as string."
- (let ((encoding (emms-info-native--id3v2-text-encoding bytes))
- (string (mapconcat #'byte-to-string (seq-rest bytes) "")))
- ;; Discard the null terminator.
- (substring (decode-coding-string string encoding) 0 -1)))
+ "Decode id3v2 text information from BYTES.
+Remove the terminating null byte, if any. Return the text as
+string."
+ (let* ((encoding (emms-info-native--id3v2-text-encoding bytes))
+ (string (mapconcat #'byte-to-string (seq-rest bytes) ""))
+ (decoded (decode-coding-string string encoding)))
+ (when (> (length decoded) 0)
+ (if (equal (substring decoded -1) "\0")
+ (substring decoded 0 -1)
+ decoded))))
(defun emms-info-native--id3v2-text-encoding (bytes)
"Return the encoding for text information BYTES."
- [elpa] externals/emms a431263 09/80: * doc/emms.texinfo: update copyright years, (continued)
- [elpa] externals/emms a431263 09/80: * doc/emms.texinfo: update copyright years, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 5c3226b 10/80: * doc/developer-release.txt: remove duplicate, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 50bdbcc 12/80: * emms-tag-editor.el: Add emms-tag-editor-pipe, Stefan Monnier, 2021/03/17
- [elpa] externals/emms bc5e516 14/80: Add id3v2 (MP3) support to emms-info-native, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 9c8d961 15/80: Simplify Ogg-related code, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 17e5c6a 05/80: * doc/emms.texinfo: setup examples, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 209077e 07/80: * doc/emms.texinfo: update manual, Stefan Monnier, 2021/03/17
- [elpa] externals/emms d0142e7 11/80: Merge branch 'info-native', Stefan Monnier, 2021/03/17
- [elpa] externals/emms b8e1de7 17/80: Clean up Opus code, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 810dd41 18/80: Fix typos in Commentary, Stefan Monnier, 2021/03/17
- [elpa] externals/emms dbcc614 24/80: Fix id3v2 bugs,
Stefan Monnier <=
- [elpa] externals/emms 0f11ae1 22/80: Reorder functions and constants, Stefan Monnier, 2021/03/17
- [elpa] externals/emms e89bc15 26/80: Merge branch 'info-native', Stefan Monnier, 2021/03/17
- [elpa] externals/emms fa8f64e 33/80: Fix whitespace trimming, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 5aea8e7 35/80: Fix id3v2 frame id to info-field mapping, Stefan Monnier, 2021/03/17
- [elpa] externals/emms f9f2bab 30/80: Trim trailing whitespace from id3v2 strings, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 60c9304 32/80: Trim trailing whitespace from all info-fields, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 6e5d477 21/80: Simplify FLAC code, Stefan Monnier, 2021/03/17
- [elpa] externals/emms 757043b 41/80: Merge branch 'info-native', Stefan Monnier, 2021/03/17
- [elpa] externals/emms 7479d7d 19/80: Use lexical binding, Stefan Monnier, 2021/03/17
- [elpa] externals/emms a372976 20/80: Fix Opus channel mapping decoding, Stefan Monnier, 2021/03/17