From a0a28c6a578fa6938baf09fd5efcaac01a65677c Mon Sep 17 00:00:00 2001 From: wapmorgan Date: Mon, 16 Jan 2017 02:15:40 +0300 Subject: [PATCH] Fix scanning main id3v2 tags --- README.md | 12 +-- bin/scan | 15 ++- src/Mp3Info.php | 256 +++++++++++++++++++++++++++++------------------- 3 files changed, 172 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index f1572e4..b9ef03a 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,12 @@ This class extracts information from mpeg/mp3 audio: | Audio | id3v1 Tags | id3v2 Tags | |--------------|------------|------------| -| duration | song | | -| bitRate | artist | | -| sampleRate | album | | -| channel | year | | -| framesCount | comment | | -| codecVersion | genre | | +| duration | song | TIT2 | +| bitRate | artist | TPE1 | +| sampleRate | album | TALB | +| channel | year | TYER | +| framesCount | comment | COMM | +| codecVersion | genre | TCON | | layerVersion | | | 1. Usage diff --git a/bin/scan b/bin/scan index fca2a63..25f85e7 100755 --- a/bin/scan +++ b/bin/scan @@ -20,7 +20,7 @@ function substrIfLonger($string, $maxLength) { return $string; } -function analyze($filename, &$total_duration, &$total_parse_time) { +function analyze($filename, &$total_duration, &$total_parse_time, $id3v2 = false) { if (!is_readable($filename)) return; try { $audio = new Mp3Info($filename, true); @@ -28,6 +28,17 @@ function analyze($filename, &$total_duration, &$total_parse_time) { return null; } echo sprintf('%15s | %4s | %7s | %0.1fkHz | %-11s | %-10s | %.5f', substrIfLonger(basename($filename), 15), formatTime($audio->duration), $audio->isVbr ? 'vbr' : ($audio->bitRate / 1000).'kbps', ($audio->sampleRate / 1000), isset($audio->tags1['song']) ? substrIfLonger($audio->tags1['song'], 11) : null, isset($audio->tags1['artist']) ? substrIfLonger($audio->tags1['artist'], 10) : null, $audio->_parsingTime).PHP_EOL; + if ($id3v2 && !empty($audio->tags2)) { + foreach ($audio->tags2 as $tag=>$value) { + echo ' '.$tag.': '; + if ($tag == 'COMM') { + foreach ($value as $lang => $comment) { + echo '['.$lang.'] '.$comment['short'].'; '.$comment['actual'].PHP_EOL; + } + } else + echo $value.PHP_EOL; + } + } $total_duration += $audio->duration; $total_parse_time += $audio->_parsingTime; } @@ -57,7 +68,7 @@ foreach ($argv as $arg) { } } } else if (is_file($arg)) { - analyze($arg, $total_duration, $total_parse_time); + analyze($arg, $total_duration, $total_parse_time, true); if ($compare) analyzeId3($f, $id3_parse_time); } } diff --git a/src/Mp3Info.php b/src/Mp3Info.php index a8eb8f1..4907ab8 100644 --- a/src/Mp3Info.php +++ b/src/Mp3Info.php @@ -25,8 +25,8 @@ class Mp3Info { const VBR_SYNC = "Xing"; const CBR_SYNC = "Info"; - const TAGS = 1; - const META = 2; + const META = 1; + const TAGS = 2; const MPEG_1 = 1; const MPEG_2 = 2; @@ -420,48 +420,84 @@ class Mp3Info { ), ); switch ($frame_id) { - case 'UFID': # Unique file identifier - break; + // case 'UFID': # Unique file identifier + // break; ################# Text information frames case 'TALB': # Album/Movie/Show title - case 'TBPM': # BPM (beats per minute) - case 'TCOM': # Composer + $raw = fread($fp, $frame_size); + // var_dump($raw); + $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); + if ((bool)($data['encoding'] == 0x00)) # ISO-8859-1 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'iso-8859-1'); + else # utf-16 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'utf-16'); + break; + // case 'TBPM': # BPM (beats per minute) + // case 'TCOM': # Composer case 'TCON': # Content type - case 'TCOP': # Copyright message - case 'TDAT': # Date - case 'TDLY': # Playlist delay - case 'TENC': # Encoded by - case 'TEXT': # Lyricist/Text writer - case 'TFLT': # File type - case 'TIME': # Time - case 'TIT1': # Content group description + $raw = fread($fp, $frame_size); + $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); + if ((bool)($data['encoding'] == 0x00)) # ISO-8859-1 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'iso-8859-1'); + else # utf-16 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'utf-16'); + break; + // case 'TCOP': # Copyright message + // case 'TDAT': # Date + // case 'TDLY': # Playlist delay + // case 'TENC': # Encoded by + // case 'TEXT': # Lyricist/Text writer + // case 'TFLT': # File type + // case 'TIME': # Time + // case 'TIT1': # Content group description case 'TIT2': # Title/songname/content description - case 'TIT3': # Subtitle/Description refinement - case 'TKEY': # Initial key - case 'TLAN': # Language(s) - case 'TLEN': # Length - case 'TMED': # Media type - case 'TOAL': # Original album/movie/show title - case 'TOFN': # Original filename - case 'TOLY': # Original lyricist(s)/text writer(s) - case 'TOPE': # Original artist(s)/performer(s) - case 'TORY': # Original release year - case 'TOWN': # File owner/licensee + $raw = fread($fp, $frame_size); + $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); + if ((bool)($data['encoding'] == 0x00)) # ISO-8859-1 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'iso-8859-1'); + else # utf-16 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'utf-16'); + break; + // case 'TIT3': # Subtitle/Description refinement + // case 'TKEY': # Initial key + // case 'TLAN': # Language(s) + // case 'TLEN': # Length + // case 'TMED': # Media type + // case 'TOAL': # Original album/movie/show title + // case 'TOFN': # Original filename + // case 'TOLY': # Original lyricist(s)/text writer(s) + // case 'TOPE': # Original artist(s)/performer(s) + // case 'TORY': # Original release year + // case 'TOWN': # File owner/licensee case 'TPE1': # Lead performer(s)/Soloist(s) - case 'TPE2': # Band/orchestra/accompaniment - case 'TPE3': # Conductor/performer refinement - case 'TPE4': # Interpreted, remixed, or otherwise modified by - case 'TPOS': # Part of a set - case 'TPUB': # Publisher - case 'TRCK': # Track number/Position in set - case 'TRDA': # Recording dates - case 'TRSN': # Internet radio station name - case 'TRSO': # Internet radio station owner - case 'TSIZ': # Size - case 'TSRC': # ISRC (international standard recording code) - case 'TSSE': # Software/Hardware and settings used for encoding + $raw = fread($fp, $frame_size); + $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); + if ((bool)($data['encoding'] == 0x00)) # ISO-8859-1 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'iso-8859-1'); + else # utf-16 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'utf-16'); + break; + // case 'TPE2': # Band/orchestra/accompaniment + // case 'TPE3': # Conductor/performer refinement + // case 'TPE4': # Interpreted, remixed, or otherwise modified by + // case 'TPOS': # Part of a set + // case 'TPUB': # Publisher + // case 'TRCK': # Track number/Position in set + // case 'TRDA': # Recording dates + // case 'TRSN': # Internet radio station name + // case 'TRSO': # Internet radio station owner + // case 'TSIZ': # Size + // case 'TSRC': # ISRC (international standard recording code) + // case 'TSSE': # Software/Hardware and settings used for encoding case 'TYER': # Year + $raw = fread($fp, $frame_size); + $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); + if ((bool)($data['encoding'] == 0x00)) # ISO-8859-1 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'iso-8859-1'); + else # utf-16 + $this->tags2[$frame_id] = mb_convert_encoding($data['information'], 'utf-8', 'utf-16'); + break; case 'TXXX': # User defined text information frame $raw = fread($fp, $frame_size); $data = unpack("C1encoding/A".($frame_size - 1)."information", $raw); @@ -473,53 +509,64 @@ class Mp3Info { ################# Text information frames ################# URL link frames - case 'WCOM': # Commercial information - break; - case 'WCOP': # Copyright/Legal information - break; - case 'WOAF': # Official audio file webpage - break; - case 'WOAR': # Official artist/performer webpage - break; - case 'WOAS': # Official audio source webpage - break; - case 'WORS': # Official internet radio station homepage - break; - case 'WPAY': # Payment - break; - case 'WPUB': # Publishers official webpage - break; - case 'WXXX': # User defined URL link frame - break; + // case 'WCOM': # Commercial information + // break; + // case 'WCOP': # Copyright/Legal information + // break; + // case 'WOAF': # Official audio file webpage + // break; + // case 'WOAR': # Official artist/performer webpage + // break; + // case 'WOAS': # Official audio source webpage + // break; + // case 'WORS': # Official internet radio station homepage + // break; + // case 'WPAY': # Payment + // break; + // case 'WPUB': # Publishers official webpage + // break; + // case 'WXXX': # User defined URL link frame + // break; ################# URL link frames - case 'IPLS': # Involved people list - break; - case 'MCDI': # Music CD identifier - break; - case 'ETCO': # Event timing codes - break; - case 'MLLT': # MPEG location lookup table - break; - case 'SYTC': # Synchronized tempo codes - break; - case 'USLT': # Unsychronized lyric/text transcription - break; - case 'SYLT': # Synchronized lyric/text - break; + // case 'IPLS': # Involved people list + // break; + // case 'MCDI': # Music CD identifier + // break; + // case 'ETCO': # Event timing codes + // break; + // case 'MLLT': # MPEG location lookup table + // break; + // case 'SYTC': # Synchronized tempo codes + // break; + // case 'USLT': # Unsychronized lyric/text transcription + // break; + // case 'SYLT': # Synchronized lyric/text + // break; case 'COMM': # Comments $dataEnd = ftell($fp) + $frame_size; $raw = fread($fp, 4); $data = unpack("C1encoding/A3language", $raw); // read until \null character $short_description = null; + $last_null = false; + $actual_text = false; while (ftell($fp) < $dataEnd) { $char = fgetc($fp); - if ($char == chr(0)) $actual_text = null; - else if (isset($actual_text)) $actual_text .= $char; + if ($char == "\00" && $actual_text === false) { + if ($data['encoding'] == 0x1) { # two null-bytes for utf-16 + if ($last_null) + $actual_text = null; + else + $last_null = true; + } else # no condition for iso-8859-1 + $actual_text = null; + + } + else if ($actual_text !== false) $actual_text .= $char; else $short_description .= $char; } - if (!isset($actual_text)) $actual_text = $short_description; + if ($actual_text === false) $actual_text = $short_description; // list($short_description, $actual_text) = sscanf("s".chr(0)."s", $data['texts']); // list($short_description, $actual_text) = explode(chr(0), $data['texts']); $this->tags2[$frame_id][$data['language']] = array( @@ -527,42 +574,45 @@ class Mp3Info { 'actual' => (bool)($data['encoding'] == 0x00) ? mb_convert_encoding($actual_text, 'utf-8', 'iso-8859-1') : mb_convert_encoding($actual_text, 'utf-8', 'utf-16'), ); break; - case 'RVAD': # Relative volume adjustment - break; - case 'EQUA': # Equalization - break; - case 'RVRB': # Reverb - break; - case 'APIC': # Attached picture - break; - case 'GEOB': # General encapsulated object - break; + // case 'RVAD': # Relative volume adjustment + // break; + // case 'EQUA': # Equalization + // break; + // case 'RVRB': # Reverb + // break; + // case 'APIC': # Attached picture + // break; + // case 'GEOB': # General encapsulated object + // break; case 'PCNT': # Play counter $raw = fread($fp, $frame_size); $data = unpack("L", $raw); $this->tags2[$frame_id] = $data[1]; break; - case 'POPM': # Popularimeter - break; - case 'RBUF': # Recommended buffer size - break; - case 'AENC': # Audio encryption - break; - case 'LINK': # Linked information - break; - case 'POSS': # Position synchronisation frame - break; - case 'USER': # Terms of use - break; - case 'OWNE': # Ownership frame - break; - case 'COMR': # Commercial frame - break; - case 'ENCR': # Encryption method registration - break; - case 'GRID': # Group identification registration - break; - case 'PRIV': # Private frame + // case 'POPM': # Popularimeter + // break; + // case 'RBUF': # Recommended buffer size + // break; + // case 'AENC': # Audio encryption + // break; + // case 'LINK': # Linked information + // break; + // case 'POSS': # Position synchronisation frame + // break; + // case 'USER': # Terms of use + // break; + // case 'OWNE': # Ownership frame + // break; + // case 'COMR': # Commercial frame + // break; + // case 'ENCR': # Encryption method registration + // break; + // case 'GRID': # Group identification registration + // break; + // case 'PRIV': # Private frame + // break; + default: + fseek($fp, $frame_size, SEEK_CUR); break; } }