Class: FormatParser::MP3Parser

Inherits:
Object
  • Object
show all
Includes:
IOUtils
Defined in:
lib/parsers/mp3_parser.rb

Defined Under Namespace

Modules: ID3Extraction Classes: InvalidDeepFetch, MP3Info, MPEGFrame, TagWrapper, VBRHeader

Constant Summary collapse

MAX_FRAMES_TO_SCAN =

We limit the number of MPEG frames we scan to obtain our duration estimation

500
SAMPLES_PER_FRAME =

Default number of samples per MPEG frame for mp3

1152
ZIP_LOCAL_ENTRY_SIGNATURE =

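Local entry header signature of a ZIP file - used to reject ZIPs (such as Office documents) that would otherwise be detected as MP3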

"PK\x03\x04\x14\x00".b
PNG_HEADER_BYTES =
[137, 80, 78, 71, 13, 10, 26, 10].pack('C*')
WEBP_HEADER_REGEX =
/RIFF.{4}WEBP/i
MAGIC_LE =
[0x49, 0x49, 0x2A, 0x0].pack('C4')
MAGIC_BE =
[0x4D, 0x4D, 0x0, 0x2A].pack('C4')
TIFF_HEADER_BYTES =
[MAGIC_LE, MAGIC_BE]
MP3_MIME_TYPE =
'audio/mpeg'

Constants included from IOUtils

IOUtils::INTEGER_DIRECTIVES

Instance Method Summary collapse

Methods included from IOUtils

#read_bytes, #read_fixed_point, #read_int, #safe_read, #safe_skip, #skip_bytes

Instance Method Details

#call(raw_io) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/parsers/mp3_parser.rb', line 61

# Attempts to parse the given IO as an MP3 file.
#
# Inputs whose first bytes carry a ZIP, PNG, WebP, TIFF or RIFF/WAVE
# signature are rejected up front. Otherwise the ID3 tags are read, the MPEG
# frames are scanned and a FormatParser::Audio is built. The duration comes
# from the Xing header when one is found, and is estimated from the scanned
# frames otherwise.
#
# @param raw_io the IO object to parse (gets wrapped in FormatParser::IOConstraint)
# @return [FormatParser::Audio, nil] the parsed file info, or nil when the
#   input is rejected (foreign signature, no MPEG frames found, or a duration
#   estimate that is not a finite number)
def call(raw_io)
  io = FormatParser::IOConstraint.new(raw_io)

  # Special case: some ZIPs (Office documents) did detect as MP3s.
  # To avoid having that happen, we check for the PKZIP signature -
  # local entry header signature - at the very start of the file.
  # If the file is too small safe_read will fail too and the parser
  # will terminate here. Same with PNGs. In the future
  # we should implement "confidence" for MP3 as of all our formats
  # it is by far the most lax.
  #
  # The 12 bytes read here are enough for every signature check below,
  # so the header is read once and reused instead of seeking back and
  # re-reading it for each format.
  header = safe_read(io, 12)
  return if header.start_with?(ZIP_LOCAL_ENTRY_SIGNATURE)
  return if header.start_with?(PNG_HEADER_BYTES)
  return if header.start_with?(WEBP_HEADER_REGEX)
  return if header.start_with?(*TIFF_HEADER_BYTES)

  # Prevention against parsing WAV files.
  wav_chunk_id, _wav_size, wav_riff_type = header.unpack('a4la4')
  return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'

  # Read all the ID3 tags (or at least attempt to)
  io.seek(0)
  id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
  tags = [id3v1, ID3Extraction.attempt_id3_v2_extraction(io)].compact

  io.seek(0) if tags.empty?

  # Compute how many bytes are occupied by the actual MPEG frames
  ignore_bytes_at_tail = id3v1 ? 128 : 0
  ignore_bytes_at_head = io.pos
  bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail

  io.seek(ignore_bytes_at_head)

  maybe_xing_header, initial_frames = parse_mpeg_frames(io)

  return if initial_frames.empty?

  first_frame = initial_frames.first

  id3tags_hash = with_id3tag_local_configs { blend_id3_tags_into_hash(*tags) }

  file_info = FormatParser::Audio.new(
    format: :mp3,
    # media_duration_frames is omitted because the frames
    # in MPEG are not the same thing as in a movie file - they
    # do not tell anything of substance
    num_audio_channels: first_frame.channels,
    audio_sample_rate_hz: first_frame.sample_rate,
    intrinsics: id3tags_hash.merge(id3tags: tags),
    content_type: MP3_MIME_TYPE,
  )

  extra_file_attributes = fetch_extra_attributes_from_id3_tags(id3tags_hash)

  extra_file_attributes.each do |name, value|
    file_info.send(:"#{name}=", value)
  end

  # When a Xing header is present it carries the total frame count,
  # so the duration can be computed directly instead of estimated.
  if maybe_xing_header
    file_info.media_duration_seconds =
      maybe_xing_header.frames * SAMPLES_PER_FRAME / first_frame.sample_rate.to_f
    return file_info
  end

  # Estimate duration using the frames we did parse - to have an exact one
  # we would need to have all the frames and thus read most of the file
  avg_frame_size = float_average_over(initial_frames, :frame_length)
  avg_sample_rate = float_average_over(initial_frames, :sample_rate)

  est_frame_count = bytes_used_by_frames / avg_frame_size
  est_samples = est_frame_count * SAMPLES_PER_FRAME
  est_duration_seconds = est_samples / avg_sample_rate

  # Safeguard for i.e. some JPEGs being recognized as MP3
  # to prevent ambiguous recognition. A zero average frame size makes
  # the estimate Infinity, -Infinity or NaN - none of which is a duration.
  return unless est_duration_seconds.finite?

  file_info.media_duration_seconds = est_duration_seconds
  file_info
end

#likely_match?(filename) ⇒ Boolean

Returns:

  • (Boolean)


57
58
59
# File 'lib/parsers/mp3_parser.rb', line 57

# Tells whether the given filename looks like it belongs to an MP3 file.
#
# The extension is compared case-insensitively and has to sit at the very
# end of the string (not merely at the end of one of its lines).
#
# @param filename [String, nil] the name of the file to check
# @return [Boolean] true when the name ends in ".mp3", false otherwise
#   (including when filename is nil)
def likely_match?(filename)
  # Regexp#match? returns a real true/false and treats nil as "no match"
  /\.mp3\z/i.match?(filename)
end