Module: CommonUtils

Included in:
SCC, SRT, TTML, VTT
Defined in:
lib/utils/common_utils.rb

Constant Summary collapse

CREDITS =
"Credits: Autogenerated by subtitle Rubygem".freeze
SCC_DEFAULT_FRAME_RATE =
ENV["SCC_DEFAULT_FRAME_RATE"] || 23.976

Instance Method Summary collapse

Instance Method Details

#create_file(src_type, dest_type, output_file, target_lang) ⇒ Object

Creates the output file with the basic header information for the target caption type. The respective implementations then append the transformed caption details to it.

  • src_type - Source caption type. Refer to AllFather::TYPE_SCC type constants

  • dest_type - Target caption type. Refer to AllFather::TYPE_SCC type constants

  • output_file - The output file to create, into which the type-specific information will be written
  • target_lang - Target lang of the output_file

Returns

true if the file is created with the right headers; an exception is raised otherwise



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/utils/common_utils.rb', line 24

# Creates (or truncates) +output_file+ and writes the format specific header
# for +dest_type+ into it. The transformed cues are appended afterwards by
# the respective caption implementations.
#
# * +src_type+    - Source caption type (not needed to build the header)
# * +dest_type+   - Target caption type, one of the AllFather::TYPE_* constants
# * +output_file+ - Path of the file to create; it is opened in overwrite mode
# * +target_lang+ - Language written into the xml:lang attribute of the
#                   TTML / DFXP <div>; nil is treated as an empty string
#
# ==== Returns
#
# true once the header has been written
#
# ==== Raises
#
# AllFather::InvalidInputException if +dest_type+ is not a supported type.
# The header is resolved before the file is opened so that an invalid type
# does not leave an empty output file behind.
def create_file(src_type, dest_type, output_file, target_lang)
  target_lang ||= ""
  header =
    case dest_type
    when AllFather::TYPE_SCC
      "Scenarist_SCC V1.0\n\n"
    when AllFather::TYPE_SRT
      "NOTE #{CREDITS}\n\n"
    when AllFather::TYPE_VTT
      "WEBVTT\n\nNOTE #{CREDITS}\n\n"
    when AllFather::TYPE_TTML
      # TODO: Move this to a template file and load from there !!
      <<-EOF
<tt xml:lang="" xmlns="http://www.w3.org/ns/ttml">
<head>
  <metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
    <ttm:desc>#{CREDITS}</ttm:desc>
  </metadata>
</head>
<body>
  <div xml:lang="#{target_lang}">
      EOF
    when AllFather::TYPE_DFXP
      <<-EOF
<tt xml:lang="" xmlns="http://www.w3.org/2004/11/ttaf1">
<head>
  <meta xmlns:ttm="http://www.w3.org/2004/11/ttaf1#metadata">
    <ttm:desc>#{CREDITS}</ttm:desc>
  </meta>
</head>
<body>
  <div xml:lang="#{target_lang}">
      EOF
    else
      raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{dest_type}")
    end

  # The block form closes the handle even when the write raises
  File.open(output_file, "w") { |file| file.write(header) }
  true
end

#extension_from_type(type) ⇒ Object

Method to return a valid extension for a given caption type. Refer to `AllFather#VALID_FILES`

  • type - Must be one of the valid type defined in ‘AllFather`

Raises

InvalidInputException if a valid type is not provided



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/utils/common_utils.rb', line 90

# Looks up the file extension registered for a caption type.
#
# AllFather::VALID_FILES is read positionally: index 0 is used for SCC,
# 1 for SRT, 2 for VTT, 3 for TTML and 4 for DFXP.
#
# * +type+ - One of the AllFather::TYPE_* constants
#
# ==== Raises
#
# AllFather::InvalidInputException if +type+ matches none of the known types
def extension_from_type(type)
  ordered_types = [
    AllFather::TYPE_SCC,
    AllFather::TYPE_SRT,
    AllFather::TYPE_VTT,
    AllFather::TYPE_TTML,
    AllFather::TYPE_DFXP
  ]
  # Same matching operator a case/when would apply
  slot = ordered_types.index { |known| known === type }
  unless slot
    raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{type}")
  end
  AllFather::VALID_FILES[slot]
end

#new_cue(model, target_type, last_cue = false) ⇒ Object

Method to return the cue info of the caption based on the model and target caption type which can be used by the caller’s transformation routine

  • model - ‘CueInfo` instance which is caption agnostic details of a cue

  • target_type - The target type to which the new cue is to be generated

  • last_cue - true for last cue and false otherwise.



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/utils/common_utils.rb', line 139

# Builds the textual representation of a single cue for +target_type+, for
# use by the caller's transformation routine.
#
# * +model+       - Cue object; this method reads start_time_units and
#                   end_time_units (indexed as [h, m, s, ms]), message,
#                   sequence and index from it
# * +target_type+ - One of the AllFather::TYPE_* constants
# * +last_cue+    - When true, the TTML / DFXP output also closes the
#                   <div>, <body> and <tt> elements
#
# ==== Returns
#
# The cue as a String, or nil when +target_type+ is not a known type
def new_cue(model, target_type, last_cue = false)
  pad2 = ->(value) { value.to_s.rjust(2, "0") }
  pad3 = ->(value) { value.to_s.rjust(3, "0") }
  # "hh:mm:ss" portion of a [h, m, s, ms] unit
  clock = ->(unit) { (0..2).map { |i| pad2.call(unit[i]) }.join(":") }

  case target_type
  when AllFather::TYPE_SCC
    start_unit = model.start_time_units
    # Convert the milliseconds to frames. The frame rate is a String when it
    # is supplied through ENV, hence the to_f.
    frames = ((start_unit[3].to_f * SCC_DEFAULT_FRAME_RATE.to_f) / 1000.0).to_i
    # Frames are always emitted with two digits (zero padded when <= 9)
    # TODO: Might have to strip off non-english characters here
    "#{clock.call(start_unit)}:#{pad2.call(frames)} " + scc_encode(model.message)
  when AllFather::TYPE_VTT, AllFather::TYPE_SRT
    # WebVTT separates the milliseconds with "." while SRT uses ","
    ms_separator = target_type == AllFather::TYPE_VTT ? "." : ","
    stamps = [model.start_time_units, model.end_time_units].map do |unit|
      "#{clock.call(unit)}#{ms_separator}#{pad3.call(unit[3])}"
    end
    identifier = model.sequence || model.index
    # The cue always ends with exactly one blank line
    trailer = model.message.end_with?("\n") ? "" : "\n"
    "#{identifier}\n#{stamps.join(' --> ')}\n#{model.message}\n#{trailer}"
  when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
    begin_time, end_time = [model.start_time_units, model.end_time_units].map do |unit|
      # The fractional part is omitted when there are no milliseconds
      fraction = unit[3] > 0 ? ".#{pad3.call(unit[3])}" : ""
      "#{clock.call(unit)}#{fraction}"
    end
    cue = "<p begin=\"#{begin_time}\" end=\"#{end_time}\">#{model.message.encode(:xml => :text)}</p>"
    last_cue ? "#{cue}</div>\n</body>\n</tt>" : cue
  end
end

#scc_encode(free_text) ⇒ Object

Method to encode a text to SCC format

  • free_text - Text that needs to be encoded

Returns

The encoded string that can be added to SCC file



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/utils/common_utils.rb', line 115

# Encodes a text as hexadecimal byte pairs for an SCC file.
#
# Every byte of +free_text+ gets its high bit set when it contains an even
# number of 1 bits (so 7-bit input ends up with odd parity) and is written
# as two lowercase hex digits. A space follows every second byte, so the
# output is grouped into 4-digit words; an odd number of bytes leaves a
# trailing 2-digit group.
#
# * +free_text+ - Text that needs to be encoded
#
# ==== Returns
#
# The encoded string that can be added to an SCC file
def scc_encode(free_text)
  encoded_str = "".dup
  free_text.each_byte.with_index(1) do |byte, position|
    with_parity = byte.to_s(2).count("1").even? ? (byte | 0x80) : byte
    # Always two hex digits, otherwise bytes below 0x10 (e.g. "\r")
    # would misalign the byte pairs
    encoded_str << with_parity.to_s(16).rjust(2, "0")
    encoded_str << " " if position.even?
  end
  encoded_str
end

#time_details(time_stamp, type) ⇒ Object

Method that normalizes the timestamps from various different caption formats into a caption agnostic format

  • time_stamp - The timestamp parsed from the caption file for a given caption type

  • type - A valid caption type. Refer to ‘AllFather` for valid types



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
# File 'lib/utils/common_utils.rb', line 213

# Normalizes a timestamp of the given caption type into a caption agnostic
# form.
#
# * +time_stamp+ - The timestamp string parsed from the caption file
# * +type+       - One of the AllFather::TYPE_* constants
#
# Accepted shapes per type:
# * SCC          - "hh:mm:ss:ff" (or "hh:mm:ss;ff"); frames are converted to
#                  milliseconds with SCC_DEFAULT_FRAME_RATE, capped at 999
# * SRT          - "hh:mm:ss,mmm"
# * VTT          - "hh:mm:ss.mmm" or "mm:ss.mmm"
# * TTML / DFXP  - clock-time "hh:mm:ss[.fraction]" or offset-time with a
#                  unit of h, m, s or ms (e.g. "1.5s", "1500ms"); fractions
#                  are truncated to whole milliseconds
#
# ==== Returns
#
# [h, m, s, ms, elapsed_seconds] where elapsed_seconds is h, m and s
# expressed in seconds (milliseconds are not included)
#
# ==== Raises
#
# AllFather::InvalidInputException for an unknown +type+ and for TTML / DFXP
# time expressions that reference frames / ticks or are otherwise malformed
def time_details(time_stamp, type)
  h = m = s = ms = nil
  case type
  when AllFather::TYPE_SCC
    # Drop-frame timecodes separate the frame count with ";"
    tokens = time_stamp.split(/[:;]/)
    h = tokens[0].to_i
    m = tokens[1].to_i
    s = tokens[2].to_i
    frames = tokens[3].to_i
    # The frame rate is a String when it is supplied through ENV
    ms = [(frames * 1000 / SCC_DEFAULT_FRAME_RATE.to_f).round, 999].min
  when AllFather::TYPE_SRT
    clock, millis = time_stamp.split(",")
    ms = millis.to_i
    tokens = clock.split(":")
    h = tokens[0].to_i
    m = tokens[1].to_i
    s = tokens[2].to_i
  when AllFather::TYPE_VTT
    clock, millis = time_stamp.split(".")
    ms = millis.to_i
    tokens = clock.split(":")
    if tokens.size == 2
      # The hours component is optional in VTT
      h = 0
      m = tokens[0].to_i
      s = tokens[1].to_i
    else
      h = tokens[0].to_i
      m = tokens[1].to_i
      s = tokens[2].to_i
    end
  when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
    # We support only clock-time without framerate / tickrate and only media timebase
    # For offset hence we don't support frames / ticks
    tokens = time_stamp.split(":")
    if tokens.size > 1
      if tokens.size > 3
        # This is specified with frames and/or subframes. Unsupported
        raise AllFather::InvalidInputException.new("TTML file with clock-time referencing frames / ticks is unsupported")
      end
      if tokens.size < 3
        raise AllFather::InvalidInputException.new("TTML clock-time must be of the form hh:mm:ss; got #{time_stamp}")
      end
      h = tokens[0].to_i
      m = tokens[1].to_i
      whole, fraction = tokens[2].split(".")
      s = whole.to_i
      # ".5" means 500 ms, so scale the fraction instead of reading it as an integer
      ms = fraction ? ("0.#{fraction}".to_r * 1000).floor : 0
    else
      # Parsing in offset mode
      if time_stamp.end_with?("ms")
        unit = "ms"
        quantity = time_stamp[0, time_stamp.size - 2]
      else
        unit = time_stamp[time_stamp.size - 1]
        quantity = time_stamp[0, time_stamp.size - 1]
      end
      seconds_per_unit =
        case unit
        when "h" then 3600
        when "m" then 60
        when "s" then 1
        when "ms" then Rational(1, 1000)
        else
          # Fail out f / t
          raise AllFather::InvalidInputException.new("TTML file with offset-time referencing frames / ticks is unsupported")
        end
      # Rational arithmetic keeps the split into seconds and milliseconds exact
      total = quantity.to_r * seconds_per_unit
      whole_seconds = total.floor
      ms = ((total - whole_seconds) * 1000).floor
      h = whole_seconds / 3600
      m = (whole_seconds / 60) % 60
      s = whole_seconds % 60
    end
  else
    raise AllFather::InvalidInputException.new("Not a valid type; Failed to parse timestamp for type #{type}")
  end
  elapsed_seconds = (h * 60 * 60) + (m * 60) + s
  [h, m, s, ms, elapsed_seconds]
end

#write_cue(model, file_map, last_cue = false) ⇒ Object

Method to write the cue details to the output files

  • model - Cue instance

  • file_map - Hash of files for each caption type

  • last_cue - true for last cue and false otherwise



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# File 'lib/utils/common_utils.rb', line 313

# Appends the cue to every output file and, after the last cue, pretty
# prints the XML based outputs.
#
# * +model+    - Cue instance, handed to new_cue for each target type
# * +file_map+ - Hash of caption type => path of the output file
# * +last_cue+ - true for the last cue and false otherwise
#
# For the last cue, every TTML / DFXP file is re-parsed with Nokogiri
# (noblanks) and rewritten with the serialized document.
def write_cue(model, file_map, last_cue = false)
  file_map.each do |type, file_path|
    File.open(file_path, "a") do |f|
      f.puts new_cue(model, type, last_cue)
    end
  end
  if last_cue
    # Pretty print the output for ttml & dfxp
    file_map.each do |type, file_path|
      next unless [AllFather::TYPE_DFXP, AllFather::TYPE_TTML].include?(type)
      # The block form closes the read handle before the file is rewritten
      xml_doc = File.open(file_path, "r") { |file| Nokogiri::XML(file, &:noblanks) }
      File.write(file_path, xml_doc.to_s)
    end
  end
end