Class: VTT

Inherits:
Object
  • Object
show all
Includes:
AllFather, CommonUtils
Defined in:
lib/vtt.rb

Overview

Library to handle VTT Files

Uses the configured translator to perform the language operations defined by AllFather.

Constant Summary collapse

SUPPORTED_TRANSFORMATIONS =
[TYPE_SCC, TYPE_SRT, TYPE_TTML, TYPE_DFXP]

Constants included from CommonUtils

CommonUtils::CREDITS, CommonUtils::SCC_DEFAULT_FRAME_RATE

Constants included from AllFather

AllFather::TYPE_DFXP, AllFather::TYPE_SCC, AllFather::TYPE_SRT, AllFather::TYPE_TTML, AllFather::TYPE_VTT, AllFather::VALID_FILES

Instance Method Summary collapse

Methods included from CommonUtils

#create_file, #extension_from_type, #new_cue, #scc_encode, #time_details, #write_cue

Methods included from AllFather

#callsign

Constructor Details

#initialize(cc_file) ⇒ VTT

Returns a new instance of VTT.



19
20
21
22
# File 'lib/vtt.rb', line 19

# Builds a VTT handler for the given caption file.
#
# * +cc_file+ - path of the caption file; stored in @cc_file
#
# Raises a RuntimeError with "Invalid VTT file provided" when is_valid?
# rejects the stored path.
def initialize(cc_file)
  @cc_file = cc_file
  return if is_valid?

  raise "Invalid VTT file provided"
end

Instance Method Details

#infer_languages ⇒ Object

Returns the inferred language in an array



72
73
74
75
76
77
78
79
80
81
# File 'lib/vtt.rb', line 72

# Asks the translator to detect the language of a text sample taken from the
# caption file.
#
# Returns a one-element array holding the detected language. The element is
# nil when sampling or detection raised a StandardError; in that case the
# error message is printed to standard output instead of being propagated.
def infer_languages
  detected = begin
    snippet = get_text(@cc_file, 100)
    @translator.infer_language(snippet)
  rescue StandardError => err
    puts "Error while detecting the language due to #{err.message}"
    nil
  end
  [detected]
end

#is_valid? ⇒ Boolean

Method to add required set of validations specific to caption type

Returns:

  • (Boolean)


86
87
88
89
90
91
92
93
94
# File 'lib/vtt.rb', line 86

# Checks whether the configured caption file looks like a VTT file.
#
# Only the file name is inspected: it must end with the ".vtt" extension.
# The file contents are not read.
#
# Returns true when @cc_file ends with ".vtt", false otherwise (including
# when @cc_file is nil).
def is_valid?
  # Do any VTT specific validations here
  # \z anchors at the very end of the string. The line anchors ^ and $ would
  # accept any multi-line value that merely contains a line ending in ".vtt"
  # (e.g. "a.vtt\nb.txt"), which is not a valid VTT file name.
  # TODO: Check if it's required to do a File read to see if the 1st line is WEBVTT
  # to handle cases where invalid file is named with vtt extension
  @cc_file.to_s.match?(/\.vtt\z/)
end

#set_translator(translator) ⇒ Object



24
25
26
27
# File 'lib/vtt.rb', line 24

# Registers the translator used for language inference and translation.
#
# The parent implementation is invoked first with the same argument, then the
# translator is stored in @translator.
#
# * +translator+ - translator object to use
#
# Returns the translator that was passed in.
def set_translator(translator)
  super
  @translator = translator
end

#supported_transformations ⇒ Object



96
97
98
# File 'lib/vtt.rb', line 96

# Lists the caption types this class can transform a VTT file into.
#
# Returns the SUPPORTED_TRANSFORMATIONS constant itself (not a copy).
def supported_transformations
  SUPPORTED_TRANSFORMATIONS
end

#transform_to(types, src_lang, target_lang, output_dir) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/vtt.rb', line 100

# Converts the VTT file into each of the requested caption types, optionally
# translating every cue message on the way.
#
# One output file per type is created in +output_dir+. Its name is the source
# file's basename, followed by "_<target_lang>" when translating, followed by
# the extension returned by extension_from_type.
#
# * +types+       - caption types to generate; iterated with #each
# * +src_lang+    - source language, forwarded to translated_msg
# * +target_lang+ - target language; nil or empty disables translation
# * +output_dir+  - directory that receives the generated files
#
# Raises StandardError when translation is requested but no translator has
# been set, or when create_file returns a falsy value for a type. The parent
# implementation called first may raise as well (not visible here).
def transform_to(types, src_lang, target_lang, output_dir)
  # Let's start off with some validations
  super(types, src_lang, target_lang, output_dir)

  # Suffix output dir with File separator
  output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
  
  # Translation is requested only when a non-empty target language is given,
  # and it needs a translator to have been set beforehand.
  translate = false
  if target_lang && !target_lang.empty?
    translate = true
    if @translator.nil?
      raise StandardError.new("Cannot infer language as engine options are not provided")
    end
  end
  # Prepare the output files for each type
  # file_map maps each requested type to the path of its output file.
  file_map = {}
  types.each do |type|
    output_file = File.basename(@cc_file, File.extname(@cc_file))
    output_file << "_#{target_lang}" if translate
    output_file << extension_from_type(type)
    out_file = "#{output_dir}#{output_file}"
    if create_file(TYPE_VTT, type, out_file, target_lang)
      file_map[type] = out_file
    else
      raise StandardError.new("Failed to create output file for type #{type}")
    end
  end

  # Read the file and prepare the cue model
  # The whole file is read into memory, then scanned line by line.
  cue_info = nil
  ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
  message = ""
  collect_msg = false
  cue_index = 1
  ccfile.each_line do | line |
    # A blank line ends the text of the current cue; lines that follow are
    # ignored until the next timing line.
    if line.strip.empty?
      collect_msg = false
      next 
    end
    # Timing lines look like "00:00:01.000 --> 00:00:02.000"; captures 0 and 2
    # are the full start and end timestamps.
    time_points = line.scan(/^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/)
    if time_points.empty?
      # Not a timing line: keep it only while inside a cue's text.
      if collect_msg
        message << line
      end
    else
      collect_msg = false
      # A new timing line flushes the previous cue. A previous cue whose
      # message is empty is not written and does not advance cue_index.
      unless message.empty?
        cue_info.message = translated_msg(translate, message, src_lang, target_lang)
        write_cue(cue_info, file_map)
        message = ""
        cue_index += 1
      end
      # This is a cue point. Fetch timestamps
      cue_info = CueInfo.new(AllFather::TYPE_VTT)
      cue_info.index = cue_index
      cue_info.start = time_points[0][0]
      cue_info.end = time_points[0][2]
      start_units = time_details(cue_info.start, TYPE_VTT)
      end_units = time_details(cue_info.end, TYPE_VTT)
      cue_info.start_time_units = start_units
      cue_info.end_time_units = end_units
      collect_msg = true
    end
  end
  # Flush the last cue. The extra `true` argument presumably marks it as the
  # final cue so writers can finish their output - confirm against write_cue.
  # NOTE(review): cue_info is still nil here when the file has no timing
  # line at all; confirm write_cue copes with a nil cue.
  cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
  write_cue(cue_info, file_map, true)
end

#translate(src_lang, dest_lang, out_file) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/vtt.rb', line 29

# Writes a translated copy of the VTT file to +out_file+.
#
# Timing lines and every line outside a cue's text (header, cue identifiers,
# blank lines) are copied unchanged. The text of each cue is collected up to
# the next blank line (or end of file) and replaced by the translator's
# output, followed by a blank line. Cue text that parses as JSON is treated
# as metadata and copied untranslated.
#
# * +src_lang+  - language of the source captions
# * +dest_lang+ - language to translate into
# * +out_file+  - path of the file to write
#
# Returns nil. Errors from reading the source, writing the output or from
# the translator propagate to the caller; the output file is always closed.
def translate(src_lang, dest_lang, out_file)
  super(src_lang, dest_lang, out_file)
  source_text = File.open(@cc_file, 'r:UTF-8', &:read)
  timing_line = /^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/

  # Block form closes the output file on every exit path and, unlike an
  # `ensure outfile.close`, cannot raise on a nil handle and mask the real
  # error when the source file could not be read.
  File.open(out_file, "w") do |outfile|
    # Emits one cue's text: JSON metadata verbatim, anything else translated.
    emit_cue_text = lambda do |cue_text|
      parsed = begin
        JSON.parse(cue_text)
      rescue StandardError
        # Not JSON, so this is regular caption text.
        nil
      end
      if parsed.nil?
        outfile.puts @translator.translate(cue_text, src_lang, dest_lang)
      else
        outfile.puts cue_text
      end
      outfile.puts
    end

    collecting = false
    pending_text = +""
    source_text.each_line do |line|
      if line.match?(timing_line)
        collecting = true
        outfile.puts line
      elsif line.strip.empty?
        if pending_text.empty?
          # Blank line outside cue text, or a cue with no text at all: copy it
          # and stop collecting so it is not glued onto the next cue's text.
          collecting = false
          outfile.puts line
        else
          emit_cue_text.call(pending_text)
          pending_text = +""
          collecting = false
        end
      elsif collecting
        pending_text << line
      else
        outfile.puts line
      end
    end

    # The file may end without a trailing blank line; flush the last cue with
    # the same JSON check applied to every other cue.
    emit_cue_text.call(pending_text) unless pending_text.empty?
  end
end