Class: TedTalk::Converter
Instance Method Summary
collapse
Methods included from UnixTools
check_command, delete_dir
download_successful?, get_binary, get_final_location, get_html, get_json, get_wav
Constructor Details
#initialize(url) ⇒ Converter
Returns a new instance of Converter.
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# File 'lib/ted_talk.rb', line 71
# Builds a converter for the TED talk page at +url+.
#
# The URL is normalised to "http://www.ted.com/talks/<name>.html", the page
# is fetched and scraped for the talk id, video URL, title, speaker and the
# list of caption language codes; English title/description and the
# language-name table are then loaded through the sibling helpers.
#
# The process exits (after printing a message) when the URL does not match
# the expected pattern, when the page cannot be fetched, or when any
# StandardError is raised while scraping.
#
# @param url [String] address of a TED talk page
def initialize(url)
  unless /(?:http\:\/\/)?(?:www\.)?ted\.com\/talks\/(?:lang\/[^\/]+\/)?(.+\.html)/ =~ url
    puts "The specified URL does not seem to be a valid one"
    exit
  end
  @url = "http://www.ted.com/talks/" + Regexp.last_match(1)

  page = get_html(@url)
  unless page
    puts "The specified URL does not respond with a TED Talk content"
    exit
  end
  @html = page

  @url_basename = File.basename(@url)
  ted_doc = Nokogiri::HTML(@html)
  @ted_id = ted_doc.xpath("//div[@id='share_and_save']").first.attribute("data-id").value
  @video_url = ted_doc.xpath("//a[@id='no-flash-video-download']").attribute("href").value
  @basename = File.basename(@video_url, ".*")
  @captions = {}

  # Title and speaker are best-effort: any failure leaves an empty string.
  @title = begin
    ted_doc.xpath("//h1[1]").text.strip
  rescue StandardError
    ""
  end
  @speaker = begin
    @title.split(":", 2).first.strip
  rescue StandardError
    ""
  end

  # Collect every non-blank language code offered by the page's selector.
  @available_langs = []
  ted_doc.xpath("//select[@id='languageCode'][1]/option").each do |option|
    code = option.attributes["value"].value.strip
    @available_langs << code unless code.empty?
  end
  @available_langs.sort!

  @titles = {}
  @titles["en"] = get_title("en")
  @descriptions = {}
  @descriptions["en"] = get_description("en")
  @language_hash = list_langs
rescue StandardError
  # SystemExit raised by the `exit` calls above is not a StandardError,
  # so it passes through this handler untouched.
  puts "The specified URL does not seem to contain a regular TED Talk contents"
  exit
end
|
Instance Method Details
#desc_talk(lang = "en") ⇒ Object
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# File 'lib/ted_talk.rb', line 124
# Prints the talk's title, description and available caption languages.
#
# The English title is always printed; the title in +lang+ is printed as
# well when +lang+ is not "en". When no description is stored for +lang+
# the English one is shown instead.
#
# Language codes that have no entry in @language_hash are listed under
# their code instead of raising a TypeError on string concatenation.
#
# @param lang [String] language code for the title/description to show
# @return [Array<String>] the list of available language codes
def desc_talk(lang = "en")
  setup_lang(lang)
  lang = "en" unless @descriptions[lang]

  puts "\nTitle:\n" + @titles["en"]
  puts @titles[lang] if lang != "en"
  puts ""
  puts "Description:\n" + @descriptions[lang]
  puts ""
  puts "Available Languages: "
  @available_langs.each do |lang_code|
    # The name table and the code list are scraped from different pages,
    # so a code may be missing from the table.
    lang_name = @language_hash[lang_code] || lang_code
    puts " " + lang_name + ": " + lang_code
  end
end
|
#execute(outdir = "./", lang = "en", speed = 1, silence = 0, video = false) ⇒ Object
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
# File 'lib/ted_talk.rb', line 141
# Runs the whole conversion: fetches captions, obtains the video and a WAV
# file through the helper methods, produces "<basename>-result.mp3" with
# SpeakSlow and writes the caption text into the MP3 tag.
#
# Output goes to "<outdir>/<ted_id>-<basename>", which is created when it
# does not exist yet (the parent +outdir+ must already exist).
#
# @param outdir [String] parent directory for the per-talk output directory
# @param lang [String] caption language code to fetch besides English
# @param speed [Numeric] speed value handed to SpeakSlow
# @param silence [Numeric] silence value handed to SpeakSlow
# @param video [Boolean] when truthy, the video file is copied to the output directory
def execute(outdir = "./", lang = "en", speed = 1, silence = 0, video = false)
  puts "TedTalk is preparing for the process"
  @outdir = File.join(outdir, @ted_id + "-" + @basename)
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  Dir.mkdir(@outdir) unless File.exist?(@outdir)
  @speed = speed
  @silence = silence
  @lang = lang

  get_captions("en")
  setup_lang(lang)
  get_captions(lang)

  video_filepath = get_binary(@video_url)
  wav_filepath = get_wav(video_filepath)
  outfile = @outdir + "/" + @basename + "-result.mp3"
  speakslow = SpeakSlow::Converter.new(wav_filepath, outfile)
  speakslow.execute(speed, silence)
  write_info(outfile)

  # Argument-list form bypasses the shell, so paths containing spaces or
  # shell metacharacters are copied correctly.
  system("cp", video_filepath, @outdir + "/") if video
end
|
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
|
# File 'lib/ted_talk.rb', line 267
# Renders the caption list as plain text: a four-line header (talk id,
# speaker, title with caption language, URL) followed by one numbered line
# per caption. Numbers are zero-padded to the width of the caption count,
# and a blank line precedes every caption that starts a paragraph.
#
# @param captions [Array<Hash>] entries with :content and :start_of_paragraph
# @return [String] the formatted text
def format_captions(captions)
  caption_language = @lang_name || "English"
  width = captions.size.to_s.size

  header = [
    "TED Talk ID: #{@ted_id}\n",
    "Speaker: #{@speaker}\n",
    "Title: #{@title} (with captions in #{caption_language})\n",
    "URL: #{@url}\n\n"
  ].join

  body = []
  captions.each_with_index do |caption, position|
    body << "\n" if caption[:start_of_paragraph]
    number = (position + 1).to_s.rjust(width, "0")
    body << "#{number} #{caption[:content]} \n"
  end

  header + body.join
end
|
283
284
285
286
287
288
289
290
291
292
293
|
# File 'lib/ted_talk.rb', line 283
# Formats a duration given in milliseconds as "MM.SS.CC": minutes, seconds
# and hundredths of a second, each zero-padded to at least two digits.
#
# @param time [Integer] duration in milliseconds
# @return [String] e.g. 61234 => "01.01.23"
def format_time(time)
  total_seconds = time / 1000
  minutes = total_seconds / 60
  seconds = total_seconds - minutes * 60
  hundredths = time % 1000 / 10
  format("%02d.%02d.%02d", minutes, seconds, hundredths)
end
|
#get_captions(lang = "en") ⇒ Object
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
# File 'lib/ted_talk.rb', line 178
# Fetches the captions for +lang+, converts them into a list of hashes,
# writes the formatted text to "<outdir>/<basename>-<lang>.txt" and stores
# the list in @captions[lang].
#
# The list starts with an empty lead-in entry at time 0 whose :duration is
# set to the shifted start time of the first real caption. Every caption's
# start time is shifted by INTRO_DURATION - AD_DURATION + POST_AD_DURATION.
#
# @param lang [String] caption language code
# @return [Array<Hash>, false] the caption entries, or false (after
#   printing a notice) when +lang+ is not among the available languages
def get_captions(lang = "en")
  unless @available_langs.include?(lang)
    puts "Caption in #{lang} is not available"
    return false
  end

  subtitles_url = "http://www.ted.com/talks/subtitles/id/#{@ted_id}"
  subtitles_url << "/lang/#{lang}" if lang != "en"
  raw_captions = get_json(subtitles_url)["captions"]

  # Ids are zero-padded to the number of digits in the caption count.
  id_format = "%0#{raw_captions.size.to_s.size}d"

  lead_in = {
    :id => format(id_format, 0),
    :start_time_s => "00.00.00",
    :duration => nil,
    :content => "",
    :start_of_paragraph => false,
    :start_time => 0
  }
  entries = [lead_in]

  raw_captions.each_with_index do |raw, position|
    begins_at = INTRO_DURATION - AD_DURATION + POST_AD_DURATION + raw["startTime"].to_i
    length = raw["duration"].to_i
    entry = {
      :id => format(id_format, position + 1),
      :start_time => begins_at,
      :start_time_s => format_time(begins_at),
      :duration => length,
      :content => raw["content"].gsub(/\s+/, " "),
      :end_time_s => format_time(begins_at + length),
      :start_of_paragraph => raw["startOfParagraph"]
    }
    # The lead-in lasts until the first real caption begins.
    lead_in[:duration] = begins_at if position.zero?
    entries << entry
  end

  File.open(@outdir + "/" + @basename + "-" + lang + ".txt", "w") do |file|
    file.write format_captions(entries)
  end
  @captions[lang] = entries
  entries
end
|
#get_description(lang) ⇒ Object
170
171
172
173
174
175
176
|
# File 'lib/ted_talk.rb', line 170
# Fetches the talk page for +lang+ and returns the content of its
# <meta name="description"> tag, stripped of surrounding whitespace.
#
# Returns an empty string when the page has no such tag or the tag has no
# content attribute. (Previously the `|| ""` fallback was unreachable
# because a missing tag raised NoMethodError first.)
#
# @param lang [String] language code used in the page URL
# @return [String] the description, or "" when the page does not provide one
def get_description(lang)
  lang_url = "http://www.ted.com/talks/lang/#{lang}/" + @url_basename
  lang_doc = Nokogiri::HTML(get_html(lang_url))
  meta = lang_doc.xpath("//meta[@name='description']").first
  content = meta && meta.attribute("content")
  content ? content.value.strip : ""
end
|
#get_title(lang) ⇒ Object
163
164
165
166
167
168
|
# File 'lib/ted_talk.rb', line 163
# Fetches the talk page for +lang+ and returns the part of its
# <meta name="title"> content that precedes the first "|", stripped.
#
# Extraction is best-effort: any StandardError raised while reading the
# meta tag yields an empty string. Errors from fetching or parsing the
# page itself are not rescued here.
#
# @param lang [String] language code used in the page URL
# @return [String] the title, or "" when it cannot be extracted
def get_title(lang)
  page = get_html("http://www.ted.com/talks/lang/#{lang}/" + @url_basename)
  document = Nokogiri::HTML(page)
  begin
    meta = document.xpath("//meta[@name='title']").first
    meta.attribute("content").value.split("|").first.strip
  rescue StandardError
    ""
  end
end
|
#get_video_urls(html) ⇒ Object
295
296
297
|
# File 'lib/ted_talk.rb', line 295
# Returns every download.ted.com MP4 URL in +html+ that belongs to this
# talk (the path starts with @basename), sorted alphabetically.
#
# The host dots and @basename are matched literally, so look-alike hosts
# are rejected and regex metacharacters in the basename do not change
# what is matched.
#
# @param html [String] page source to search
# @return [Array<String>] sorted list of matching video URLs
def get_video_urls(html)
  pattern = %r{http://download\.ted\.com/talks/#{Regexp.escape(@basename.to_s)}.*?\.mp4}
  html.scan(pattern).sort
end
|
#list_langs ⇒ Object
218
219
220
221
222
223
224
225
226
227
228
229
|
# File 'lib/ted_talk.rb', line 218
# Scrapes the TED translation languages page and builds a lookup table
# from language code to language name.
#
# The code is the last path segment of each link's href; the name is the
# link text with its first parenthesised part removed.
#
# @return [Hash{String => String}] language code => language name
def list_langs
  page = get_html("http://www.ted.com/translate/languages")
  links = Nokogiri::HTML(page).xpath("//div[@id='content'][1]//ul//a")

  names_by_code = {}
  links.each do |link|
    label = link.text
    code = link.attribute("href").value.split("/").last.strip
    names_by_code[code] = label.sub(/\(.+?\)/, "").strip
  end
  names_by_code
end
|
#setup_lang(lang) ⇒ Object
111
112
113
114
115
116
117
118
119
120
121
122
|
# File 'lib/ted_talk.rb', line 111
# Makes +lang+ the current language and, for non-English languages, loads
# its title, description and display name.
#
# Title and description are best-effort: a StandardError while fetching
# either one stores an empty string. Selecting "en" clears @lang_name so a
# name left over from an earlier non-English selection does not leak into
# later output.
#
# @param lang [String] language code to select
# @return [String, nil, false] false (after printing a notice) when +lang+
#   is not available; otherwise the language's display name (nil for "en"
#   or when the name is unknown)
def setup_lang(lang)
  unless @available_langs.include?(lang)
    puts "Description in #{lang} is not available"
    return false
  end

  @lang = lang
  if lang == "en"
    @lang_name = nil
  else
    @titles[lang] = begin
      get_title(lang)
    rescue StandardError
      ""
    end
    @descriptions[lang] = begin
      get_description(lang)
    rescue StandardError
      ""
    end
    @lang_name = @language_hash[lang]
  end
end
|
#write_info(filepath) ⇒ Object
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
# File 'lib/ted_talk.rb', line 231
# Writes ID3v2 metadata and the caption text (as an unsynchronised lyrics
# frame) into the MP3 at +filepath+.
#
# The lyrics text holds the English title and description, followed by the
# captions; when the current language is not English, each English line is
# followed by its translation.
#
# Differences from the earlier behaviour, all bug fixes:
# * the text is built in a fresh buffer, so the string cached in
#   @titles["en"] is no longer mutated by the appends;
# * the English title is followed by a newline instead of running into
#   the translated title;
# * when the current language is "en" nothing is written twice;
# * missing English captions produce a header-only text instead of a crash.
#
# @param filepath [String] path of the MP3 file to tag
def write_info(filepath)
  puts "Writing captions to MP3"
  TagLib::MPEG::File.open(filepath) do |mp3|
    tag = mp3.id3v2_tag
    tag.artist = "TED Talk "

    title = @title.to_s
    title += " (with captions in #{@lang_name})" if @lang_name
    title += " [x#{@speed}]" if @speed && @speed != 1
    tag.title = title
    tag.genre = "Talk"

    # nil when only English is requested, so English is not emitted twice.
    other_lang = @lang == "en" ? nil : @lang
    other_captions = other_lang && @captions[other_lang]

    parts = []
    parts << @titles["en"] + "\n" if @titles["en"]
    parts << @titles[other_lang] + "\n" if other_lang && @titles[other_lang]
    parts << "--------------------\n"
    parts << @descriptions["en"] + "\n" if @descriptions["en"]
    parts << @descriptions[other_lang] + "\n" if other_lang && @descriptions[other_lang]
    parts << "\n"

    (@captions["en"] || []).each_with_index do |caption, index|
      parts << "--------------------\n\n" if caption[:start_of_paragraph]
      next if caption[:content] == ""
      parts << caption[:content] + "\n"
      next unless other_captions
      # The translation may have fewer entries than the English captions.
      translated = other_captions[index]
      translated_text = translated && translated[:content]
      parts << translated_text + "\n\n" if translated_text
    end

    uslt = TagLib::ID3v2::UnsynchronizedLyricsFrame.new
    uslt.language = "eng"
    uslt.text_encoding = TagLib::String::UTF8
    uslt.text = parts.join
    tag.add_frame(uslt)
    mp3.save
  end
end
|