Class: Storyboard::SRT
Defined Under Namespace
Classes: Page
Constant Summary collapse
- SPAN_REGEX =
'[[:digit:]]+:[[:digit:]]+:[[:digit:]]+[,\.][[:digit:]]+'
Instance Attribute Summary collapse
-
#encoding ⇒ Object
Returns the value of attribute encoding.
-
#options ⇒ Object
Returns the value of attribute options.
-
#pages ⇒ Object
Returns the value of attribute pages.
-
#text ⇒ Object
Returns the value of attribute text.
Instance Method Summary collapse
- #check_bom(line) ⇒ Object
-
#clean_promos ⇒ Object
Strip out obnoxious “CREATED BY L33T DUD3” or “DOWNLOADED FROM __” text.
- #count ⇒ Object
- #fix_encoding(l) ⇒ Object
-
#initialize(contents, parent_options) ⇒ SRT
constructor
A new instance of SRT.
-
#parse ⇒ Object
Some SRT files are badly malformed, so parsing should be able to use more than a single regex.
- #save ⇒ Object
- #to_s ⇒ Object
Constructor Details
#initialize(contents, parent_options) ⇒ SRT
Returns a new instance of SRT.
92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/storyboard/subtitles.rb', line 92
# Builds an SRT from raw subtitle text.
#
# Steps, in order: store the options and text, detect the encoding from the
# first line's byte-order mark (check_bom sets @encoding), publish it through
# Storyboard.current_encoding, re-tag the text with that encoding, parse the
# text into pages, drop promotional pages, and log the resulting page count.
#
# @param contents [String] raw contents of the subtitle file
# @param parent_options [Object] stored in @options; indexed with symbol keys
#   elsewhere in this class (e.g. @options[:nudge]) -- presumably a Hash
def initialize(contents, parent_options)
  @options = parent_options
  @text = contents
  @pages = []
  @needs_KFhimaji = false
  check_bom(@text.lines.first)
  Storyboard.current_encoding = @encoding
  @text = text.force_encoding(Storyboard.current_encoding)
  parse
  clean_promos
  LOG.info("Parsed subtitle file. #{count} entries found.")
end
Instance Attribute Details
#encoding ⇒ Object
Returns the value of attribute encoding.
90 91 92 |
# File 'lib/storyboard/subtitles.rb', line 90
# Reader for the detected encoding name.
#
# @return [String, nil] the value of @encoding ('UTF-8' or "UTF-16LE" once
#   check_bom has run)
def encoding
  @encoding
end
#options ⇒ Object
Returns the value of attribute options.
90 91 92 |
# File 'lib/storyboard/subtitles.rb', line 90
# Reader for the options object handed to the constructor.
#
# @return [Object] the value of @options
def options
  @options
end
#pages ⇒ Object
Returns the value of attribute pages.
90 91 92 |
# File 'lib/storyboard/subtitles.rb', line 90
# Reader for the parsed subtitle pages.
#
# @return [Array, nil] the value of @pages (an Array once the constructor
#   has run)
def pages
  @pages
end
#text ⇒ Object
Returns the value of attribute text.
90 91 92 |
# File 'lib/storyboard/subtitles.rb', line 90
# Reader for the subtitle text held by this object.
#
# @return [String, nil] the value of @text
def text
  @text
end
Instance Method Details
#check_bom(line) ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/storyboard/subtitles.rb', line 106
# Detects a byte-order mark at the start of +line+ and records the implied
# encoding name in @encoding.
#
# A leading FF FE selects "UTF-16LE". Anything else -- a UTF-8 BOM
# (EF BB BF), no BOM, an empty string or nil -- selects 'UTF-8'.
#
# The argument is inspected byte-wise and is neither modified nor re-tagged,
# so frozen strings are accepted.
#
# @param line [String, nil] first line of the subtitle file
# @return [String, nil] +line+ itself, untouched
def check_bom(line)
  # to_s tolerates nil, which is what an empty file's `lines.first` yields.
  head = line.to_s.bytes.first(2)
  @encoding =
    if head == [0xFF, 0xFE] # UTF-16 little-endian BOM
      'UTF-16LE'
    else
      'UTF-8'
    end
  line
end
#clean_promos ⇒ Object
Strip out obnoxious “CREATED BY L33T DUD3” or “DOWNLOADED FROM __” text
169 170 171 172 173 174 175 176 177 |
# File 'lib/storyboard/subtitles.rb', line 169
# Strips promotional pages such as "Subtitles downloaded from ..." credits.
#
# A page is removed when any entry of page[:lines] matches one of the known
# promo phrases (case-sensitive). @pages is modified in place.
#
# @return [Array] @pages after the removal
def clean_promos
  promo = Regexp.union(
    /Subtitles downloaded/,
    /addic7ed/,
    /OpenSubtitles/,
    /sync, corrected by/
  )
  @pages.delete_if { |page| !page[:lines].grep(promo).empty? }
end
#count ⇒ Object
190 191 192 |
# File 'lib/storyboard/subtitles.rb', line 190
# Number of pages currently held.
#
# @return [Integer] @pages.count
def count
  @pages.count
end
#fix_encoding(l) ⇒ Object
120 121 122 123 124 125 126 |
# File 'lib/storyboard/subtitles.rb', line 120
# Repairs a line that is really ISO-8859-1 inside a file treated as UTF-8.
#
# Only acts when @encoding is 'UTF-8'. If the line's bytes already form valid
# UTF-8 they are kept as they are (returned as a UTF-8-tagged copy). Otherwise
# each byte is reinterpreted as the Unicode code point of the same value,
# which is exactly the ISO-8859-1 to UTF-8 mapping.
#
# The previous test, `(bytes | [233,146]).empty?`, used array union and so
# was true for every line; valid multibyte UTF-8 was re-encoded and mangled.
#
# @param l [String] one line of subtitle text
# @return [String] the line, converted when necessary; +l+ is not modified
def fix_encoding(l)
  return l unless @encoding == 'UTF-8'

  # dup so that re-tagging for the validity check never touches the argument.
  as_utf8 = l.dup.force_encoding('UTF-8')
  return as_utf8 if as_utf8.valid_encoding?

  l.unpack('C*').pack('U*')
end
#parse ⇒ Object
Some SRT files are badly malformed, so parsing should be able to use more than a single regex. Handling such malformed input gracefully is still to be done.
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/storyboard/subtitles.rb', line 130
# Parses @text into Page objects appended to @pages.
#
# The text is read line by line through a three-phase state machine:
#   :line_no - expects the block number (blank lines are skipped)
#   :time    - expects "<start> --> <end>", each side matching SPAN_REGEX
#   :text    - collects subtitle lines until a blank line closes the page
#
# Each line first goes through fix_encoding and is stripped. Markup tags
# (<...>) are removed from subtitle lines, and @options[:nudge] is added to
# both parsed timestamps.
#
# A page still open when the input ends is kept, so a file without a trailing
# blank line does not lose its last entry. The time-line clean-up keeps '.',
# so both separators accepted by SPAN_REGEX (',' and '.') survive it.
#
# @return [String] @text (what each_line returned before this rewrite)
# @raise [RuntimeError] when a block number or a time range is expected but
#   the line holds something else
def parse
  phase = :line_no
  page = nil
  @text.each_line do |raw|
    line = fix_encoding(raw).strip
    case phase
    when :line_no
      line = line.gsub(Storyboard.encode_regexp('\W'), '')
      if line =~ Storyboard.encode_regexp('^\d+$')
        page = Page.new(@pages.count + 1, nil, nil, [])
        phase = :time
      elsif !line.empty?
        raise "Bad SRT File: Should have a block number but got '#{line.force_encoding('UTF-8')}' [#{line.bytes.to_a.join(',')}]"
      end
    when :time
      # Drop stray characters, keeping everything a time range can contain.
      line = line.gsub(Storyboard.encode_regexp('[^\,\.\:[0-9] \-\>]'), '')
      if line =~ Storyboard.encode_regexp("^(#{SPAN_REGEX}) --> (#{SPAN_REGEX})$")
        page[:start_time] = STRTime.parse($1) + @options[:nudge]
        page[:end_time] = STRTime.parse($2) + @options[:nudge]
        phase = :text
      else
        raise "Bad SRT File: Should have time range but got '#{line}'".force_encoding(Storyboard.current_encoding)
      end
    when :text
      if line.empty?
        phase = :line_no
        @pages << page
      else
        Storyboard.needs_KFhimaji(true) if line.contains_cjk?
        page[:lines] << line.gsub(Storyboard.encode_regexp("<\/?[^>]*>"), "").encode!("UTF-8")
      end
    end
  end
  # Input ended in the middle of a page's text: keep that page.
  @pages << page if phase == :text
  @text
end
#save ⇒ Object
179 180 181 182 183 184 |
# File 'lib/storyboard/subtitles.rb', line 179
# Writes the result of to_s to "<work_dir>/<basename>.srt", where both parts
# come from @options[:work_dir] and @options[:basename]. An existing file is
# overwritten (mode 'w').
#
# @return [self] so calls can be chained
def save
  path = File.join(@options[:work_dir], @options[:basename] + '.srt')
  File.open(path, 'w') { |f| f.write(to_s) }
  self
end
#to_s ⇒ Object
186 187 188 |
# File 'lib/storyboard/subtitles.rb', line 186
# String form of this object: whatever the text reader returns.
#
# @return [String] the subtitle text
def to_s
  text
end