Module: RTesseract::Box
- Extended by:
- Base
- Defined in:
- lib/rtesseract/box.rb
Class Method Summary
- .parse(content) ⇒ Object
- .parse_confidence(line) ⇒ Object
- .parse_line(line) ⇒ Object
- .parse_position(line) ⇒ Object
- .run(source, errors, options) ⇒ Object
- .word_info(word, positions, confidence) ⇒ Object
Methods included from Base
Class Method Details
.parse(content) ⇒ Object
16 17 18 |
# File 'lib/rtesseract/box.rb', line 16
# Parses hOCR content into a list of word descriptions.
#
# Each line of +content+ is passed to +parse_line+; lines for which
# +parse_line+ returns nil are left out of the result.
#
# @param content [String] text to split into lines and parse
# @return [Array] the non-nil results of +parse_line+, in line order
def parse(content)
  content.lines.each_with_object([]) do |line, words|
    info = parse_line(line)
    words << info unless info.nil?
  end
end
.parse_confidence(line) ⇒ Object
45 46 47 |
# File 'lib/rtesseract/box.rb', line 45
# Extracts the word-confidence tokens from one hOCR line.
#
# The +x_wconf+ property is looked up by name, so the value is found even
# when further properties follow it in the title (for example
# "; x_fsize 12") or when the attribute is double-quoted. The result keeps
# the token layout callers rely on: the confidence is the last element.
#
# When no +x_wconf+ property is present, the text between the first ";" and
# the following "'" is split on whitespace, as before.
#
# @param line [#to_s] one line of hOCR markup
# @return [Array<String>] e.g. ["x_wconf", "95"]; [] when nothing matches
def parse_confidence(line)
  text = line.to_s
  value = text[/x_wconf\s+(-?\d+(?:\.\d+)?)/, 1]
  return ['x_wconf', value] if value

  # Legacy fallback: everything between the first ";" and the next "'".
  text.match(/(?<=;)(.*?)(?=')/).to_s.split
end
.parse_line(line) ⇒ Object
20 21 22 23 24 25 26 27 28 |
# File 'lib/rtesseract/box.rb', line 20
# Turns one hOCR line into a word description, or nil when the line does not
# describe a word.
#
# A line is considered only if it mentions "ocrx_word" or "ocr_word". The
# word is the text between the first ">" and the last "<" on the line; a
# blank word yields nil.
#
# @param line [String] one line of hOCR markup
# @return [Hash, nil] the result of +word_info+, or nil when the line is skipped
def parse_line(line)
  if line.match?(/oc(rx|r)_word/)
    text = line.to_s[/>(.*)</, 1].to_s
    word_info(text, parse_position(line), parse_confidence(line)) unless text.strip.empty?
  end
end
.parse_position(line) ⇒ Object
41 42 43 |
# File 'lib/rtesseract/box.rb', line 41
# Extracts the bounding-box tokens from one hOCR line.
#
# Captures the text that follows "title" up to the first ";" and splits it
# on whitespace. If the title has no ";" (it holds only the bbox), the
# capture stops at the quote that closes the tag instead, so the
# coordinates are still returned rather than an empty list.
#
# The first token is the leading attribute text (e.g. "='bbox"); the four
# coordinates sit at indexes 1..4.
#
# @param line [String] one line of hOCR markup
# @return [Array<String>] e.g. ["='bbox", "36", "92", "96", "116"]; [] when
#   the line has no "title"
def parse_position(line)
  line.match(/(?<=title)(.*?)(?=;|['"]\s*>)/).to_s.split
end
.run(source, errors, options) ⇒ Object
8 9 10 11 12 13 14 |
# File 'lib/rtesseract/box.rb', line 8
# Runs the OCR command with hOCR output enabled and parses the resulting
# ".hocr" file.
#
# +tessedit_create_hocr: 1+ is merged into a copy of +options+; the caller's
# hash is not modified.
#
# @param source  passed through to RTesseract::Command
# @param errors  passed through to RTesseract::Command
# @param options [Hash] command options; must respond to +merge+
# @return whatever RTesseract::Command#run returns for the block
#   (presumably the parsed word list — confirm against Command#run)
def run(source, errors, options)
  hocr_options = options.merge({ tessedit_create_hocr: 1 })

  # NOTE(review): temp_file_path is not defined in this module; presumably it
  # comes from Base — confirm.
  RTesseract::Command.new(source, temp_file_path, errors, hocr_options).run do |output_path|
    parse(File.read("#{output_path}.hocr"))
  end
end
.word_info(word, positions, confidence) ⇒ Object
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/rtesseract/box.rb', line 30
# Builds the description hash for a single word.
#
# Coordinates are read from indexes 1..4 of +positions+ and the confidence
# from the last element of +confidence+; each is converted with +to_i+, so a
# missing entry becomes 0.
#
# @param word [String] the recognised text
# @param positions [Array] tokens with the coordinates at indexes 1..4
# @param confidence [Array] tokens whose last element is the confidence
# @return [Hash] keys :word, :confidence, :x_start, :y_start, :x_end, :y_end
def word_info(word, positions, confidence)
  left, top, right, bottom = [1, 2, 3, 4].map { |index| positions[index].to_i }
  score = confidence[-1].to_i

  {
    word: word,
    confidence: score,
    x_start: left,
    y_start: top,
    x_end: right,
    y_end: bottom
  }
end