Class: TesseractFFI::Tesseract
- Inherits:
-
Object
- Object
- TesseractFFI::Tesseract
- Includes:
- TesseractFFI, ConfVars, OEM, Rectangles
- Defined in:
- lib/tesseract_ffi/tesseract.rb
Overview
class Tesseract
Constant Summary
Constants included from TesseractFFI
DEFAULT, LEGACY, LEGACY_LTSM, LTSM, VERSION
Instance Attribute Summary collapse
-
#errors ⇒ Object
readonly
Returns the value of attribute errors.
-
#file_name ⇒ Object
Returns the value of attribute file_name.
-
#hocr_text ⇒ Object
readonly
Returns the value of attribute hocr_text.
-
#language ⇒ Object
Returns the value of attribute language.
-
#source_resolution ⇒ Object
Returns the value of attribute source_resolution.
-
#utf8_text ⇒ Object
readonly
Returns the value of attribute utf8_text.
Instance Method Summary collapse
- #convert_to_pdf(output_stem) ⇒ Object
-
#initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT) ⇒ Tesseract
constructor
A new instance of Tesseract.
-
#log(msg) ⇒ Object
Writes the given message to the console.
-
#ocr ⇒ Object
Sets the source resolution, runs recognition, and stores the UTF-8 and hOCR text.
- #recognize ⇒ Object
-
#setup ⇒ Object
Creates and initialises the Tesseract handle, loads the image, yields to the given block, then releases the handle.
Methods included from Rectangles
#recognize_rectangle, #recognize_rectangles, #set_rectangle, #valid_rectangle?, #valid_rectangle_list?
Methods included from OEM
Methods included from ConfVars
#get_double_variable, #get_integer_variable, #print_variables_to_file, #set_variable
Methods included from TesseractFFI
Constructor Details
#initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT) ⇒ Tesseract
Returns a new instance of Tesseract.
14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/tesseract_ffi/tesseract.rb', line 14

# Builds a Tesseract wrapper around one existing image file.
#
# @param file_name [String] path to the image; must be a String naming a file that exists
# @param language [String] language code later passed to tess_init by #setup
# @param source_resolution [Integer] value later passed to tess_set_source_resolution by #ocr
# @param oem [Object] engine-mode constant later passed to tess_init by #setup
# @raise [TessException] when file_name is not a String or does not name an existing file
def initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT)
  unless file_name.is_a?(String) && File.exist?(file_name)
    # Interpolate instead of concatenating: on this branch file_name may be nil
    # or any non-String object, and String#+ would raise TypeError before the
    # intended TessException could be raised.
    log "Error: Tesseract needs a file #{file_name || 'no name given'}"
    raise TessException.new(error_msg: 'file_name must be provided')
  end

  @file_name = file_name
  @language = language
  @source_resolution = source_resolution
  @oem = oem
  @errors = []
end
Instance Attribute Details
#errors ⇒ Object (readonly)
Returns the value of attribute errors.
12 13 14 |
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for the errors attribute.
#
# @return [Object] the current value of @errors
def errors
  @errors
end
#file_name ⇒ Object
Returns the value of attribute file_name.
11 12 13 |
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for the file_name attribute.
#
# @return [Object] the current value of @file_name
def file_name
  @file_name
end
#hocr_text ⇒ Object (readonly)
Returns the value of attribute hocr_text.
12 13 14 |
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for the hocr_text attribute.
#
# @return [Object] the current value of @hocr_text
def hocr_text
  @hocr_text
end
#language ⇒ Object
Returns the value of attribute language.
11 12 13 |
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for the language attribute.
#
# @return [Object] the current value of @language
def language
  @language
end
#source_resolution ⇒ Object
Returns the value of attribute source_resolution.
11 12 13 |
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for the source_resolution attribute.
#
# @return [Object] the current value of @source_resolution
def source_resolution
  @source_resolution
end
#utf8_text ⇒ Object (readonly)
Returns the value of attribute utf8_text.
12 13 14 |
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for the utf8_text attribute.
#
# @return [Object] the current value of @utf8_text
def utf8_text
  @utf8_text
end
Instance Method Details
#convert_to_pdf(output_stem) ⇒ Object
69 70 71 72 73 74 75 |
# File 'lib/tesseract_ffi/tesseract.rb', line 69

# Processes @file_name through a Tesseract PDF renderer.
#
# The work runs inside the block passed to #setup, so @handle is the handle
# that #setup prepared.
#
# @param output_stem [String] first argument to tess_pdf_renderer_create
#   (presumably the output path without extension; confirm against the Tesseract C API)
# @param timeout_millisec [Integer] fourth argument to tess_process_pages;
#   defaults to 5000, the value that used to be hard-coded
# @param text_only [Boolean] third argument to tess_pdf_renderer_create;
#   defaults to false, the value that used to be hard-coded
# @return [Object] the value of the block as returned by #setup, i.e. the
#   result of tess_process_pages
def convert_to_pdf(output_stem, timeout_millisec: 5000, text_only: false)
  setup do
    datapath = TesseractFFI.tess_get_datapath(@handle)
    pdf_renderer = TesseractFFI.tess_pdf_renderer_create(output_stem, datapath, text_only)
    # NOTE(review): the renderer is never released in this method and the
    # result of tess_process_pages is not inspected; confirm whether the FFI
    # binding exposes a renderer-delete call and what the return type is.
    TesseractFFI.tess_process_pages(@handle, @file_name, nil, timeout_millisec, pdf_renderer)
  end
end
#log(msg) ⇒ Object
Writes the given message to the console
28 29 30 |
# File 'lib/tesseract_ffi/tesseract.rb', line 28

# Prints a message on standard output.
#
# @param msg [Object] anything puts can print
# @return [nil]
def log(msg)
  $stdout.puts(msg)
end
#ocr ⇒ Object
Sets the source resolution, runs recognition, and stores the UTF-8 and hOCR text
55 56 57 58 59 60 61 |
# File 'lib/tesseract_ffi/tesseract.rb', line 55

# Runs recognition on @handle and stores the extracted text.
#
# Applies @source_resolution to the handle, calls tess_recognize, and on a
# zero status stores the results in @utf8_text and @hocr_text.
#
# @return [Object] the value assigned to @hocr_text
# @raise [TessException] with 'Recognition Error' when tess_recognize returns non-zero
def ocr
  tess_set_source_resolution(@handle, @source_resolution)

  status = tess_recognize(@handle, 0)
  unless status == 0
    raise TessException.new(error_msg: 'Recognition Error')
  end

  @utf8_text = tess_get_utf8(@handle)
  @hocr_text = tess_get_hocr(@handle, 0)
end
#recognize ⇒ Object
63 64 65 66 67 |
# File 'lib/tesseract_ffi/tesseract.rb', line 63

# Runs #ocr inside the block passed to #setup.
#
# @return [Object] whatever #setup returns for the block
def recognize
  setup { ocr }
end
#setup ⇒ Object
Creates and initialises the Tesseract handle, loads the image, yields to the given block, then releases the handle
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/tesseract_ffi/tesseract.rb', line 33

# Prepares a Tesseract handle for @file_name, yields to the caller's block,
# and releases the handle afterwards whether or not the block succeeded.
#
# Steps: tess_create, tess_init with @language and @oem, tess_pix_read of
# @file_name, tess_set_image, then yield.
#
# @yield the caller's work (recognition etc.), run while @handle is live
# @return [Object] the value of the block
# @raise [TessException] for 'Library Error', 'Init Error', an image that
#   cannot be set, or any TessException raised by the block; the message is
#   appended to @errors and logged before being re-raised
def setup
  @handle = tess_create
  raise TessException.new(error_msg: 'Library Error') unless @handle

  result = tess_init(@handle, 0, @language, @oem)
  raise TessException.new(error_msg: 'Init Error') if result != 0

  @image = tess_pix_read(@file_name)
  image_status = tess_set_image(@handle, @image)
  raise TessException.new(error_msg: "Unable to set image #{@file_name}") if image_status != 0

  yield # run the block for recognition etc
rescue TessException => e
  @errors << "Tesseract Error #{e.error[:error_msg]}"
  log @errors
  raise
ensure
  # Tear down only a handle that was actually created: on the 'Library Error'
  # path @handle is nil and must not be passed to tess_end/tess_delete.
  if @handle
    tess_end(@handle)
    tess_delete(@handle)
    # Drop the reference so a deleted handle can never be reused or released twice.
    @handle = nil
  end
end