Class: TesseractFFI::Tesseract

Inherits:
Object
  • Object
show all
Includes:
TesseractFFI, ConfVars, OEM, Rectangles
Defined in:
lib/tesseract_ffi/tesseract.rb

Overview

class Tesseract

Constant Summary

Constants included from TesseractFFI

DEFAULT, LEGACY, LEGACY_LTSM, LTSM, VERSION

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Rectangles

#recognize_rectangle, #recognize_rectangles, #set_rectangle, #valid_rectangle?, #valid_rectangle_list?

Methods included from OEM

#oem

Methods included from ConfVars

#get_double_variable, #get_integer_variable, #print_variables_to_file, #set_variable

Methods included from TesseractFFI

to_pdf, to_text

Constructor Details

#initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT) ⇒ Tesseract

Returns a new instance of Tesseract.



14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/tesseract_ffi/tesseract.rb', line 14

# Builds a wrapper around a single image file and records the settings
# that the later Tesseract calls read from instance variables.
#
# @param file_name [String] path of an existing file (checked with File.exist?)
# @param language [String] stored in @language; passed to tess_init by #setup
# @param source_resolution [Integer] stored in @source_resolution; passed to
#   tess_set_source_resolution by #ocr
# @param oem [Object] stored in @oem; passed to tess_init by #setup
# @raise [TessException] if file_name is not a String naming an existing file
def initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT)
  unless file_name.is_a?(String) && File.exist?(file_name)
    # Interpolate instead of concatenating: `String + non-String` raises
    # TypeError, which would hide the TessException for inputs like 123.
    log "Error: Tesseract needs a file #{file_name || 'no name given'}"
    raise TessException.new(error_msg: 'file_name must be provided')
  end

  @file_name = file_name
  @language = language
  @source_resolution = source_resolution
  @oem = oem
  @errors = []
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



12
13
14
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for the error list kept in @errors.
#
# The constructor sets @errors to an empty array and #setup appends a
# "Tesseract Error ..." string for each TessException it rescues.
#
# @return [Array<String>] the value of @errors
def errors
  @errors
end

#file_name ⇒ Object

Returns the value of attribute file_name.



11
12
13
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for @file_name, which the constructor sets from its file_name
# argument after checking that it is a String naming an existing file.
#
# @return [String] the value of @file_name
def file_name
  @file_name
end

#hocr_text ⇒ Object (readonly)

Returns the value of attribute hocr_text.



12
13
14
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for @hocr_text, which #ocr assigns from tess_get_hocr(@handle, 0).
#
# @return [Object, nil] the value of @hocr_text; nil until #ocr has assigned it
#   (presumably an hOCR markup String - confirm against the binding)
def hocr_text
  @hocr_text
end

#language ⇒ Object

Returns the value of attribute language.



11
12
13
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for @language, which the constructor sets from its language
# argument (default 'eng') and #setup passes to tess_init.
#
# @return [Object] the value of @language
def language
  @language
end

#source_resolution ⇒ Object

Returns the value of attribute source_resolution.



11
12
13
# File 'lib/tesseract_ffi/tesseract.rb', line 11

# Reader for @source_resolution, which the constructor sets from its
# source_resolution argument (default 72) and #ocr passes to
# tess_set_source_resolution.
#
# @return [Object] the value of @source_resolution
def source_resolution
  @source_resolution
end

#utf8_text ⇒ Object (readonly)

Returns the value of attribute utf8_text.



12
13
14
# File 'lib/tesseract_ffi/tesseract.rb', line 12

# Reader for @utf8_text, which #ocr assigns from tess_get_utf8(@handle, 0).
#
# @return [Object, nil] the value of @utf8_text; nil until #ocr has assigned it
#   (presumably the recognized plain text - confirm against the binding)
def utf8_text
  @utf8_text
end

Instance Method Details

#convert_to_pdf(output_stem) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/tesseract_ffi/tesseract.rb', line 69

# Processes @file_name through a PDF renderer, inside the #setup block.
#
# @param output_stem [Object] first argument to tess_pdf_renderer_create
#   (presumably the output path without extension - confirm against the binding)
# @return [Object] whatever #setup returns for the block
def convert_to_pdf(output_stem)
  setup do
    # The literal false is the renderer's third argument (presumably the
    # C API's "textonly" flag - TODO confirm).
    renderer = TesseractFFI.tess_pdf_renderer_create(
      output_stem,
      TesseractFFI.tess_get_datapath(@handle),
      false
    )
    # nil and 5000 are the third and fourth arguments (presumably retry
    # config and a timeout in milliseconds - TODO confirm).
    TesseractFFI.tess_process_pages(@handle, @file_name, nil, 5000, renderer)
  end
end

#log(msg) ⇒ Object

just output to console



28
29
30
# File 'lib/tesseract_ffi/tesseract.rb', line 28

# Minimal logger: prints +msg+ on standard output.
#
# @param msg [Object] anything puts accepts
# @return [nil]
def log(msg)
  $stdout.puts(msg)
end

#ocr ⇒ Object

rubocop:enable Metrics/AbcSize, Metrics/MethodLength

Raises:

  • (TessException)



55
56
57
58
59
60
61
# File 'lib/tesseract_ffi/tesseract.rb', line 55

# Runs recognition on @handle and stores both text outputs.
#
# Applies @source_resolution first, then calls tess_recognize and, on a
# zero status, fills @utf8_text and @hocr_text.
#
# @return [Object] the value assigned to @hocr_text
# @raise [TessException] if tess_recognize returns anything other than 0
def ocr
  tess_set_source_resolution(@handle, @source_resolution)

  status = tess_recognize(@handle, 0)
  raise TessException.new(error_msg: 'Recognition Error') unless status == 0

  @utf8_text = tess_get_utf8(@handle, 0)
  @hocr_text = tess_get_hocr(@handle, 0)
end

#recognize ⇒ Object



63
64
65
66
67
# File 'lib/tesseract_ffi/tesseract.rb', line 63

# Runs #ocr inside the #setup block.
#
# @return [Object] whatever #setup returns for the block
def recognize
  setup { ocr }
end

#setup ⇒ Object

rubocop:disable Metrics/AbcSize, Metrics/MethodLength



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/tesseract_ffi/tesseract.rb', line 33

# Creates and initializes a Tesseract handle for @file_name, yields to the
# caller's block, and always releases the handle afterwards.
#
# Steps: tess_create, tess_init with @language and @oem, tess_pix_read of
# @file_name, tess_set_image, then the block.
#
# @yield runs the caller's work (recognition, rendering, ...) while the
#   handle is live
# @return [Object] the block's value
# @raise [TessException] on a nil handle, a non-zero tess_init result, a
#   non-zero tess_set_image result, or any TessException from the block;
#   the message is appended to @errors and logged before re-raising
def setup
  @handle = tess_create
  raise TessException.new(error_msg: 'Library Error') unless @handle

  result = tess_init(@handle, 0, @language, @oem)
  raise TessException.new(error_msg: 'Init Error') if result != 0

  @image = tess_pix_read(@file_name)
  image_status = tess_set_image(@handle, @image)
  raise TessException.new(error_msg: "Unable to set image #{@file_name}") if image_status != 0

  yield # run the block for recognition etc
rescue TessException => e
  @errors << "Tesseract Error #{e.error[:error_msg]}"
  log @errors
  raise
ensure
  # tess_create may have failed; never pass a nil handle to the release calls.
  if @handle
    tess_end(@handle)
    tess_delete(@handle)
  end
end