Class: Tesseract::API

Inherits:
Object
  • Object
show all
Defined in:
lib/tesseract/api.rb,
lib/tesseract/api/image.rb,
lib/tesseract/api/iterator.rb

Defined Under Namespace

Classes: Image, Iterator

Constant Summary collapse

# Maps each variable type name used to build the has_<type>_variable /
# get_<type>_variable binding calls to the aliases get_variable accepts
# for it.  Frozen (hash and alias lists) so the lookup table cannot be
# altered at runtime.
Types = {
	int:    [:integer].freeze,
	bool:   [:boolean].freeze,
	double: [:float].freeze,
	string: [:str].freeze
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ API

Returns a new instance of API.



56
57
58
# File 'lib/tesseract/api.rb', line 56

# Creates the underlying API object through C::BaseAPI.create and stores
# it in @internal, wrapped in an FFI::AutoPointer whose release callback
# is the class-level finalize method.
def initialize
	handle   = C::BaseAPI.create
	releaser = self.class.method(:finalize)

	@internal = FFI::AutoPointer.new(handle, releaser)
end

Class Method Details

.finalize(pointer) ⇒ Object

:nodoc:



60
61
62
# File 'lib/tesseract/api.rb', line 60

# Release callback registered by initialize: hands +pointer+ to
# C::BaseAPI.destroy and returns whatever that call returns.
def self.finalize (pointer) # :nodoc:
	bindings = C::BaseAPI
	bindings.destroy(pointer)
end

.image_for(image) ⇒ Object

Get a pointer to a tesseract-ocr usable image from a path, a string with the data or an IO stream.



37
38
39
# File 'lib/tesseract/api.rb', line 37

# Wraps +image+ in a new Image instance and returns it.  The argument is
# passed to Image.new untouched.
def self.image_for (image)
	wrapped = Image.new(image)
	wrapped
end

.to_language_code(code) ⇒ Object

Transform a language code to tesseract-ocr usable codes



43
44
45
46
47
# File 'lib/tesseract/api.rb', line 43

# Converts +code+ into the alpha3 code of the entry ISO_639.find returns
# for its lower-cased string form.
#
# Any StandardError raised along the way (for instance when no entry is
# found) makes the method fall back to +code.to_s+, unchanged in case.
def self.to_language_code (code)
	lookup_key = code.to_s.downcase
	entry      = ISO_639.find(lookup_key)

	entry.alpha3
rescue
	code.to_s
end

Instance Method Details

#all_word_confidences ⇒ Object



193
194
195
# File 'lib/tesseract/api.rb', line 193

# Returns the result of C::BaseAPI.all_word_confidences for this
# instance's handle (see to_ffi), unchanged.
def all_word_confidences
	handle = to_ffi
	C::BaseAPI.all_word_confidences(handle)
end

#clear ⇒ Object



197
198
199
# File 'lib/tesseract/api.rb', line 197

# Calls C::BaseAPI.clear with this instance's handle (see to_ffi) and
# returns the binding's result.
def clear
	handle = to_ffi
	C::BaseAPI.clear(handle)
end

#end ⇒ Object



201
202
203
# File 'lib/tesseract/api.rb', line 201

# Calls C::BaseAPI.end with this instance's handle (see to_ffi) and
# returns the binding's result.
def end
	handle = to_ffi
	C::BaseAPI.end(handle)
end

#get_box(page = 0) ⇒ Object



169
170
171
172
173
174
175
176
177
# File 'lib/tesseract/api.rb', line 169

# Returns the string produced by C::BaseAPI.get_box_text for +page+
# (default 0), tagged as UTF-8, or nil when the binding hands back a null
# pointer -- the same contract get_text has.
#
# A non-null buffer is always released with C.free_array_of_char after it
# has been copied into a Ruby String.
def get_box (page = 0)
	pointer = C::BaseAPI.get_box_text(to_ffi, page)

	# Reading from a null pointer would raise; report "no result" instead.
	return if pointer.null?

	pointer.read_string.force_encoding('UTF-8')
ensure
	# +pointer+ is still nil when the binding call itself raised, and a
	# null pointer has nothing to release.
	C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end

#get_iterator ⇒ Object



152
153
154
# File 'lib/tesseract/api.rb', line 152

# Returns a new Iterator built from the value C::BaseAPI.get_iterator
# yields for this instance's handle (see to_ffi).
def get_iterator
	raw_iterator = C::BaseAPI.get_iterator(to_ffi)

	Iterator.new(raw_iterator)
end

#get_page_seg_mode ⇒ Object



112
113
114
# File 'lib/tesseract/api.rb', line 112

# Returns the result of C::BaseAPI.get_page_seg_mode for this instance's
# handle (see to_ffi), unchanged.
def get_page_seg_mode
	handle = to_ffi
	C::BaseAPI.get_page_seg_mode(handle)
end

#get_text ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/tesseract/api.rb', line 156

# Returns the string produced by C::BaseAPI.get_utf8_text, tagged as
# UTF-8, or nil when the binding hands back a null pointer.
#
# A non-null buffer is always released with C.free_array_of_char after it
# has been copied into a Ruby String.
def get_text
	pointer = C::BaseAPI.get_utf8_text(to_ffi)

	return if pointer.null?

	pointer.read_string.force_encoding('UTF-8')
ensure
	# +pointer+ is still nil when the binding call itself raised; calling
	# null? on it here would replace the real error with a NoMethodError.
	C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end

#get_unlv ⇒ Object



179
180
181
182
183
184
185
186
187
# File 'lib/tesseract/api.rb', line 179

# Returns the string produced by C::BaseAPI.get_unlv_text, tagged as
# ISO8859-1, or nil when the binding hands back a null pointer -- the same
# contract get_text has.
#
# A non-null buffer is always released with C.free_array_of_char after it
# has been copied into a Ruby String.
def get_unlv
	pointer = C::BaseAPI.get_unlv_text(to_ffi)

	# Reading from a null pointer would raise; report "no result" instead.
	return if pointer.null?

	pointer.read_string.force_encoding('ISO8859-1')
ensure
	# +pointer+ is still nil when the binding call itself raised, and a
	# null pointer has nothing to release.
	C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end

#get_variable(name, type = nil) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/tesseract/api.rb', line 80

# Looks up the variable +name+ through the C::BaseAPI
# has_<type>_variable / get_<type>_variable bindings.
#
# name - the variable name handed to the bindings.
# type - optional; a key of Types (:int, :bool, :double, :string) or one
#        of its aliases (:integer, :boolean, :float, :str).  When nil,
#        the keys of Types are probed in order and the first one the
#        binding reports as present is used.
#
# Returns the value from the matching getter, or nil when the binding
# reports no such variable.  Raises ArgumentError when +type+ is neither
# a key nor an alias in Types.
def get_variable (name, type = nil)
	api = to_ffi

	if type.nil?
		kind = Types.keys.find { |candidate| C::BaseAPI.__send__ "has_#{candidate}_variable", api, name }
	else
		# Resolve aliases into a separate local: reusing +name+ here would
		# overwrite the variable name the bindings are queried with below.
		kind, _aliases = Types.find { |key, aliases| key == type || aliases.member?(type) }

		raise ArgumentError, "unknown type #{type}" unless kind

		kind = nil unless C::BaseAPI.__send__ "has_#{kind}_variable", api, name
	end

	C::BaseAPI.__send__ "get_#{kind}_variable", api, name if kind
end

#init(datapath = '.', language = 'eng', mode = :DEFAULT) ⇒ Object



102
103
104
105
106
# File 'lib/tesseract/api.rb', line 102

# Calls C::BaseAPI.init with this instance's handle, +datapath+,
# +language+ (converted with to_s) and +mode+.
#
# Returns nil on success; raises a RuntimeError when the binding returns
# a non-zero status.
def init (datapath = '.', language = 'eng', mode = :DEFAULT)
	status = C::BaseAPI.init(to_ffi, datapath, language.to_s, mode)

	raise 'the API did not Init correctly' unless status.zero?
end

#mean_text_confidence ⇒ Object



189
190
191
# File 'lib/tesseract/api.rb', line 189

# Returns the result of C::BaseAPI.mean_text_conf for this instance's
# handle (see to_ffi), unchanged.
def mean_text_confidence
	handle = to_ffi
	C::BaseAPI.mean_text_conf(handle)
end

#process_page(pix, page = 0, name = "") ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/tesseract/api.rb', line 140

# Runs C::BaseAPI.process_page for +pix+ and returns the text the binding
# wrote into a temporary string created with C.create_string.
#
# pix  - an Image (its to_ffi value is passed on) or anything else, which
#        is passed on as is.
# page - page argument forwarded to the binding (default 0).
# name - name argument forwarded to the binding (default "").
#
# Raises a RuntimeError when the binding returns a falsy value.  The
# temporary string is handed to C.destroy_string in every case.
def process_page (pix, page = 0, name = "")
	result = C.create_string
	handle = to_ffi

	image = if pix.is_a?(Image)
		pix.to_ffi
	else
		pix
	end

	succeeded = C::BaseAPI.process_page(handle, image, page, name, result)
	raise 'process_page failed' unless succeeded

	content = C.string_content(result)
	content.read_string(C.string_length(result))
ensure
	C.destroy_string(result)
end

#process_pages(name) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
# File 'lib/tesseract/api.rb', line 128

# Runs C::BaseAPI.process_pages for +name+ and returns the text the
# binding wrote into a temporary string created with C.create_string.
#
# Raises a RuntimeError when the binding returns a falsy value.  The
# temporary string is handed to C.destroy_string in every case.
def process_pages (name)
	result = C.create_string

	succeeded = C::BaseAPI.process_pages(to_ffi, name, result)
	raise 'process_pages failed' unless succeeded

	content = C.string_content(result)
	content.read_string(C.string_length(result))
ensure
	C.destroy_string(result)
end

#read_config_file(path) ⇒ Object



108
109
110
# File 'lib/tesseract/api.rb', line 108

# Forwards +path+ to C::BaseAPI.read_config_file together with this
# instance's handle (see to_ffi) and returns the binding's result.
def read_config_file (path)
	handle = to_ffi
	C::BaseAPI.read_config_file(handle, path)
end

#set_image(pix) ⇒ Object



120
121
122
# File 'lib/tesseract/api.rb', line 120

# Passes +pix+ to C::BaseAPI.set_image along with this instance's handle.
# An Image is replaced by its to_ffi value first; any other object is
# forwarded as is.  Returns the binding's result.
def set_image (pix)
	handle = to_ffi
	image  = pix
	image  = pix.to_ffi if pix.is_a?(Image)

	C::BaseAPI.set_image(handle, image)
end

#set_input_name(name) ⇒ Object



68
69
70
# File 'lib/tesseract/api.rb', line 68

# Forwards +name+ to C::BaseAPI.set_input_name together with this
# instance's handle (see to_ffi) and returns the binding's result.
def set_input_name (name)
	handle = to_ffi
	C::BaseAPI.set_input_name(handle, name)
end

#set_output_name(name) ⇒ Object



72
73
74
# File 'lib/tesseract/api.rb', line 72

# Forwards +name+ to C::BaseAPI.set_output_name together with this
# instance's handle (see to_ffi) and returns the binding's result.
def set_output_name (name)
	handle = to_ffi
	C::BaseAPI.set_output_name(handle, name)
end

#set_page_seg_mode(value) ⇒ Object



116
117
118
# File 'lib/tesseract/api.rb', line 116

# Forwards +value+ to C::BaseAPI.set_page_seg_mode together with this
# instance's handle (see to_ffi) and returns the binding's result.
def set_page_seg_mode (value)
	handle = to_ffi
	C::BaseAPI.set_page_seg_mode(handle, value)
end

#set_rectangle(left, top, width, height) ⇒ Object



124
125
126
# File 'lib/tesseract/api.rb', line 124

# Forwards +left+, +top+, +width+ and +height+, in that order, to
# C::BaseAPI.set_rectangle together with this instance's handle (see
# to_ffi) and returns the binding's result.
def set_rectangle (left, top, width, height)
	handle = to_ffi
	C::BaseAPI.set_rectangle(handle, left, top, width, height)
end

#set_variable(name, value) ⇒ Object



76
77
78
# File 'lib/tesseract/api.rb', line 76

# Forwards +name+ and +value+ to C::BaseAPI.set_variable together with
# this instance's handle (see to_ffi) and returns the binding's result.
def set_variable (name, value)
	handle = to_ffi
	C::BaseAPI.set_variable(handle, name, value)
end

#to_ffi ⇒ Object



205
206
207
# File 'lib/tesseract/api.rb', line 205

# Returns the object stored in @internal (assigned in initialize), which
# the other methods of this class pass as first argument to the
# C::BaseAPI bindings.
def to_ffi
	@internal
end

#version ⇒ Object



64
65
66
# File 'lib/tesseract/api.rb', line 64

# Returns the result of C::BaseAPI.version for this instance's handle
# (see to_ffi), unchanged.
def version
	handle = to_ffi
	C::BaseAPI.version(handle)
end