Class: RTesseract
- Inherits:
-
Object
- Object
- RTesseract
- Defined in:
- lib/rtesseract.rb,
lib/rtesseract/mixed.rb,
lib/rtesseract/errors.rb
Defined Under Namespace
Classes: ConversionError, ImageNotSelectedError, Mixed, TempFilesNotRemovedError
Constant Summary collapse
- VERSION =
'0.0.7'
Instance Attribute Summary collapse
-
#lang ⇒ Object
Select the language. Languages: eng (English), deu (German), deu-f (German Fraktur), fra (French), ita (Italian), nld (Dutch), por (Portuguese), spa (Spanish), vie (Vietnamese). Note: make sure the selected language is installed for Tesseract.
-
#options ⇒ Object
Returns the value of attribute options.
-
#processor ⇒ Object
readonly
Returns the value of attribute processor.
-
#psm ⇒ Object
Page segmentation mode.
Instance Method Summary collapse
-
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows.
- #config ⇒ Object
- #config_file ⇒ Object
-
#convert ⇒ Object
Convert image to string.
-
#crop!(x, y, width, height) ⇒ Object
Crop image to convert.
- #generate_uid ⇒ Object
- #image_name ⇒ Object
-
#initialize(src = "", options = {}) ⇒ RTesseract
constructor
A new instance of RTesseract.
-
#remove_file(files = []) ⇒ Object
Remove files.
- #source=(src) ⇒ Object
-
#to_s ⇒ Object
Output value.
-
#to_s_without_spaces ⇒ Object
Remove spaces and line breaks.
Constructor Details
#initialize(src = "", options = {}) ⇒ RTesseract
Returns a new instance of RTesseract.
14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/rtesseract.rb', line 14

# Builds a new RTesseract for the image at +src+.
#
# Recognised keys are removed from the options hash as they are read; the
# hash that remains is stored in @options.
#
# @param src [String, Pathname] path of the image to convert
# @param options [Hash] settings; the keys read here are
#   :uid, :command (defaults to "tesseract"), :lang / "lang",
#   :psm / "psm", :clear_console_output (defaults to true when nil or
#   absent) and :processor / "processor"
def initialize(src = "", options = {})
  # Work on a copy so the deletes below do not empty the caller's hash.
  options = options.dup

  @uid = options.delete(:uid) || nil
  @source = Pathname.new src
  @command = options.delete(:command) || "tesseract"
  @lang = options.delete(:lang) || options.delete("lang") || ""
  @psm = options.delete(:psm) || options.delete("psm") || nil

  @clear_console_output = options.delete(:clear_console_output)
  @clear_console_output = true if @clear_console_output.nil?

  # @options shares the hash object with +options+, so the processor keys
  # deleted a few lines below are removed from @options as well.
  @options = options
  @value = ""
  @x, @y, @w, @h = []
  @processor = options.delete(:processor) || options.delete("processor")
  choose_processor!
end
Instance Attribute Details
#lang ⇒ Object
Select the language
Languages
-
eng - English
-
deu - German
-
deu-f - German fraktur
-
fra - French
-
ita - Italian
-
nld - Dutch
-
por - Portuguese
-
spa - Spanish
-
vie - Vietnamese
Note: Make sure the selected language is installed for Tesseract.
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/rtesseract.rb', line 76
require "shellwords"

# Builds the language argument from @lang.
#
# @lang is stripped and downcased, then mapped through a small alias table
# (for example "en" and "english" both become "eng"). Anything that is not
# an alias is passed through as given.
#
# @return [String] " -l <code> " (with surrounding spaces), or "" when no
#   language is set or an error occurs
def lang
  language = @lang.to_s.strip.downcase
  return "" if language.empty?

  # Aliases to language names.
  aliases = {
    "eng" => ["en", "en-us", "english"],
    "ita" => ["it"],
    "por" => ["pt", "pt-br", "portuguese"],
    "spa" => ["sp"]
  }
  match = aliases.find { |_code, names| names.include?(language) }
  code = match ? match.first : language

  # Escaped so that an unexpected value cannot inject extra shell syntax.
  # Ordinary language codes contain nothing that needs escaping.
  " -l #{Shellwords.escape(code)} "
rescue StandardError
  ""
end
#options ⇒ Object
Returns the value of attribute options.
9 10 11 |
# File 'lib/rtesseract.rb', line 9

# Returns the value of attribute options.
#
# @return [Object] the current value of @options
def options
  @options
end
#processor ⇒ Object (readonly)
Returns the value of attribute processor.
12 13 14 |
# File 'lib/rtesseract.rb', line 12

# Returns the value of attribute processor (read-only).
#
# @return [Object] the current value of @processor
def processor
  @processor
end
#psm ⇒ Object
Page segmentation mode
93 94 95 96 97 |
# File 'lib/rtesseract.rb', line 93

# Builds the page segmentation mode argument from @psm.
#
# @return [String] " -psm <value> " (with surrounding spaces), or "" when
#   @psm is nil or an error occurs
def psm
  return "" if @psm.nil?

  " -psm #{@psm} "
rescue StandardError
  ""
end
Instance Method Details
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows
112 113 114 115 |
# File 'lib/rtesseract.rb', line 112

# Builds the shell redirection that hides the command's stderr output.
#
# File::NULL names the platform's null device, so the redirection is
# produced on every platform and the method always returns a String.
#
# @return [String] "" when @clear_console_output is falsy, otherwise
#   "2>" followed by the null device
def clear_console_output
  return "" unless @clear_console_output

  "2>#{File::NULL}"
end
#config ⇒ Object
99 100 101 102 |
# File 'lib/rtesseract.rb', line 99

# Serialises @options as configuration text, one "key value" pair per line.
#
# A nil @options is replaced by an empty hash first.
#
# @return [String] the pairs joined with "\n"; "" when there are none
def config
  @options ||= {}
  @options.map { |key, value| "#{key} #{value}" }.join("\n")
end
#config_file ⇒ Object
104 105 106 107 108 109 |
# File 'lib/rtesseract.rb', line 104

# Writes the text returned by #config to a temporary file.
#
# @return [String] path of the temporary config file, or "" when @options
#   is nil or empty
def config_file
  return "" if @options.nil? || @options.empty?

  conf = Tempfile.new("config")
  conf.write(config)
  # Close (without unlinking) so the buffered text is on disk before
  # another process reads the file.
  conf.close
  # Keep a reference: an unreferenced Tempfile may be garbage collected and
  # its file deleted before the path is used.
  @config_tempfile = conf
  conf.path
end
#convert ⇒ Object
Convert image to string
118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/rtesseract.rb', line 118
require "shellwords"

# Convert image to string.
#
# Runs @command on the image returned by #image_to_tiff, reads the
# resulting "<tmp>.txt" file into @value, resets @uid and removes the
# temporary files.
#
# @return [true] the result of #remove_file
# @raise [RTesseract::ConversionError] when any step fails
def convert
  generate_uid
  tmp_file = Pathname.new(Dir.tmpdir).join("#{@uid}_#{@source.basename}")
  txt_file = "#{tmp_file}.txt"
  tmp_image = image_to_tiff

  # Both paths are escaped so names containing spaces or shell
  # metacharacters reach the command as single arguments.
  `#{@command} #{Shellwords.escape(tmp_image.to_s)} #{Shellwords.escape(tmp_file.to_s)} #{lang} #{psm} #{config_file} #{clear_console_output}`

  @value = File.read(txt_file).to_s
  @uid = nil
  remove_file([tmp_image, txt_file])
rescue StandardError
  # Best-effort cleanup so a failed run does not leave temp files behind;
  # a cleanup failure must not hide the conversion error.
  begin
    remove_file([tmp_image, txt_file].compact)
  rescue StandardError
    nil
  end
  raise RTesseract::ConversionError
end
#crop!(x, y, width, height) ⇒ Object
Crop image to convert
40 41 42 43 |
# File 'lib/rtesseract.rb', line 40

# Crop image to convert.
#
# Only records the crop rectangle in @x, @y, @w and @h.
#
# @param x [Object] stored in @x
# @param y [Object] stored in @y
# @param width [Object] stored in @w
# @param height [Object] stored in @h
# @return [self] so calls can be chained
def crop!(x, y, width, height)
  @x = x
  @y = y
  @w = width
  @h = height
  self
end
#generate_uid ⇒ Object
59 60 61 62 |
# File 'lib/rtesseract.rb', line 59

# Returns @uid, generating one first when it is nil.
#
# The generated id is always exactly ten decimal digits. Slicing the string
# form of a random float can give fewer digits, or "e-05"-style fragments
# when the float prints in scientific notation.
#
# @return [String, Object] the existing @uid, or the newly generated one
def generate_uid
  @uid = format("%010d", rand(10**10)) if @uid.nil?
  @uid
end
#image_name ⇒ Object
34 35 36 |
# File 'lib/rtesseract.rb', line 34

# Returns the last path component of @source.
#
# @return [Object] the result of @source.basename
def image_name
  @source.basename
end
#remove_file(files = []) ⇒ Object
Remove files
46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/rtesseract.rb', line 46

# Remove files.
#
# Each entry that exists is unlinked. If unlinking raises, `rm -f` is tried
# as a fallback.
#
# @param files [Array] paths to delete
# @return [true]
# @raise [RTesseract::TempFilesNotRemovedError] when an error escapes the
#   per-file handling
def remove_file(files = [])
  files.each do |file|
    begin
      File.unlink(file) if File.exist?(file)
    rescue StandardError
      # Argument-list form: the path never passes through a shell, so
      # spaces and metacharacters in it are harmless.
      system("rm", "-f", file.to_s)
    end
  end
  true
rescue StandardError
  raise RTesseract::TempFilesNotRemovedError
end
#source=(src) ⇒ Object
29 30 31 32 |
# File 'lib/rtesseract.rb', line 29

# Selects a new image and discards any previously stored text.
#
# @param src [String, Pathname] path of the new image
# @return [Pathname] the value stored in @source
def source=(src)
  # Reset @value so the next #to_s converts the new image.
  @value = ""
  @source = Pathname.new(src)
end
#to_s ⇒ Object
Output value
131 132 133 134 135 136 137 138 139 |
# File 'lib/rtesseract.rb', line 131

# Output value.
#
# Returns the stored text when there is one; otherwise converts the image
# first.
#
# @return [String] the recognised text
# @raise [RTesseract::ImageNotSelectedError] when @source is not a file
def to_s
  return @value unless @value == ""
  raise RTesseract::ImageNotSelectedError unless @source.file?

  convert
  @value
end
#to_s_without_spaces ⇒ Object
Remove spaces and line breaks
142 143 144 |
# File 'lib/rtesseract.rb', line 142

# Returns #to_s with every space, "\n" and "\r" removed.
#
# @return [String] the text without spaces or line breaks
def to_s_without_spaces
  # One pass with String#delete instead of three chained gsub calls.
  to_s.delete(" \n\r")
end