Class: OCR::Scan
- Inherits:
-
Object
- Object
- OCR::Scan
- Defined in:
- lib/simple-ocr/scan.rb
Constant Summary collapse
- EXTENS =
%w{pdf}
Instance Method Summary collapse
-
#clean_img ⇒ Object
Shell Script for cleaning the Image.
-
#convert_to_img ⇒ Object
Conversion of PDF to Image.
-
#delete_files ⇒ Object
Deleting unnecessary files after processing.
-
#exec_command(command) ⇒ Object
Execute Command.
- #handle_output_type(type) ⇒ Object
-
#initialize(input_file, output_file, options, type) ⇒ Scan
constructor
Initialize your Input File, Output File, Options, Type.
- #pdf?(input_file = @input_file) ⇒ Boolean
-
#scan_img ⇒ Object
OCR of Input.
Constructor Details
#initialize(input_file, output_file, options, type) ⇒ Scan
Stores the input file, output file, options and output type. A PDF input is converted to an image immediately; any other input is used as the image as-is.
12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/simple-ocr/scan.rb', line 12

# Stores the scan settings and derives the working image paths.
#
# A PDF input (as decided by #pdf?) is converted to an image right away via
# #convert_to_img; any other input is used directly as the image to clean.
#
# @param input_file path of the file to OCR
# @param output_file output path passed to tesseract; also used to derive
#   the path of the cleaned intermediate image
# @param options text interpolated verbatim into the tesseract command line
#   by #scan_img
# @param type output type; :pdf and :hocr are recognised, anything else
#   becomes an empty string (see #handle_output_type)
def initialize(input_file, output_file, options, type)
  @output_file = output_file
  @options = options
  @type = handle_output_type(type)
  @input_file = input_file

  if pdf?(input_file)
    # @image and @input_file must both be set before the conversion runs.
    @image = OCR::Path.new(input_file).image_path
    convert_to_img
  else
    @image = input_file
  end

  @clean_image = OCR::Path.new(output_file).clean_image_path
end
Instance Method Details
#clean_img ⇒ Object
Runs the gem's bundled textcleaner shell script on the image and writes the result to the cleaned-image path.
54 55 56 57 58 |
# File 'lib/simple-ocr/scan.rb', line 54

# Runs the textcleaner script bundled with the simple-ocr gem on @image and
# writes the cleaned result to @clean_image.
#
# Every path is shell-escaped, so a gem directory or image path containing
# spaces, quotes or other shell metacharacters is passed as one argument.
#
# @return [String] whatever the script printed on standard output
# @raise [Gem::MissingSpecError] if the simple-ocr gem cannot be located
def clean_img
  # Required here so the method does not rely on the file's require list.
  require 'shellwords'

  gem_root = Gem::Specification.find_by_name('simple-ocr').full_gem_path
  script = Shellwords.escape(File.join(gem_root, 'lib/textcleaner'))
  paths = [@image, @clean_image].map { |path| Shellwords.escape(path.to_s) }

  `sh #{script} -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 #{paths.join(' ')}`
end
#convert_to_img ⇒ Object
Converts the input PDF to a PNG image by running Ghostscript (`gs`).
37 38 39 |
# File 'lib/simple-ocr/scan.rb', line 37

# Renders @input_file to @image with Ghostscript's png16m device at
# OCR::MIN_DENSITY resolution.
#
# Both paths are shell-escaped, so names containing spaces, quotes or other
# shell metacharacters are passed to gs as single arguments.
#
# @return [String] whatever gs printed on standard output
def convert_to_img
  # Required here so the method does not rely on the file's require list.
  require 'shellwords'

  target = Shellwords.escape(@image.to_s)
  source = Shellwords.escape(@input_file.to_s)

  `gs -sDEVICE=png16m '-r#{OCR::MIN_DENSITY}' -o #{target} #{source}`
end
#delete_files ⇒ Object
Deleting unnecessary files after processing.
61 62 63 64 |
# File 'lib/simple-ocr/scan.rb', line 61

# Removes the intermediate files produced while scanning.
#
# The cleaned image is always removed. The working image is removed only
# when the input was a PDF, because otherwise @image is the caller's own
# input file (see #initialize).
#
# @return the result of the last FileUtils.rm_rf call, or nil when the
#   input was not a PDF
def delete_files
  FileUtils.rm_rf(@clean_image)
  return unless pdf?

  FileUtils.rm_rf(@image)
end
#exec_command(command) ⇒ Object
Runs the given command with Open3.popen3.
49 50 51 |
# File 'lib/simple-ocr/scan.rb', line 49

# Runs +command+ through Open3.popen3.
#
# Without a block this returns the four popen3 handles and the caller is
# responsible for closing the streams. With a block the handles are yielded
# and closed by Open3 when the block finishes, which avoids leaking them.
#
# @param command [String] command line to execute
# @yield [stdin, stdout, stderr, wait_thread] optional; handles of the child
# @return [Array, Object] [stdin, stdout, stderr, wait_thread] when no block
#   is given, otherwise the block's return value
def exec_command(command, &block)
  Open3.popen3(command, &block)
end
#handle_output_type(type) ⇒ Object
26 27 28 29 30 31 32 33 34 |
# File 'lib/simple-ocr/scan.rb', line 26

# Maps the requested output type to the text appended to the tesseract
# command line.
#
# @param type [Symbol, Object] :pdf or :hocr; any other value (including
#   nil and the strings 'pdf'/'hocr') is treated as "no explicit type"
# @return [String] 'pdf', 'hocr', or an empty string
def handle_output_type(type)
  case type
  when :pdf then 'pdf'
  when :hocr then 'hocr'
  else ''
  end
end
#pdf?(input_file = @input_file) ⇒ Boolean
66 67 68 |
# File 'lib/simple-ocr/scan.rb', line 66

# Tells whether a file counts as a PDF, judged by the second element of
# OCR::Path#name_exten compared with OCR::Path::EXTENS[:pdf].
#
# @param input_file file to inspect; defaults to the scan's own input file
# @return [Boolean] true when the extension matches the PDF entry
def pdf?(input_file = @input_file)
  extension = OCR::Path.new(input_file).name_exten[1]
  extension == OCR::Path::EXTENS[:pdf]
end
#scan_img ⇒ Object
Cleans the image, runs Tesseract OCR on the cleaned image, then deletes the temporary files.
42 43 44 45 46 |
# File 'lib/simple-ocr/scan.rb', line 42

# Runs the full OCR pipeline: clean the image, run tesseract on the cleaned
# image, then remove the intermediate files.
#
# The two paths are wrapped in single quotes, while @options and @type are
# interpolated unquoted so they can expand to several (or zero) arguments.
#
# @return whatever #delete_files returns
def scan_img
  clean_img

  command = "tesseract '#{@clean_image}' #{@options} '#{@output_file}' #{@type}"
  `#{command}`

  delete_files
end