Class: OCR::Scan

Inherits:
Object
  • Object
show all
Defined in:
lib/simple-ocr/scan.rb

Constant Summary collapse

EXTENS =
%w{pdf}

Instance Method Summary collapse

Constructor Details

#initialize(input_file, output_file, options, type) ⇒ Scan

Initialize your Input File, Output File, Options, Type.



11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/simple-ocr/scan.rb', line 11

# Stores the scan parameters and works out which image file will be scanned.
#
# When the second element of OCR::Path#name_exten (presumably the file
# extension -- confirm against OCR::Path) equals OCR::Path::EXTENS[:pdf], the
# input is first converted to an image via #convert_to_img and that image is
# used; otherwise the input file itself is used as the image.
#
# @param input_file  path of the file to scan
# @param output_file value interpolated into the tesseract command by #scan_img
# @param options     value interpolated into the tesseract command by #scan_img
# @param type        value interpolated into the tesseract command by #scan_img
def initialize(input_file, output_file, options, type)
	@input_file = input_file
	@output_file = output_file
	@options = options
	@type = type

	pdf_input = OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
	@image = pdf_input ? OCR::Path.new(input_file).image_path : input_file
	# @image and @input_file must both be set before converting: #convert_to_img reads them.
	convert_to_img if pdf_input
	@clean_image = OCR::Path.new(input_file).clean_image_path
end

Instance Method Details

#clean_img ⇒ Object

Shell Script for cleaning the Image.



43
44
45
# File 'lib/simple-ocr/scan.rb', line 43

# Runs the ./textcleaner script through sh, passing @image and @clean_image as
# the final two arguments (presumably the input and the cleaned output).
#
# Both paths are shell-escaped rather than wrapped in literal single quotes,
# so a filename containing an apostrophe or other shell metacharacters is
# passed through as one intact argument instead of breaking the command.
#
# @return [String] whatever the command wrote to standard output
def clean_img
	require 'shellwords' # stdlib; required here because the file's require list is not visible from this method
	`sh ./textcleaner -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 #{Shellwords.escape(@image)} #{Shellwords.escape(@clean_image)}`
end

#convert_to_img ⇒ Object

Conversion of PDF to Image



26
27
28
# File 'lib/simple-ocr/scan.rb', line 26

# Converts the PDF at @input_file into an image at @image by invoking
# Ghostscript (gs) with the png16m device at the resolution given by
# OCR::MIN_DENSITY.
#
# Paths and the resolution flag are shell-escaped rather than wrapped in
# literal single quotes, so filenames containing an apostrophe or other shell
# metacharacters reach gs as single, intact arguments.
#
# @return [String] whatever the command wrote to standard output
def convert_to_img
	require 'shellwords' # stdlib; required here because the file's require list is not visible from this method
	`gs -sDEVICE=png16m #{Shellwords.escape("-r#{OCR::MIN_DENSITY}")} -o #{Shellwords.escape(@image)} #{Shellwords.escape(@input_file)}`
end

#delete_files ⇒ Object

Deletes the files that are no longer needed after processing.



48
49
50
51
# File 'lib/simple-ocr/scan.rb', line 48

# Removes the intermediate files produced while scanning.
#
# The cleaned image is always removed. @image is removed only when the second
# element of OCR::Path#name_exten for the input file is "pdf", i.e. when
# @image was generated from the input rather than being the input itself.
def delete_files
	FileUtils.rm_rf(@clean_image)

	extension = OCR::Path.new(@input_file).name_exten[1]
	return unless extension == "pdf"

	FileUtils.rm_rf(@image)
end

#exec_command(command) ⇒ Object

Execute Command



38
39
40
# File 'lib/simple-ocr/scan.rb', line 38

# Starts +command+ with Open3.popen3 (non-block form) and hands back its handles.
#
# No block is given to popen3, so the streams are not closed automatically;
# the caller is responsible for closing them and waiting on the thread.
#
# @param command the command passed straight to Open3.popen3
# @return [Array] stdin, stdout, stderr and the wait thread, in that order
def exec_command(command)
	child_in, child_out, child_err, waiter = Open3.popen3(command)
	[child_in, child_out, child_err, waiter]
end

#scan_img ⇒ Object

OCR of Input



31
32
33
34
35
# File 'lib/simple-ocr/scan.rb', line 31

# Runs the OCR pass: cleans the image, invokes tesseract on the cleaned image,
# then deletes the intermediate files.
#
# @clean_image and @output_file are shell-escaped rather than wrapped in
# literal single quotes, so paths containing an apostrophe or other shell
# metacharacters are passed to tesseract intact. @options and @type are
# interpolated unquoted, as before, because they are spliced in as shell
# words (possibly several); callers must not pass untrusted text in them.
#
# @return the result of #delete_files
def scan_img
	require 'shellwords' # stdlib; required here because the file's require list is not visible from this method
	clean_img
	`tesseract #{Shellwords.escape(@clean_image)} #{@options} #{Shellwords.escape(@output_file)} #{@type}`
	delete_files
end