Class: OCRFile
- Inherits:
-
Object
- Object
- OCRFile
- Defined in:
- lib/ocrfile.rb
Instance Method Summary
-
#give_me_text ⇒ Object
Sends the file to the hosted Give Me Text service (givemetext.okfnlabs.org) to extract its text.
- #give_me_text_local(mime_magic) ⇒ Object
-
#gotten_text_ok?(text) ⇒ Boolean
Checks if text was successfully extracted.
-
#initialize(file, input_dir, output_dir, rel_path, tika) ⇒ OCRFile
constructor
A new instance of OCRFile.
-
#load_extracted_text(file) ⇒ Object
Load text that is already extracted.
-
#ocr ⇒ Object
OCR file.
Constructor Details
#initialize(file, input_dir, output_dir, rel_path, tika) ⇒ OCRFile
Returns a new instance of OCRFile.
6 7 8 9 10 11 12 13 |
# File 'lib/ocrfile.rb', line 6
# Creates an OCRFile for a single input document.
#
# @param file [String] path of the document to extract text from (stored as @path)
# @param input_dir [String] input directory (stored; not read by the methods shown here)
# @param output_dir [String] directory holding cached "<rel_path>.json" results and error_log.txt
# @param rel_path [String] path fragment appended to output_dir to locate the cached JSON
# @param tika [String, nil] base URL of a Tika server; when falsy, #ocr uses the hosted service
def initialize(file, input_dir, output_dir, rel_path, tika)
  # Where the document comes from and where results go.
  @path, @input_dir, @output_dir = file, input_dir, output_dir
  # How to locate the cached result and which extractor to use.
  @rel_path, @tika = rel_path, tika
  # Extracted text starts out empty until #ocr fills it in.
  @text = ""
end
Instance Method Details
#give_me_text ⇒ Object
Sends the file to the hosted Give Me Text service (givemetext.okfnlabs.org) to extract its text
45 46 47 48 49 50 51 52 |
# File 'lib/ocrfile.rb', line 45
# Uploads the file at @path to the hosted Give Me Text endpoint as a
# multipart form post, stores the response body in @text and then checks
# it with #gotten_text_ok?.
#
# @return [Object] whatever #gotten_text_ok? returns for the response body
def give_me_text
  endpoint = "http://givemetext.okfnlabs.org/tika/tika/form"
  request = Curl::Easy.new(endpoint)
  request.multipart_form_post = true

  # The file is sent as the form field named 'file'.
  upload_field = Curl::PostField.file('file', @path)
  request.http_post(upload_field)

  @text = request.body_str
  gotten_text_ok?(@text)
end
#give_me_text_local(mime_magic) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/ocrfile.rb', line 54
# Sends the file at @path to the Tika server at @tika via HTTP PUT to
# "<@tika>/tika", requesting a plain-text response, stores the response body
# in @text and then checks it with #gotten_text_ok?.
#
# @param mime_magic [#type, nil] detected MIME info for the file; when nil
#   (type could not be detected) a generic binary content type is sent
# @return [Object] whatever #gotten_text_ok? returns for the response body
def give_me_text_local(mime_magic)
  c = Curl::Easy.new("#{@tika}/tika")

  # Documents are usually binary (PDF, images, ...): read raw bytes so no
  # newline/encoding translation can alter the upload.
  file_data = File.binread(@path)

  # Fall back to a generic type instead of failing on nil when the MIME type
  # is unknown. NOTE(review): assumes the server detects the real type for
  # application/octet-stream - confirm against the Tika server in use.
  c.headers['Content-Type'] = mime_magic ? mime_magic.type : 'application/octet-stream'
  c.headers['Accept'] = "text/plain"
  c.http_put(file_data)

  @text = c.body_str
  gotten_text_ok?(@text)
end
#gotten_text_ok?(text) ⇒ Boolean
Checks if text was successfully extracted
66 67 68 |
# File 'lib/ocrfile.rb', line 66
# Checks whether text extraction succeeded by looking for the
# "Stream Closed" Java exception marker in the returned text.
#
# On failure this does not return: it throws :extraction_error. With no
# enclosing `catch(:extraction_error)` Ruby turns that into an
# UncaughtThrowError.
#
# @param text [String] response body returned by the extraction service
# @return [Boolean] true when the failure marker is absent
def gotten_text_ok?(text)
  throw :extraction_error if text.include?("java.io.IOException: Stream Closed")

  # A predicate should answer truthfully on success rather than fall through
  # with nil.
  true
end
#load_extracted_text(file) ⇒ Object
Load text that is already extracted
39 40 41 42 |
# File 'lib/ocrfile.rb', line 39
# Reuses a previous extraction: reads the given JSON file and stores the
# value under its "text" key in @text. Prints a notice to stdout.
#
# @param file [String] path of the JSON file holding the extracted text
# @return [Object] the value stored under "text" (nil when the key is absent)
def load_extracted_text(file)
  puts "file already exists"
  raw_json = File.read(file)
  document = JSON.parse(raw_json)
  @text = document["text"]
end
#ocr ⇒ Object
OCR file
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/ocrfile.rb', line 16
# Extracts the text of the file at @path.
#
# If "<output_dir><rel_path>.json" already exists, the text is loaded from it.
# Otherwise the file is sent to the Tika server at @tika when one is
# configured, or to the hosted service when it is not.
#
# Any StandardError raised along the way is treated as a failed extraction:
# the cause is reported on stderr and the file path is appended to
# "<output_dir>/error_log.txt" (one path per line).
#
# @return [Object] the current value of @text (unchanged if extraction failed)
def ocr
  begin
    # Build the cache path once; it is needed for both the check and the load.
    json_path = "#{@output_dir}#{@rel_path}.json"

    if File.exist?(json_path)
      load_extracted_text(json_path)
    elsif @tika
      # MIME detection is only needed for the local Tika request, so a failing
      # lookup can no longer discard an already-cached result.
      give_me_text_local(MimeMagic.by_path(@path))
    else
      give_me_text
    end
  rescue StandardError => e
    # Best-effort: keep going, but do not lose the reason for the failure.
    warn "OCR failed for #{@path}: #{e.class}: #{e.message}"
    IO.write("#{@output_dir}/error_log.txt", "#{@path}\n", mode: 'a')
  end

  @text
end