Module: OCR
Overview
Helper for OCR'ing single digits taken from screen captures.
Constant Summary collapse
- GOCR =
"gocr"
- CACHE =
{}
- CACHE_FILE =
File.expand_path('~/.sensible-cinema-ocr.marshal')
Instance Method Summary collapse
- #clear_cache! ⇒ Object
-
#identify_digit(memory_bitmap, options = {}) ⇒ Object
options are :might_be_colon, :should_invert, :sharpen, and :levels (:levels is required).
- #load_from_ocr_seed ⇒ Object
- #serialize_cache_to_disk ⇒ Object
- #unserialize_cache_from_disk ⇒ Object
- #version ⇒ Object
Instance Method Details
#clear_cache! ⇒ Object
97 98 99 100 |
# File 'lib/ocr.rb', line 97
#
# Empties the in-memory CACHE and removes the on-disk cache file, if one exists.
#
# @return [Integer, nil] the result of File.delete when a file was removed,
#   otherwise nil
def clear_cache!
  CACHE.clear
  return unless File.exist?(CACHE_FILE)

  File.delete(CACHE_FILE)
end
#identify_digit(memory_bitmap, options = {}) ⇒ Object
options are :might_be_colon, :should_invert, :sharpen, and :levels (:levels is required)
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/ocr.rb', line 43
#
# Identifies the single digit (or colon) shown in a captured bitmap by running
# it through the external gocr program, memoizing every answer in CACHE.
#
# @param memory_bitmap [String] raw bytes of the captured image; also used as
#   the cache key
# @param options [Hash] recognition settings:
#   * :might_be_colon - when truthy, a bitmap containing at least three
#     stretches of five-or-more zero bytes is reported as ":" without running OCR
#   * :should_invert  - when truthy, the image is negated before OCR
#   * :sharpen        - when truthy, the image is sharpened before OCR
#   * :levels         - (required for uncached bitmaps) values tried in order
#     as gocr's -l argument until one produces a digit
# @return [Integer, String, nil] the recognized number, ":" for a colon, or
#   nil when no level produced a digit (the nil is cached as well)
# @raise [RuntimeError] if OCR is needed but options[:levels] is missing
def identify_digit(memory_bitmap, options = {})
  if CACHE.has_key?(memory_bitmap)
    return CACHE[memory_bitmap] unless defined?($OCR_NO_CACHE) && $OCR_NO_CACHE
  else
    puts 'cache miss' if $DEBUG && $VERBOSE
  end

  if options[:might_be_colon]
    # do special processing <sigh>
    total = memory_bitmap.scan(/\x00{5}+/).length
    # really should be 4 for VLC
    # it had some darkness...therefore it must have been a colon!
    return CACHE[memory_bitmap] = ":" if total >= 3
  end

  p options if $DEBUG
  # Validate before the expensive image conversion so a misconfigured player
  # description fails fast.
  raise 'you must pass in OCR levels in the player description' unless options[:levels]

  # Loaded lazily because of installation woes; deferring it to this point
  # also means cache hits and colon detection never need the gem.
  require 'mini_magick'
  image = MiniMagick::Image.read(memory_bitmap)
  # any operation on image is expensive, requires convert.exe in path...
  # hulu wants negate, but doesn't want sharpen, for whatever reason...
  # (mogrify calls it negate)
  image.negate if options[:should_invert]
  image.format(:pnm)
  # I think it's VLC full screen that wants sharpening...
  image.sharpen(2) if options[:sharpen]

  options[:levels].each do |level|
    # File::NULL is "NUL" on Windows and "/dev/null" elsewhere, so stderr is
    # discarded instead of being written to a stray file named NUL.
    command = "#{GOCR} -l #{level} #{image.path} 2>#{File::NULL}"
    output = `#{command}`
    next unless output =~ /[0-9]/

    # it might be funky like "_1_\n"
    return CACHE[memory_bitmap] = output.strip.delete('_').to_i
  end

  # cache failures here, for VLC's hour clock's sake
  CACHE[memory_bitmap] = nil
end
#load_from_ocr_seed ⇒ Object
114 115 116 117 118 119 120 121 |
# File 'lib/ocr.rb', line 114
#
# Pre-populates CACHE from the bundled seed bitmaps in the ocr_seed directory
# next to this file. Each seed is named after the digit it shows (e.g. "7.bmp");
# its raw bytes become the cache key and the digit the cached value.
#
# @return [Array<String>] the seed file paths that were loaded
# @raise [RuntimeError] if a seed file name carries no number, or a number
#   that is not a single digit
def load_from_ocr_seed
  Dir[__DIR__ + "/ocr_seed/*.bmp"].each do |file|
    number = file[/(\d+)\.bmp/i, 1]
    # Without this check an unnumbered file would be read as nil.to_i == 0 and
    # silently registered as the digit 0.
    raise "OCR seed file name does not contain a digit: #{file}" unless number

    digit = number.to_i
    raise "OCR seed file must be named after a single digit (0-9): #{file}" unless digit < 10

    CACHE[File.binread(file)] = digit
  end
end
#serialize_cache_to_disk ⇒ Object
104 105 106 |
# File 'lib/ocr.rb', line 104
#
# Persists the in-memory CACHE by writing its Marshal dump to CACHE_FILE in
# binary mode.
#
# @return [Integer] number of bytes written
def serialize_cache_to_disk
  dumped = Marshal.dump(CACHE)
  File.open(CACHE_FILE, 'wb') { |io| io.write(dumped) }
end
#unserialize_cache_from_disk ⇒ Object
108 109 110 111 112 |
# File 'lib/ocr.rb', line 108
#
# Merges a previously saved cache from CACHE_FILE into CACHE. Does nothing
# when the file does not exist.
#
# NOTE(review): Marshal.load trusts the file's contents completely; this is
# only safe as long as CACHE_FILE is written solely by this program.
#
# @return [Hash, nil] CACHE after merging, or nil when there was no file
def unserialize_cache_from_disk
  return unless File.exist?(CACHE_FILE)

  stored = Marshal.load(File.binread(CACHE_FILE))
  CACHE.merge!(stored)
end
#version ⇒ Object
93 94 95 |
# File 'lib/ocr.rb', line 93
#
# Runs the gocr executable with -h and returns everything it printed, with
# stderr folded into stdout.
#
# @return [String] the combined output of `gocr -h`
def version
  %x(#{GOCR} -h 2>&1)
end