Class: Paperless::Engine
- Inherits:
-
Object
- Object
- Paperless::Engine
- Defined in:
- lib/paperless/engine.rb
Constant Summary collapse
- PDFPEN_ENGINE =
'pdfpen'
- PDFPENPRO_ENGINE =
'pdfpenpro'
- PDFPENPRO6_ENGINE =
'pdfpenpro6'
- ACROBAT_ENGINE =
'acrobat'
- DEVONTHINKPRO_ENGINE =
'devonthinkpro'
- DEVONTHINKPRO_SERVICE =
'devonthinkpro'
- FINDER_SERVICE =
'finder'
- EVERNOTE_SERVICE =
'evernote'
Instance Attribute Summary collapse
-
#service ⇒ Object
readonly
Returns the value of attribute service.
Instance Method Summary collapse
- #add_tags(tags) ⇒ Object
- #create(options) ⇒ Object
-
#initialize(options) ⇒ Engine
constructor
A new instance of Engine.
- #ocr(dump = false) ⇒ Object
- #print ⇒ Object
- #process_pdf ⇒ Object
- #process_rules ⇒ Object
- #process_rules_engine(text) ⇒ Object
- #process_text ⇒ Object
- #set_date_default ⇒ Object
- #set_destination(destination) ⇒ Object
- #set_service(service) ⇒ Object
- #set_title(title) ⇒ Object
Constructor Details
#initialize(options) ⇒ Engine
Returns a new instance of Engine.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/paperless/engine.rb', line 28

# Builds an engine from a configuration hash.
#
# @param options [Hash] settings read by symbol key: :default_service, :file,
#   :text_ext, :date_format, :date_locale, :date_default,
#   :default_destination, :rules and :ocr_engine
# @return [Engine] a new instance of Engine
def initialize(options)
  # Per-document state; destination and title stay nil until a rule sets them.
  @destination = nil
  @title       = nil
  @date        = DateTime.now
  @tags        = []

  # NOTE(review): @service starts out as the default service, while
  # set_service only assigns when @service is nil -- confirm that a rule's
  # service is really meant to be ignored whenever a default is configured.
  @service         = options[:default_service]
  @default_service = options[:default_service]

  @file     = options[:file]
  @text_ext = options[:text_ext]
  # NOTE(review): kept exactly as rendered in the documentation (a one-element
  # array literal); confirm this was not meant to be read from `options`.
  @pdf_ext  = [Paperless::PDF_EXT]

  @date_format         = options[:date_format]
  @date_locale         = options[:date_locale]
  @date_default        = options[:date_default]
  @default_destination = options[:default_destination]

  # Wrap every rule definition in a Rule object; a missing :rules entry is
  # treated as "no rules" instead of raising on nil.
  @rules = (options[:rules] || []).map { |rule| Paperless::Rule.new(rule) }

  @ocr_engine = options[:ocr_engine] || false
end
Instance Attribute Details
#service ⇒ Object (readonly)
Returns the value of attribute service.
26 27 28 |
# File 'lib/paperless/engine.rb', line 26

# Reader for the service attribute.
#
# @return [Object] the current value of @service
def service
  @service
end
Instance Method Details
#add_tags(tags) ⇒ Object
70 71 72 73 74 75 |
# File 'lib/paperless/engine.rb', line 70

# Merges new tags into @tags, lower-casing every tag and dropping duplicates.
# Nothing changes when the given list is nil or empty.
#
# @param tags [Array<String>, nil] tags to add
# @return [Array<String>, nil] the updated tag list, or nil when nothing was added
def add_tags(tags)
  return if tags.nil? || tags.empty?

  @tags = (@tags + tags).map(&:downcase).uniq
end
#create(options) ⇒ Object
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/paperless/engine.rb', line 197

# Prints a summary of the document and hands it to the selected service.
# The service name is @service, or @default_service when @service is nil;
# it is matched case-insensitively against the *_SERVICE constants.
#
# @param options [Hash] only :delete is read here and passed on to the service
# @return [Object, nil] whatever the service's #create returns, or nil when
#   no service matched (a warning is printed in that case)
def create(options)
  # May need to externalize this so other methods can access it.
  service_name = @service.nil? ? @default_service : @service
  service = case service_name
            when /^#{EVERNOTE_SERVICE}$/i      then PaperlessService::Evernote.new
            when /^#{FINDER_SERVICE}$/i        then PaperlessService::Finder.new
            when /^#{DEVONTHINKPRO_SERVICE}$/i then PaperlessService::DevonThinkPro.new
            else false
            end

  unless service
    puts "WARNING: No valid Service was defined."
    return
  end

  # Explicit receiver: this is the engine's own #print, not Kernel#print.
  self.print

  destination = @destination.nil? ? @default_destination : @destination
  # Fall back to the file name without its extension when no title was set.
  title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title

  service.create({
    :delete      => options[:delete],
    :destination => destination,
    :text_ext    => @text_ext,
    :file        => @file,
    :date        => @date,
    :title       => title,
    :tags        => @tags
  })
end
#ocr(dump = false) ⇒ Object
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/paperless/engine.rb', line 158

# Runs the configured OCR engine over @file unless the first page already
# yields text.
#
# @param dump [Boolean] when true, print the extracted page text as well
# @return [Object, nil] not meaningful; the method is used for its side effects
def ocr(dump = false)
  reader = PDF::Reader.new(@file)
  first_page = reader.pages.first

  # Only the first page is inspected to decide whether OCR is still needed.
  if first_page
    text = first_page.text
    unless text.nil? || text == ''
      puts text if dump
      puts "This doc already seems to be OCR'd. Not processing through #{@ocr_engine}"
      return
    end
  end

  puts "Running OCR on file with #{@ocr_engine}"

  # NOTE(review): PDFPEN6_ENGINE is not among the constants listed for this
  # class, so referencing it directly raises NameError for every engine name
  # other than pdfpenpro6. Use it when it is defined somewhere visible and
  # otherwise fall back to the name implied by the sibling constants --
  # confirm the intended value.
  pdfpen6_engine = defined?(PDFPEN6_ENGINE) ? PDFPEN6_ENGINE : 'pdfpen6'

  ocr_engine = case @ocr_engine
               when /^#{PDFPENPRO6_ENGINE}$/i    then PaperlessOCR::PDFpenPro6.new
               when /^#{pdfpen6_engine}$/i       then PaperlessOCR::PDFpen6.new
               when /^#{PDFPENPRO_ENGINE}$/i     then PaperlessOCR::PDFpenPro.new
               when /^#{PDFPEN_ENGINE}$/i        then PaperlessOCR::PDFpen.new
               when /^#{ACROBAT_ENGINE}$/i       then PaperlessOCR::Acrobat.new
               when /^#{DEVONTHINKPRO_ENGINE}$/i then PaperlessOCR::DevonThinkPro.new
               else false
               end

  if ocr_engine
    ocr_engine.ocr({:file => @file})
    if dump
      puts "Dumping Page Content..."
      # Print the contents of the doc; the file is opened again so the text
      # is read after the OCR engine has run.
      reader = PDF::Reader.new(@file)
      reader.pages.each do |page|
        puts page.text
      end
    end
  else
    puts "WARNING: No valid OCR engine was defined."
  end
end
#print ⇒ Object
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 |
# File 'lib/paperless/engine.rb', line 226

# Writes a summary of the current document (file, service, destination,
# title, date and tags) to standard output. Unset values fall back to the
# configured defaults; an unset title falls back to the file's base name.
#
# @return [nil]
def print
  shown_service     = @service.nil? ? @default_service : @service
  shown_title       = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
  shown_destination = @destination.nil? ? @default_destination : @destination

  # When the Finder service is told not to move the file, report the
  # directory the file already lives in.
  if shown_destination == PaperlessService::Finder::NO_MOVE && shown_service == PaperlessService::FINDER.downcase
    shown_destination = File.dirname(@file)
  end

  separator = "* ---------------------------------------------"
  puts [
    separator,
    "* File: #{@file}",
    "* Service: #{shown_service}",
    "* Destination: #{shown_destination}",
    "* Title: #{shown_title}",
    "* Date: #{@date.strftime('%Y-%m-%d')}",
    "* Tags: #{@tags.join(', ')}",
    separator
  ]
end
#process_pdf ⇒ Object
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/paperless/engine.rb', line 133

# Reads @file as a PDF, looks for a document date when any rule is a date
# rule, then passes every page's text through the rules engine.
#
# @return [Object] result of iterating the pages; used for its side effects
def process_pdf
  puts "Processing PDF pages..."

  reader = PDF::Reader.new(@file)

  # The date is only searched for when at least one rule is a date rule;
  # it has to be known before the rules run because they may use it.
  needs_date = @rules.any? { |rule| rule.condition == Paperless::DATE_VAR }

  if needs_date
    # Stop at the first page that yields a date.
    reader.pages.each do |page|
      @date = date_search(page.text, @date_locale)
      break if @date
    end
    # Check for the date in the file name if not found in the content
    @date = date_search(@file, @date_locale) if @date.nil?
  end

  # Process each page and pass it through the rules engine
  reader.pages.each { |page| process_rules_engine(page.text) }
end
#process_rules ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/paperless/engine.rb', line 52

# Dispatches @file to the PDF or text processor based on its extension.
# Markdown extensions (md, mmd) are handled like the configured text
# extensions; any other extension only prints a notice.
#
# @return [Object, nil] result of the chosen processor, or nil for unknown types
def process_rules
  text_like = @text_ext + %w[md mmd]
  extension = File.extname(@file).delete('.')

  if extension == Paperless::PDF_EXT
    process_pdf
  elsif text_like.include?(extension)
    process_text
  else
    puts "Unknown file type. No rules were processed."
  end
end
#process_rules_engine(text) ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/paperless/engine.rb', line 100

# Runs every rule against the given text and applies the settings of each
# rule that matches for the first time.
#
# @param text [String] content to match the rules against
# @return [Array] the rule list that was iterated
def process_rules_engine(text)
  # Rules are handed a date, so make sure one exists before they run.
  set_date_default if @date.nil?

  # Process each page and pass it through the rules engine
  @rules.each do |rule|
    rule.set_date(@date, @date_format)

    # A rule that has already matched is not evaluated again.
    next if rule.matched || !rule.match(@file, text)

    # NOTE(review): the rendered source lost this call's identifiers; it is
    # restored as add_tags(rule.tags) by analogy with the three calls below --
    # confirm against lib/paperless/engine.rb.
    add_tags(rule.tags)
    set_destination(rule.destination)
    set_title(rule.title)
    set_service(rule.service)
  end
end
#process_text ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/paperless/engine.rb', line 114

# Reads @file as text, looks for a document date when any rule is a date
# rule, then passes the whole content through the rules engine.
#
# @return [Object] whatever process_rules_engine returns
def process_text
  puts "Processing Text file..."

  text = File.open(@file, "rb") { |io| io.read }

  # The date has to be known before the rules run because they may use it.
  # Search once when at least one date rule exists (content first, then the
  # file name) instead of repeating the same search for every date rule.
  if @rules.any? { |rule| rule.condition == Paperless::DATE_VAR }
    @date = date_search(text, @date_locale) || date_search(@file, @date_locale)
  end

  # Pass the file content through the rules engine
  process_rules_engine(text)
end
#set_date_default ⇒ Object
89 90 91 92 93 94 95 96 97 98 |
# File 'lib/paperless/engine.rb', line 89

# Assigns the fallback date: the file's modification date when
# @date_default equals Paperless::FILEDATE, otherwise the current time.
#
# @return [Date, DateTime] the date that was assigned to @date
def set_date_default
  puts "Using default date..."

  # Set the default date to the date of the file or else to now
  @date =
    if @date_default == Paperless::FILEDATE
      modified = File.stat(@file).mtime
      Date.new(modified.year, modified.month, modified.day)
    else
      DateTime.now
    end
end
#set_destination(destination) ⇒ Object
77 78 79 |
# File 'lib/paperless/engine.rb', line 77

# Stores the destination unless one has already been set; a nil or false
# argument is ignored, so the first usable value wins.
#
# @param destination [Object, nil] candidate destination
# @return [Object, nil] the stored value, or nil when nothing was stored
def set_destination(destination)
  return unless destination && @destination.nil?

  @destination = destination
end
#set_service(service) ⇒ Object
85 86 87 |
# File 'lib/paperless/engine.rb', line 85

# Stores the service unless one has already been set; a nil or false
# argument is ignored, so the first usable value wins.
#
# @param service [Object, nil] candidate service name
# @return [Object, nil] the stored value, or nil when nothing was stored
def set_service(service)
  return unless service && @service.nil?

  @service = service
end
#set_title(title) ⇒ Object
81 82 83 |
# File 'lib/paperless/engine.rb', line 81

# Stores the title unless one has already been set; a nil or false
# argument is ignored, so the first usable value wins.
#
# @param title [Object, nil] candidate title
# @return [Object, nil] the stored value, or nil when nothing was stored
def set_title(title)
  return unless title && @title.nil?

  @title = title
end