Module: Documentalist
- Defined in:
- lib/dependencies.rb,
lib/documentalist.rb,
lib/backends/net_pbm.rb,
lib/backends/odf_merge.rb,
lib/backends/pdf_tools.rb,
lib/backends/open_office.rb,
lib/backends/wkhtmltopdf.rb
Defined Under Namespace
Modules: Dependencies, NetPBM, ODFMerge, OpenOffice, PdfTools, WkHtmlToPdf Classes: Error
Constant Summary collapse
- BACKENDS =
{
  # Find a better pattern to pick backend, this one smells pretty bad
  :WkHtmlToPdf => {[:html, :htm] => :pdf},
  :OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
  :NetPBM => {:ppm => [:jpg, :jpeg]},
  :PdfTools => {:pdf => :txt},
}
- @@config =
{}
- @@logger =
nil
Class Method Summary collapse
-
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant server to perform the conversion.
-
.check_dependencies ⇒ Object
Checks the dependencies for backends.
- .config ⇒ Object
- .config=(hash) ⇒ Object
- .config? ⇒ Boolean
- .config_from_yaml!(file, options = {}) ⇒ Object
-
.convert(file = nil, options = {}) ⇒ Object
Takes all conversion requests and dispatches them appropriately.
- .default_config! ⇒ Object
- .extract_images(file) {|image_files| ... } ⇒ Object
- .extract_text(file) ⇒ Object
-
.logger ⇒ Object
Returns the logger object used to log documentalist operations.
-
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data.
-
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys.
-
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it and optionally sleeping between attempts.
Class Method Details
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant server to perform the conversion
44 45 46 47 48 49 50 51 |
# File 'lib/documentalist.rb', line 44
# Finds the backend module able to convert from one format to another.
#
# Both arguments may be a bare format (:pdf, "pdf") or a file name; everything
# up to and including the last dot is discarded before the lookup.
#
# @param origin [#to_s] source format or file name
# @param destination [#to_s] target format or file name
# @return [Module, nil] the first backend in BACKENDS whose source formats
#   include the origin and whose target formats include the destination,
#   or nil when none matches
def self.backend_for_conversion(origin, destination)
  source_format = origin.to_s.gsub(/.*\./, "").to_sym
  target_format = destination.to_s.gsub(/.*\./, "").to_sym

  # Resolve every backend name to its module up front, then search the pairs.
  candidates = BACKENDS.map { |name, conversions| [const_get(name), conversions] }
  match = candidates.find do |_backend, conversions|
    conversions.keys.flatten.include?(source_format) &&
      conversions.values.flatten.include?(target_format)
  end

  # nil.to_a is [], so a failed lookup yields nil here.
  match.to_a.first
end
.check_dependencies ⇒ Object
Checks the dependencies for backends
162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/documentalist.rb', line 162
# Checks the system dependencies of every backend.
#
# Walks all constants defined under Documentalist and, for each one that
# responds to +check_dependencies+, announces it on stdout and invokes it.
def self.check_dependencies
  puts "Checking backends system dependencies"

  Documentalist.constants.each do |const_name|
    candidate = Documentalist.const_get(const_name.to_sym)
    # Constants that are not backends (e.g. plain values) are skipped.
    next unless candidate.respond_to?(:check_dependencies)

    puts "Checking dependencies for #{candidate.to_s}"
    candidate.send(:check_dependencies)
  end
end
.config ⇒ Object
12 13 14 15 |
# File 'lib/documentalist.rb', line 12
# Returns the current configuration hash, loading the default configuration
# first when nothing has been configured yet.
def self.config
  config? || default_config!
  @@config
end
.config=(hash) ⇒ Object
17 18 19 20 |
# File 'lib/documentalist.rb', line 17
# Replaces the configuration with the given hash.
#
# Keys are symbolized by our own helper because the library does not depend
# on Active Support.
def self.config=(hash)
  symbolized = symbolize(hash)
  @@config = symbolized
end
.config? ⇒ Boolean
22 23 24 |
# File 'lib/documentalist.rb', line 22
# Tells whether a configuration has been set, i.e. whether the stored
# configuration is anything other than an empty hash.
#
# @return [Boolean]
def self.config?
  !(@@config == {})
end
.config_from_yaml!(file, options = {}) ⇒ Object
30 31 32 33 |
# File 'lib/documentalist.rb', line 30
# Loads the configuration from a YAML file.
#
# @param file [String] path of the YAML file to read
# @param options [Hash] when +:section+ is given, the configuration is
#   narrowed down to the sub-hash stored under that key
def self.config_from_yaml!(file, options = {})
  # Block form closes the file handle as soon as the document is parsed.
  self.config = File.open(file) { |io| YAML::load(io) }
  self.config = config[options[:section].to_sym] if options[:section]
end
.convert(file = nil, options = {}) ⇒ Object
Takes all conversion requests and dispatches them appropriately
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/documentalist.rb', line 54
# Takes all conversion requests and dispatches them to the matching backend.
#
# The input is either an existing +file+ or, when +file+ is nil, raw data
# passed as +options[:input]+ together with +options[:input_format]+ (the
# data is written to a temporary file first).
#
# The destination is chosen from the first of these options that is set:
# * +:to_format+ - output is written next to the input with that extension
# * +:to+        - explicit output path, the format comes from its extension
# * +:stream+    - format of the data to return; a temporary output file is
#                  used and removed afterwards
#
# Note that the given +options+ hash is updated in place with +:to+,
# +:to_format+ and +:from_format+.
#
# @param file [String, nil] path of the document to convert
# @param options [Hash] see above
# @yield [result] the converted data (+:stream+) or the output path
# @return [String] the converted data (+:stream+) or the output path
# @raise [Documentalist::Error] when the input file does not exist, when no
#   destination was given, or when no backend handles the conversion
def self.convert(file = nil, options = {})
  # Remember that we created the input file ourselves: +file+ is reassigned
  # below, so testing +file.nil?+ again later can never be true.
  temp_input = false
  if options[:input] && options[:input_format] && file.nil?
    file = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:input_format]}")
    File.open(file, 'w') { |f| f.write(options[:input]) }
    temp_input = true
  end

  begin
    raise Documentalist::Error.new("#{file} does not exist !") unless file && File.exist?(file)

    if options[:to_format]
      options[:to] = file.gsub(/#{"\\" + File.extname(file)}$/, ".#{options[:to_format]}")
    elsif options[:to]
      options[:to_format] = File.extname(options[:to]).gsub(/\./, "").to_sym
    elsif options[:stream]
      options[:to_format] = options[:stream]
      options[:to] = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:to_format]}")
    else
      raise Documentalist::Error.new("No destination, format, or stream format was given")
    end

    options[:from_format] = File.extname(file).gsub(/\./, "").to_sym

    backend = backend_for_conversion(options[:from_format], options[:to_format])
    unless backend
      raise Documentalist::Error.new("No backend can convert #{options[:from_format]} to #{options[:to_format]}")
    end
    backend.convert(file, options)
  ensure
    # Always clean up the temporary input, even when the conversion failed.
    FileUtils.rm(file) if temp_input && File.exist?(file)
  end

  if options[:stream]
    data = File.read(options[:to])
    FileUtils.rm(options[:to])
    yield(data) if block_given?
    data
  else
    yield(options[:to]) if block_given?
    options[:to]
  end
end
.default_config! ⇒ Object
26 27 28 |
# File 'lib/documentalist.rb', line 26
# Loads the configuration from config/default.yml, resolved relative to the
# parent of this file's directory.
def self.default_config!
  default_path = File.join(File.dirname(__FILE__), %w{.. config default.yml})
  config_from_yaml!(default_path)
end
.extract_images(file) {|image_files| ... } ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/documentalist.rb', line 106
# Extracts the images embedded in a document.
#
# PDF files are copied to a fresh temporary directory, run through the
# +pdfimages+ command, and every resulting PPM image is converted to JPEG.
# Any other file is converted to HTML.
#
# NOTE(review): in the non-PDF branch the temporary directory is never
# created or passed to the conversion, so the glob below presumably finds
# nothing unless the backend writes there on its own - confirm.
#
# @param file [String] path of the document
# @yield [image_files] the list of extracted image paths
# @return [Array<String>] paths of the image files found in the temporary
#   directory
def self.extract_images(file)
  temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)

  if File.extname(file).downcase == '.pdf'
    temp_file = File.join(temp_dir, File.basename(file))

    FileUtils.mkdir_p temp_dir
    FileUtils.cp file, temp_file

    # Argument-list form bypasses the shell, so paths containing spaces or
    # shell metacharacters are passed to pdfimages untouched.
    system "pdfimages", temp_file, File.join(temp_dir, "img")

    Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
      Documentalist.convert(ppm_image, :to_format => :jpeg)
    end
  else
    Documentalist.convert file, :to_format => :html
  end

  image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))

  yield(image_files) if block_given?

  image_files
end
.extract_text(file) ⇒ Object
95 96 97 98 99 100 101 102 103 104 |
# File 'lib/documentalist.rb', line 95
# Extracts the text content of a document as UTF-8.
#
# The document is converted to the +txt+ format, the result is read and
# re-encoded with Kconv, and the intermediate text file is deleted.
#
# @param file [String] path of the document
# @yield [text] the extracted text
# @return [String, nil] the extracted text, or nil when the conversion
#   produced no file
def self.extract_text(file)
  converted = convert(file, :to_format => :txt)
  return unless converted && File.exist?(converted)

  # File.read closes the handle itself; File.open(...).read left it open.
  text = Kconv.toutf8(File.read(converted))
  FileUtils.rm(converted)
  yield(text) if block_given?
  text
end
.logger ⇒ Object
Returns the logger object used to log documentalist operations
151 152 153 154 155 156 157 158 159 |
# File 'lib/documentalist.rb', line 151
# Returns the logger object used to log documentalist operations.
#
# The logger is created on first use and memoized. It writes to
# +config[:log_file]+, which defaults to documentalist.log in the parent of
# this file's directory, and its level comes from +config[:log_level]+
# (WARN when unset).
#
# @return [Logger]
def self.logger
  unless @@logger
    Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), %w{.. documentalist.log})
    @@logger = Logger.new(Documentalist.config[:log_file])
    # to_s lets the level be configured as either a String or a Symbol.
    @@logger.level = Logger.const_get(config[:log_level] ? config[:log_level].to_s.upcase : "WARN")
  end
  @@logger
end
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data
7 8 9 |
# File 'lib/backends/odf_merge.rb', line 7
# Merge an ODF document with an arbitrary hash of data.
#
# Thin wrapper that forwards both arguments to ODFMerge.merge_template.
#
# @param template [Object] passed through as the template
# @param options [Hash] passed through unchanged
# @return [Object] whatever ODFMerge.merge_template returns
def self.odf_merge(template, options = {})
  ODFMerge.merge_template(template, options)
end
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys
176 177 178 179 180 181 |
# File 'lib/documentalist.rb', line 176
# Returns a new hash with recursively symbolized keys.
#
# The given hash is left untouched. Values that are hashes are symbolized
# too; other values (including arrays) are copied over as they are. Keys
# that cannot be turned into a symbol (e.g. integers) are kept unchanged.
#
# @param hash [Hash] the hash to convert
# @return [Hash] a new hash whose keys are symbols wherever possible
def self.symbolize(hash)
  # Build a fresh hash instead of deleting/inserting keys while iterating:
  # adding a key to a hash during iteration raises on Ruby >= 1.9.
  hash.inject({}) do |result, (key, value)|
    sym_key = key.respond_to?(:to_sym) ? key.to_sym : key
    result[sym_key] = value.is_a?(Hash) ? symbolize(value) : value
    result
  end
end
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it and optionally sleeping between attempts. All times are in seconds.
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/documentalist.rb', line 134
# Runs a block with a timeout, optionally retrying it and optionally
# sleeping between attempts. All times are in seconds.
#
# SystemTimer is used when it is loaded; otherwise the standard Timeout
# module is used.
#
# @param time_limit [Numeric] maximum run time of one attempt
# @param options [Hash]
#   * +:attempts+ - total number of attempts (defaults to 1)
#   * +:sleep+    - pause between two attempts (no pause when nil)
# @yield the work to run under the timeout
# @return [Object, nil] the value of the block, or nil when no block is given
# @raise [Timeout::Error] when the last attempt timed out as well
def self.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil})
  return unless block_given?

  attempts = options[:attempts] || 1
  timer = defined?(SystemTimer) ? SystemTimer : Timeout

  begin
    timer.timeout(time_limit) { yield }
  rescue Timeout::Error
    attempts -= 1
    # "> 0" rather than "zero?": a count of 0 or less must not retry forever.
    raise unless attempts > 0
    # Only pause when another attempt actually follows.
    sleep(options[:sleep]) if options[:sleep]
    retry
  end
end