Class: Tripleloop::DocumentProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/tripleloop/document_processor.rb

Defined Under Namespace

Classes: ExtractorNotFoundError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(document, options = {}) ⇒ DocumentProcessor

Returns a new instance of DocumentProcessor.



5
6
7
8
# File 'lib/tripleloop/document_processor.rb', line 5

# Builds a processor for a single document.
#
# @param document the raw document; it is passed through
#   Util.with_nested_fetch before being stored (presumably to add
#   nested-key lookup -- confirm against Util)
# @param options [Hash] stored as-is and exposed through #options
def initialize(document, options = {})
  @options = options
  wrapped = Util.with_nested_fetch(document)
  @document = wrapped
end

Instance Attribute Details

#document ⇒ Object (readonly)

Returns the value of attribute document.



3
4
5
# File 'lib/tripleloop/document_processor.rb', line 3

# Reader for the document stored in @document.
def document; @document; end

#options ⇒ Object (readonly)

Returns the value of attribute options.



3
4
5
# File 'lib/tripleloop/document_processor.rb', line 3

# Reader for the options stored in @options.
def options; @options; end

Class Method Details

.batch_process(documents, options = {}) ⇒ Object



25
26
27
28
29
30
31
32
33
# File 'lib/tripleloop/document_processor.rb', line 25

# Processes several documents and merges their extracted statements into
# one hash.
#
# Every document is wrapped in a new instance of this class and its
# +extracted_statements+ hash is collected. The hashes are then merged key
# by key; when the same key occurs in more than one document, the values
# are joined in document order.
#
# @param documents [#map] the documents, each handed to +new+
# @param options [Hash] forwarded unchanged to every +new+ call
# @return [Hash] the merged statements. Lookups of absent keys return an
#   empty array (the accumulator's default); that default array is a single
#   shared object, so callers should not mutate it.
def self.batch_process(documents, options={})
  per_document = documents.map { |doc|
    self.new(doc, options).extracted_statements
  }

  per_document.reduce(Hash.new([])) { |accu, statements|
    # Join with + rather than concat: +olds+ can be the very array an
    # extractor returned for an earlier document, and concat would grow
    # that array in place behind the extractor's back.
    accu.merge(statements) { |_key, olds, news| olds + news }
  }
end

.extractors(*args) ⇒ Object



10
11
12
13
14
15
16
17
# File 'lib/tripleloop/document_processor.rb', line 10

# Registers extractors for this class.
#
# A trailing argument that responds to +fetch+ is removed from the list and
# used as the options for every extractor named in the same call; without
# one, an empty hash is used. The same options object is stored under each
# extractor key.
#
# @param args extractor identifiers, optionally followed by an options hash
# @return [Array] the extractor identifiers that were registered
def self.extractors(*args)
  options = {}
  options = args.pop if args.last.respond_to?(:fetch)

  registry = (@extractors ||= {})
  args.each { |extractor| registry[extractor] = options }
end

Instance Method Details

#extracted_statements ⇒ Object



19
20
21
22
23
# File 'lib/tripleloop/document_processor.rb', line 19

# Runs every extractor and collects the results in a hash keyed by the
# extractor's name (converted to a Symbol). The hash is built on first call
# and memoized in @extracted_statements.
#
# @return [Hash{Symbol => Object}] extractor name => value of #extract
def extracted_statements
  @extracted_statements ||= extractor_instances.each_with_object({}) { |extractor, collected|
    collected[extractor.name.to_sym] = extractor.extract
  }
end