Class: Tripleloop::DocumentProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/tripleloop/document_processor.rb

Defined Under Namespace

Classes: ExtractorNotFoundError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(document) ⇒ DocumentProcessor

Returns a new instance of DocumentProcessor.



5
6
7
# File 'lib/tripleloop/document_processor.rb', line 5

# Builds a processor for a single document.
#
# The argument is not stored as given: it is passed through
# Util.with_nested_fetch and the value that call returns is kept in
# @document.
# NOTE(review): Util.with_nested_fetch is defined outside this view;
# presumably it adds nested-key lookup to the document -- confirm in Util.
#
# @param document the raw document to process
def initialize(document)
  @document = Util.with_nested_fetch(document)
end

Instance Attribute Details

#document ⇒ Object (readonly)

Returns the value of attribute document.



3
4
5
# File 'lib/tripleloop/document_processor.rb', line 3

# Reader for the @document instance variable.
#
# @return [Object] whatever is currently stored in @document
def document
  @document
end

Class Method Details

.batch_process(documents) ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/tripleloop/document_processor.rb', line 24

# Processes every document and merges the per-document statements into a
# single hash.
#
# Each document is wrapped in a new processor and its
# +extracted_statements+ hash is collected. The hashes are then merged
# left to right; when the same key occurs in more than one document the
# values are joined with Array#+, in document order.
#
# The per-document arrays are never modified: joining builds a new array
# rather than appending to the existing one, so the values returned by
# the extractors stay intact after a batch run.
#
# The returned hash keeps an empty-array default for unknown keys. That
# default is one shared object, so callers should treat it as read-only.
#
# @param documents [Enumerable] the documents to process
# @return [Hash] merged statements; empty when +documents+ is empty
def self.batch_process(documents)
  per_document = documents.map { |doc| new(doc).extracted_statements }

  per_document.reduce(Hash.new([])) { |accu, statements|
    # `+` instead of `concat`: `olds` may be the very array a document's
    # extractor returned, and it must not be changed in place.
    accu.merge(statements) { |_key, olds, news| olds + news }
  }
end

.extractors(*args) ⇒ Object



9
10
11
12
13
14
15
16
# File 'lib/tripleloop/document_processor.rb', line 9

# Registers one or more extractors on the class.
#
# If the final argument responds to +fetch+ it is removed from the list
# and used as the options for every extractor named in this call;
# otherwise the options are an empty hash. All extractors registered by
# one call share the same options object. Registrations accumulate in
# @extractors across calls, and registering a name again replaces its
# options.
#
# @param args extractor identifiers, optionally followed by an options
#   object
# @return [Array] the extractor identifiers (the options are not included)
def self.extractors(*args)
  @extractors ||= {}

  options = {}
  options = args.pop if args.last.respond_to?(:fetch)

  args.each { |ext| @extractors[ext] = options }
end

Instance Method Details

#extracted_statements ⇒ Object



18
19
20
21
22
# File 'lib/tripleloop/document_processor.rb', line 18

# Runs every extractor and collects the results, keyed by extractor name.
#
# For each object yielded by +extractor_instances+ the key is
# +name.to_sym+ and the value is whatever +extract+ returns. The hash is
# built once and memoized in @extracted_statements, so later calls
# return the same object without running the extractors again.
#
# @return [Hash{Symbol => Object}] extractor name => extracted value
def extracted_statements
  @extracted_statements ||= extractor_instances.each_with_object({}) { |extractor, statements|
    statements[extractor.name.to_sym] = extractor.extract
  }
end