Class: Iudex::DA::Importer

Inherits:
Object
  • Object
show all
Includes:
Gravitext::HTMap, Core, Filter::KeyHelper
Defined in:
lib/iudex-da/importer.rb

Instance Method Summary

Methods included from Filter::KeyHelper

lookup_key

Constructor Details

#initialize ⇒ Importer

Returns a new instance of Importer.



32
33
34
35
36
37
# File 'lib/iudex-da/importer.rb', line 32

# Sets up a new Importer.
#
# Stores a fresh PoolDataSourceFactory in @dsf (released later by #close),
# calls UniMap.define_accessors, and finally hands this instance to
# Hooker.apply under the key [ :iudex, :importer ].
# NOTE(review): presumably Hooker.apply lets external configuration
# customize this instance -- confirm against the Hooker documentation.
def initialize
  @dsf = PoolDataSourceFactory.new
  UniMap.define_accessors

  hook_key = [ :iudex, :importer ]
  Hooker.apply( hook_key, self )
end

Instance Method Details

#close ⇒ Object



86
87
88
89
# File 'lib/iudex-da/importer.rb', line 86

# Closes the data source factory held in @dsf, if there is one, and then
# clears the reference. Calling it again afterwards does nothing.
#
# Returns nil.
def close
  if @dsf
    @dsf.close
  end
  @dsf = nil
end

#import(input) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/iudex-da/importer.rb', line 47

# Reads +input+ one line at a time and pushes the resulting maps through a
# ContentUpdater in batches of at most 1,000.
#
# input:: any object whose #each yields one line per iteration (for
#         example an open File).
#
# Every line gets its own clone of #template_map, which #parse_to then
# fills in. Because the template is built once per call, all maps from one
# call share the same template values.
#
# Returns the result of the last partial-batch update, or nil when no
# partial batch was left over.
def import( input )
  mapper      = ContentMapper.new( keys( import_keys ) )
  transformer = BaseTransformer.new
  updater     = ContentUpdater.new( @dsf.create, mapper, transformer )

  template = template_map
  pending  = []

  input.each do |line|
    record = template.clone
    parse_to( line, record )
    pending << record
    next if pending.size < 1_000

    # Batch is full: flush it, then reuse the same array for the next one.
    updater.update( pending )
    pending.clear
  end

  # Flush whatever is left over from the final, partial batch.
  updater.update( pending ) unless pending.empty?
end

#import_files(files = ARGV) ⇒ Object



39
40
41
42
43
44
45
# File 'lib/iudex-da/importer.rb', line 39

# Imports every file named in +files+, in the order given.
#
# files:: collection of file paths; defaults to ARGV.
#
# Each file is opened read-only and the open IO is handed to #import. The
# block form of File.open closes the file when #import returns or raises.
#
# File.open is used rather than Kernel#open on purpose: Kernel#open treats
# a name starting with "|" as a command to spawn, which is unsafe for names
# taken from the command line. File.open always treats the name as a path.
#
# Returns whatever +files.each+ returns (the receiver, for an Array).
def import_files( files = ARGV )
  files.each do |fname|
    File.open( fname, "r" ) do |fin|
      import( fin )
    end
  end
end

#import_keys ⇒ Object



68
69
70
# File 'lib/iudex-da/importer.rb', line 68

# Returns the list of key names (as Symbols) that #import passes to
# +keys+ when building its ContentMapper. A new Array is returned on
# each call.
def import_keys
  [
    :uhash,
    :domain,
    :url,
    :type,
    :priority,
    :next_visit_after
  ]
end

#parse_to(line, umap) ⇒ Object



80
81
82
83
84
# File 'lib/iudex-da/importer.rb', line 80

# Fills +umap+ from one comma-separated input +line+.
#
# line:: String; only the text before the first comma is used.
# umap:: map to populate; its +url+ is set to the normalized first field.
#
# Returns +umap+.
#
# NOTE(review): the line is not stripped, so a line without a comma keeps
# its trailing newline, and an empty line passes nil to
# VisitURL.normalize -- confirm that normalize tolerates both.
def parse_to( line, umap )
  raw_url = line.split( ',' ).first
  umap.url = VisitURL.normalize( raw_url )
  umap
end

#template_map ⇒ Object



72
73
74
75
76
77
78
# File 'lib/iudex-da/importer.rb', line 72

# Builds the UniMap that #import clones for every input line.
#
# The new map has +type+ "FEED", +next_visit_after+ set to the current
# time, and +priority+ 1.0.
#
# Returns the new UniMap.
def template_map
  UniMap.new.tap do |defaults|
    defaults.type = "FEED"
    defaults.next_visit_after = Time.now
    defaults.priority = 1.0
  end
end