Class: Iudex::DA::Importer
- Inherits:
-
Object
- Object
- Iudex::DA::Importer
- Includes:
- Gravitext::HTMap, Core, Filter::KeyHelper
- Defined in:
- lib/iudex-da/importer.rb
Instance Method Summary
- #close ⇒ Object
- #import(input) ⇒ Object
- #import_files(files = ARGV) ⇒ Object
- #import_keys ⇒ Object
- #initialize ⇒ Importer (constructor): returns a new instance of Importer.
- #parse_to(line, umap) ⇒ Object
- #template_map ⇒ Object
Methods included from Filter::KeyHelper
Constructor Details
#initialize ⇒ Importer
Returns a new instance of Importer.
32 33 34 35 36 37 |
# File 'lib/iudex-da/importer.rb', line 32
# Builds a new Importer.
#
# Creates the PoolDataSourceFactory kept in @dsf (released again by #close),
# calls UniMap.define_accessors, and finally passes this instance to
# Hooker.apply under the [ :iudex, :importer ] key.
# NOTE(review): presumably define_accessors is what makes setters such as
# `umap.url=` available, and Hooker.apply runs external configuration
# hooks against the importer -- confirm against those libraries.
def initialize
  @dsf = PoolDataSourceFactory.new

  UniMap.define_accessors

  Hooker.apply( [ :iudex, :importer ], self )
end
Instance Method Details
#close ⇒ Object
86 87 88 89 |
# File 'lib/iudex-da/importer.rb', line 86
# Closes the data source factory held in @dsf, when one is present, and
# then drops the reference so that calling #close again does nothing.
#
# @return [nil]
def close
  if @dsf
    @dsf.close
  end

  # Cleared only after a successful close, exactly as before.
  @dsf = nil
end
#import(input) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/iudex-da/importer.rb', line 47
# Imports every line yielded by +input+.
#
# For each line a clone of #template_map is filled in by #parse_to and
# queued; each time 1,000 entries have been queued they are handed to a
# ContentUpdater (built from @dsf.create, a ContentMapper over
# keys( import_keys ), and a BaseTransformer) and the queue is emptied.
# Any entries left over after the last line are sent in one final update.
#
# @param input an object whose #each yields the lines to import
#   (for example the open file passed by #import_files)
# @return the result of the final ContentUpdater#update call, or nil when
#   nothing was left to flush
def import( input )
  mapper  = ContentMapper.new( keys( import_keys ) )
  xform   = BaseTransformer.new
  updater = ContentUpdater.new( @dsf.create, mapper, xform )
  proto   = template_map

  pending = []
  input.each do |line|
    entry = proto.clone
    parse_to( line, entry )
    pending.push( entry )
    next if pending.length < 1_000

    # Batch is full: hand it off, then reuse the same array for the next one.
    updater.update( pending )
    pending.clear
  end

  # Flush the final, partial batch (if any).
  updater.update( pending ) unless pending.empty?
end
#import_files(files = ARGV) ⇒ Object
39 40 41 42 43 44 45 |
# File 'lib/iudex-da/importer.rb', line 39
# Imports each named file in turn: the file is opened read-only, the open
# handle is passed to #import, and the handle is closed when the block
# returns.
#
# File.open is used rather than Kernel#open on purpose: the names default
# to ARGV, and Kernel#open would treat a name beginning with "|" as a
# command to run in a subprocess instead of a path to read.
#
# @param files [Array<String>] paths of the files to import
#   (defaults to the command-line arguments)
# @return the +files+ collection that was iterated
# @raise [SystemCallError] (e.g. Errno::ENOENT) if a file cannot be opened
def import_files( files = ARGV )
  files.each do |fname|
    File.open( fname, "r" ) do |fin|
      import( fin )
    end
  end
end
#import_keys ⇒ Object
68 69 70 |
# File 'lib/iudex-da/importer.rb', line 68
# Names of the fields handed (via keys) to the ContentMapper that #import
# builds.
#
# @return [Array<Symbol>] a freshly built array on every call
def import_keys
  %w[ uhash domain url type priority next_visit_after ].map do |name|
    name.to_sym
  end
end
#parse_to(line, umap) ⇒ Object
80 81 82 83 84 |
# File 'lib/iudex-da/importer.rb', line 80
# Fills +umap+ from one input line: the line is split on commas and the
# first field, passed through VisitURL.normalize, becomes umap.url. Any
# further fields are ignored here.
#
# @param line [String] one line of import input
# @param umap the map to populate (a clone of #template_map when called
#   from #import)
# @return the same +umap+ that was passed in
def parse_to( line, umap )
  first_field = line.split( ',' ).first
  umap.url = VisitURL.normalize( first_field )

  umap
end
#template_map ⇒ Object
72 73 74 75 76 77 78 |
# File 'lib/iudex-da/importer.rb', line 72
# Builds the prototype UniMap that #import clones for every input line.
# It carries the values shared by all imported entries: type "FEED",
# next_visit_after set to the time of this call, and priority 1.0.
#
# @return [UniMap] a new, pre-populated map
def template_map
  UniMap.new.tap do |proto|
    proto.type             = "FEED"
    proto.next_visit_after = Time.now
    proto.priority         = 1.0
  end
end