Class: SimHashGenPerfTestFactory

Inherits:
Object
  • Object
show all
Includes:
Gravitext::HTMap, Iudex::Core, Iudex::Core::Filters, Iudex::Filter::Core, Iudex::HTML, Iudex::HTML::Filters, Iudex::HTML::Tree, Iudex::HTML::Tree::Filters, Iudex::SimHash::Filters, Iudex::SimHash::Filters::FactoryHelper
Defined in:
lib/iudex-simhash/sim_hash_gen_perf_test_factory.rb

Constant Summary collapse

Order =
HTMLTreeFilter::Order

Constants included from Iudex::SimHash::Filters::FactoryHelper

Iudex::SimHash::Filters::FactoryHelper::DEFAULT_WORDS, Iudex::SimHash::Filters::FactoryHelper::Element

Instance Method Summary collapse

Methods included from Iudex::SimHash::Filters::FactoryHelper

#simhash_generator, #simhash_generator_inputs, #simhash_stopwords

Constructor Details

#initialize ⇒ SimHashGenPerfTestFactory

Returns a new instance of SimHashGenPerfTestFactory.



39
40
41
# File 'lib/iudex-simhash/sim_hash_gen_perf_test_factory.rb', line 39

# Prepares a new factory instance.
#
# The only work done here is a call to UniMap.define_accessors.
# NOTE(review): presumably this generates the per-key accessor methods on
# UniMap (such as the `source=` writer used by #content) — confirm against
# the UniMap / Gravitext::HTMap definitions.
def initialize
  UniMap.define_accessors
end

Instance Method Details

#content ⇒ Object



52
53
54
55
56
57
58
59
60
# File 'lib/iudex-simhash/sim_hash_gen_perf_test_factory.rb', line 52

# Builds the input map for the performance test.
#
# Reads the fixture 'test/html/gentest.html', located two directories above
# this file's directory, converts its contents to Java bytes and stores the
# result of HTMLUtils::source on the map's `source` accessor.
#
# Returns the newly created UniMap.
def content
  UniMap.new.tap do |doc|
    fixture_path = File.join( File.dirname( __FILE__ ), '..', '..',
                              'test', 'html', 'gentest.html' )
    raw_html = File.read( fixture_path )

    # The second argument is presumably the encoding of the bytes — confirm
    # against HTMLUtils.
    doc.source = HTMLUtils::source( raw_html.to_java_bytes, "UTF-8" )
  end
end

#filter_chain ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/iudex-simhash/sim_hash_gen_perf_test_factory.rb', line 62

# Assembles the FilterChain named "perf_test".
#
# The chain holds, in order: an HTMLParseFilter from ContentKeys::SOURCE to
# HTMLKeys::SOURCE_TREE, a TitleExtractor, a TextCtrlWSFilter on
# ContentKeys::TITLE, and an HTMLTreeFilter over HTMLKeys::SOURCE_TREE that
# applies a TreeFilterChain (MetaSkipFilter, CharactersNormalizer,
# WordCounter, WordyCounter) with Order::DEPTH_FIRST.
#
# Returns the new FilterChain.
def filter_chain
  # Elements are built top to bottom, so filters are constructed in the same
  # sequence in which they are listed.
  stages = [
    HTMLParseFilter.new( ContentKeys::SOURCE, nil, HTMLKeys::SOURCE_TREE ),
    TitleExtractor.new,
    TextCtrlWSFilter.new( ContentKeys::TITLE ),
    HTMLTreeFilter.new( HTMLKeys::SOURCE_TREE,
                        TreeFilterChain.new( [ MetaSkipFilter.new,
                                               CharactersNormalizer.new,
                                               WordCounter.new,
                                               WordyCounter.new ] ),
                        Order::DEPTH_FIRST )
  ]

  FilterChain.new( "perf_test", stages )
end

#perf_test ⇒ Object



43
44
45
46
47
48
49
50
# File 'lib/iudex-simhash/sim_hash_gen_perf_test_factory.rb', line 43

# Creates the SimHashGenPerfTest instance.
#
# Loads the map from #content, runs it once through #filter_chain (the
# initial parse), then hands that map together with simhash_generator to
# SimHashGenPerfTest.new.
#
# Returns the new SimHashGenPerfTest.
def perf_test
  # Initial parse: the map is filtered in place before being handed over.
  parsed = content.tap { |doc| filter_chain.filter( doc ) }

  SimHashGenPerfTest.new( parsed, simhash_generator )
end