Module: Iudex::SimHash::Filters::FactoryHelper

Includes:
Core, HTML
Included in:
SimHashGenPerfTestFactory
Defined in:
lib/iudex-simhash/factory_helper.rb

Constant Summary

# Path to the 'config/stopwords.en' file located two directories above the
# directory of this source file. Used as the default argument of
# #simhash_stopwords.
DEFAULT_WORDS =
File.join( File.dirname( __FILE__ ), '..', '..',
'config', 'stopwords.en' )
# Short name for the Java class com.gravitext.xml.tree.Element. Its
# java_class is compared against a key's value_type in #simhash_generator to
# choose between tree and text inputs.
Element =
Java::com.gravitext.xml.tree.Element

Instance Method Summary

Instance Method Details

#simhash_generator(input = :simhash_generator_inputs, stopwords = simhash_stopwords) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/iudex-simhash/factory_helper.rb', line 41

# Builds a SimHashGenerator from a list of input specifications.
#
# The specifications are obtained by calling (via send) the method named by
# +input+. Each specification is either a bare key or a [ key, ratio ] pair.
# Every key is converted with #to_k; when the resulting key's value_type is
# the Element Java class a tree input is created, otherwise a text input.
# A given ratio, if any, is assigned to the input's wordy_ratio.
#
# @param input [Symbol] name of the method returning the input specifications
# @param stopwords [Object] stop word set handed to SimHashGenerator.new
#   (defaults to the result of #simhash_stopwords)
# @return [SimHashGenerator] generator over the constructed inputs
def simhash_generator( input = :simhash_generator_inputs,
                       stopwords = simhash_stopwords )
  # Normalize every spec to an array so it can be destructured below.
  specs = send( input ).map { |spec| Array( spec ) }

  gen_inputs = specs.map do |field, wordy_ratio|
    k = field.to_k

    gen_input =
      if k.value_type == Element.java_class
        SimHashGenerator::Input.forTree( k )
      else
        SimHashGenerator::Input.forText( k )
      end

    # Only override the input's ratio when the spec supplied one.
    gen_input.wordy_ratio = wordy_ratio if wordy_ratio
    gen_input
  end

  SimHashGenerator.new( gen_inputs, stopwords )
end

#simhash_generator_inputs ⇒ Object



60
61
62
63
# File 'lib/iudex-simhash/factory_helper.rb', line 60

# Default input specifications consumed by #simhash_generator: the :title
# key with no ratio, and the :source_tree key with a ratio of 0.30.
#
# @return [Array<Array>] list of [ key ] or [ key, ratio ] specifications
def simhash_generator_inputs
  title_spec = [ :title ]
  tree_spec  = [ :source_tree, 0.30 ]

  [ title_spec, tree_spec ]
end

#simhash_stopwords(wfile = DEFAULT_WORDS) ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/iudex-simhash/factory_helper.rb', line 30

# Loads a stop word list from a text file and wraps it in a
# Gen::StopWordSet.
#
# Every line is stripped of surrounding whitespace. Lines that then start
# with '#' are treated as comments and skipped. Blank lines are skipped as
# well, so that an empty string is never handed on as a stop word.
#
# @param wfile [String] path of the stop word file (defaults to DEFAULT_WORDS)
# @return [Gen::StopWordSet] set constructed from the remaining words
# @raise [SystemCallError] if the file cannot be read (e.g. Errno::ENOENT)
def simhash_stopwords( wfile = DEFAULT_WORDS )
  words = File.readlines( wfile ).
    map { |line| line.strip }.
    reject { |word| word.empty? || word.start_with?( '#' ) }

  Gen::StopWordSet.new( words )
end