Module: Iudex::SimHash::Filters::FactoryHelper
- Includes:
- Core, HTML
- Included in:
- SimHashGenPerfTestFactory
- Defined in:
- lib/iudex-simhash/factory_helper.rb
Constant Summary
- DEFAULT_WORDS =
File.join( File.dirname( __FILE__ ), '..', '..', 'config', 'stopwords.en' )
- Element =
Java::com.gravitext.xml.tree.Element
Instance Method Summary
- #simhash_generator(input = :simhash_generator_inputs, stopwords = simhash_stopwords) ⇒ Object
- #simhash_generator_inputs ⇒ Object
- #simhash_stopwords(wfile = DEFAULT_WORDS) ⇒ Object
Instance Method Details
#simhash_generator(input = :simhash_generator_inputs, stopwords = simhash_stopwords) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/iudex-simhash/factory_helper.rb', line 41

# Builds a SimHashGenerator from a list of input specifications.
#
# @param input [Symbol] name of a method, invoked via send, that returns
#   the list of input specifications (default: :simhash_generator_inputs)
# @param stopwords [Object] passed through unchanged as the second
#   argument to SimHashGenerator.new (default: simhash_stopwords)
# @return [Object] the result of SimHashGenerator.new
def simhash_generator( input = :simhash_generator_inputs,
                       stopwords = simhash_stopwords )
  inputs = send( input ).map do |spec|
    # Array() allows a spec to be a bare key or a [ key, ratio ] pair;
    # ratio is nil when it is not given.
    key, ratio = Array( spec )

    # NOTE(review): to_k is defined outside this file; presumably it
    # converts the symbol to a typed key object -- confirm.
    key = key.to_k

    # Element-valued keys get a tree input, everything else a text input.
    gen_input =
      if key.value_type == Element.java_class
        SimHashGenerator::Input.forTree( key )
      else
        SimHashGenerator::Input.forText( key )
      end

    # Only override wordy_ratio when the spec supplied one.
    gen_input.wordy_ratio = ratio if ratio
    gen_input
  end

  SimHashGenerator.new( inputs, stopwords )
end
#simhash_generator_inputs ⇒ Object
60 61 62 63 |
# File 'lib/iudex-simhash/factory_helper.rb', line 60

# Default input specifications read by simhash_generator.
#
# Each entry is [ key ] or [ key, ratio ]; simhash_generator assigns the
# optional second element to the generated input's wordy_ratio.
#
# @return [Array<Array>] a newly allocated list on every call
def simhash_generator_inputs
  [
    [ :title ],
    [ :source_tree, 0.30 ]
  ]
end
#simhash_stopwords(wfile = DEFAULT_WORDS) ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/iudex-simhash/factory_helper.rb', line 30

# Loads a stop word list from a file and wraps it in a Gen::StopWordSet.
#
# Every line is stripped of surrounding whitespace; lines that then begin
# with '#' are dropped. Blank lines are not filtered and are passed on as
# empty strings.
#
# @param wfile [String] path of the word file (default: DEFAULT_WORDS)
# @return [Object] the result of Gen::StopWordSet.new
def simhash_stopwords( wfile = DEFAULT_WORDS )
  lines = File.readlines( wfile ).map( &:strip )
  words = lines.reject { |line| line.start_with?( '#' ) }
  Gen::StopWordSet.new( words )
end