Class: Boilerpipe::Filters::DensityRulesClassifier
- Inherits: Object
- Object
- Boilerpipe::Filters::DensityRulesClassifier
- Defined in:
- lib/boilerpipe/filters/density_rules_classifier.rb
Class Method Summary collapse
Class Method Details
.classify(prev, current, nxt) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/boilerpipe/filters/density_rules_classifier.rb', line 23
#
# Decides whether +current+ counts as content by comparing its link and text
# densities with those of the neighbouring blocks +prev+ and +nxt+.
#
# Each argument must respond to +link_density+ and +text_density+.
# Returns +true+ when the rules accept the block, +false+ otherwise.
def self.classify(prev, current, nxt)
  if current.link_density > 0.333333
    # A link-heavy block is rejected regardless of its neighbours.
    false
  elsif prev.link_density <= 0.555556
    if current.text_density <= 9
      # Sparse block: accepted when the following block is dense, or,
      # failing that, when the preceding block is not sparse itself.
      nxt.text_density > 10 || !(prev.text_density <= 4)
    else
      # Dense block: rejected only when the following block has zero density.
      nxt.text_density != 0
    end
  else
    # Preceded by a link-heavy block: the following block must be dense.
    !(nxt.text_density <= 11)
  end
end
.process(doc) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/boilerpipe/filters/density_rules_classifier.rb', line 9
#
# Runs +classify+ over every text block of +doc+ and stores the result in
# that block's +content+ attribute. The block list is padded on both ends
# with the +TextBlock.empty_start+ placeholder so the first and last real
# blocks also have a neighbour on each side; the placeholder itself is never
# the block being classified.
#
# There is no early exit for short documents: a document with fewer than two
# blocks goes through the same loop.
#
# Returns +doc+.
def self.process(doc)
  sentinel = Boilerpipe::Document::TextBlock.empty_start
  padded = [sentinel].concat(doc.text_blocks).push(sentinel)

  # Slide a three-block window so each real block appears once in the middle.
  padded.each_cons(3) do |before, block, after|
    block.content = classify(before, block, after)
  end

  doc
end