Class: AnyStyle::Feature::Words
- Inherits:
-
AnyStyle::Feature
- Object
- AnyStyle::Feature
- AnyStyle::Feature::Words
- Defined in:
- lib/anystyle/feature/words.rb
Constant Summary collapse
# Lower-case heading words (English, German and French) that #observe counts
# among a token's words. Frozen so the shared list cannot be mutated at runtime.
TITLE_WORDS = %w[
  abstract acknowledgements appendix bibliography bibliographie chapter
  cited contents figures introduction kurzfassung literatur literature
  references referenzen secondary section sources summary tables works
].freeze
Instance Attribute Summary collapse
-
#dictionary ⇒ Object
readonly
Returns the value of attribute dictionary.
Attributes inherited from AnyStyle::Feature
Instance Method Summary collapse
- #classify(word) ⇒ Object
-
#initialize(dictionary:, **opts) ⇒ Words
constructor
A new instance of Words.
- #observe(token, **opts) ⇒ Object
Methods inherited from AnyStyle::Feature
Methods included from StringUtils
canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate
Constructor Details
#initialize(dictionary:, **opts) ⇒ Words
Returns a new instance of Words.
30 31 32 33 |
# File 'lib/anystyle/feature/words.rb', line 30
#
# Builds a new Words feature.
#
# @param dictionary the object stored in @dictionary and exposed via #dictionary
# @param opts remaining keyword options, forwarded unchanged to the
#   superclass initializer
def initialize(dictionary:, **opts)
  # Let the parent class handle its own options before storing local state.
  super(**opts)
  @dictionary = dictionary
end
Instance Attribute Details
#dictionary ⇒ Object (readonly)
Returns the value of attribute dictionary.
4 5 6 |
# File 'lib/anystyle/feature/words.rb', line 4
#
# Read-only accessor.
#
# @return the current value of the @dictionary instance variable
def dictionary
  @dictionary
end
Instance Method Details
#classify(word) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/anystyle/feature/words.rb', line 64
#
# Assigns a coarse category to a single word.
#
# @param word [String, nil] the word to classify
# @return [Symbol]
#   :number  - the whole string is digits or one of the Roman-numeral shapes
#              matched below (e.g. "iv", "viii", "x"); the empty string also
#              falls in this category because of the empty alternative
#   :numeric - the string contains at least one digit
#   :none    - word is nil
#   :alpha   - anything else
def classify(word)
  case word
  # \A / \z anchor the whole string. The line anchors ^ / $ would let a
  # multi-line string count as :number when only one of its lines matches.
  when /\A(\d+|[vx]?iii?|i?[vx]|)\z/i
    :number
  when /\d/
    :numeric
  when nil
    :none
  else
    :alpha
  end
end
#observe(token, **opts) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/anystyle/feature/words.rb', line 35
#
# Computes the word-based feature values for a token.
#
# @param token [String] text that is split on whitespace into words
# @param opts accepted for interface compatibility; not used here
# @return [Array] in order:
#   - number of non-empty canonized words
#   - average word length (integer division)
#   - median word length (integer division for an even number of words)
#   - number of matches of two or more whitespace characters between
#     non-whitespace characters
#   - #classify result for the first word (nil when there are no words)
#   - number of digit-sequence matches (optionally with a decimal part)
#   - ratio of TITLE_WORDS hits to the word count
#   - one ratio per entry returned by dictionary.tag_counts
def observe(token, **opts)
  words = token.scan(/\S+/).map { |raw| canonize(raw) }.reject(&:empty?)
  spacers = token.scan(/\S\s\s+\S/)
  numbers = token.scan(/\d+(\.\d+)?/)
  title = words.count { |word| TITLE_WORDS.include?(word) }
  counts = dictionary.tag_counts(words)
  total = words.length

  if total.zero?
    avg = 0
    med = 0
  else
    lengths = words.map(&:length).sort
    size = lengths.length
    middle = size / 2
    avg = lengths.sum / size
    # Even count: mean of the two central lengths; odd count: the central one.
    med = size.odd? ? lengths[middle] : (lengths[middle - 1] + lengths[middle]) / 2
  end

  [
    total,
    avg,
    med,
    spacers.length,
    classify(words.first),
    numbers.length,
    ratio(title, total),
    *counts.map { |cnt| ratio(cnt, total) }
  ]
end