Class: AnyStyle::Feature::Words

Inherits:

AnyStyle::Feature

Object
AnyStyle::Feature
AnyStyle::Feature::Words

show all

Defined in: lib/anystyle/feature/words.rb

Constant Summary collapse

# Words counted as "title" words by #observe, which reports the share of a
# token's canonized words that appear in this list. The entries are lower-case
# and include English, German and French spellings.
# Frozen so the shared list cannot be modified at runtime.
TITLE_WORDS = %w[
  abstract
  acknowledgements
  appendix
  bibliography
  bibliographie
  chapter
  cited
  contents
  figures
  introduction
  kurzfassung
  literatur
  literature
  references
  referenzen
  secondary
  section
  sources
  summary
  tables
  works
].freeze

Instance Attribute Summary collapse

#dictionary ⇒ Object readonly

Returns the value of attribute dictionary.

Attributes inherited from AnyStyle::Feature

#precision

Instance Method Summary collapse

#classify(word) ⇒ Object
#initialize(dictionary:, **opts) ⇒ Words constructor

A new instance of Words.
#observe(token, **opts) ⇒ Object

Methods inherited from AnyStyle::Feature

#next, #prev, #ratio

Methods included from StringUtils

canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate

Constructor Details

#initialize(dictionary:, **opts) ⇒ `Words`

Returns a new instance of Words.

# File 'lib/anystyle/feature/words.rb', line 30

# Creates the feature with the dictionary it will consult.
#
# @param dictionary [Object] stored in @dictionary; #observe calls
#   +tag_counts+ on it
# @param opts [Hash] remaining keyword options, forwarded unchanged to the
#   superclass initializer
def initialize(dictionary:, **opts)
  # Let the superclass consume its own options before storing local state.
  super(**opts)
  @dictionary = dictionary
end

Instance Attribute Details

#dictionary ⇒ `Object` (readonly)

Returns the value of attribute dictionary.



4
5
6

# File 'lib/anystyle/feature/words.rb', line 4

# Read-only accessor for the dictionary supplied to the constructor.
#
# @return [Object] the value of @dictionary
def dictionary
  @dictionary
end

Instance Method Details

#classify(word) ⇒ `Object`

# File 'lib/anystyle/feature/words.rb', line 64

# Sorts a word into one of four coarse categories.
#
# @param word [String, nil] the word to categorize
# @return [Symbol]
#   +:none+    when +word+ is nil;
#   +:number+  when the anchored, case-insensitive pattern matches: digits
#              only, "ii"/"iii" optionally preceded by "v" or "x", "v"/"x"
#              optionally preceded by "i", or nothing at all;
#   +:numeric+ when the word otherwise contains at least one digit;
#   +:alpha+   in every remaining case.
def classify(word)
  return :none if word.nil?

  # The trailing empty alternative means an empty string also counts as a number.
  whole_number = /^(\d+|[vx]?iii?|i?[vx]|)$/i
  any_digit = /\d/

  case word
  when whole_number then :number
  when any_digit then :numeric
  else :alpha
  end
end

#observe(token, **opts) ⇒ `Object`

# File 'lib/anystyle/feature/words.rb', line 35

# Builds the list of word-level observations for a token.
#
# The token is split on whitespace, each piece is passed through +canonize+,
# and pieces that come back empty are dropped. The returned array holds, in
# order:
#
# 1. the number of remaining words
# 2. their average length (integer division)
# 3. their median length (integer division when the count is even)
# 4. how often two non-space characters are separated by two or more
#    whitespace characters in the raw token
# 5. the #classify category of the first word (+words[0]+ is nil when there
#    are no words)
# 6. how many digit runs, optionally followed by a decimal part, occur in the
#    raw token
# 7. +ratio+ of words listed in TITLE_WORDS to all words
# 8. one +ratio+ per entry returned by +dictionary.tag_counts+
#
# @param token [String] the text to inspect
# @param opts [Hash] accepted but not used by this method
# @return [Array] the observations described above
def observe(token, **opts)
  words = token.scan(/\S+/).map { |word| canonize word }.reject(&:empty?)
  total = words.length

  spacer_count = token.scan(/\S\s\s+\S/).length
  number_count = token.scan(/\d+(\.\d+)?/).length
  title_count = words.count { |word| TITLE_WORDS.include?(word) }
  tag_counts = dictionary.tag_counts(words)

  # Both statistics stay at zero when the token yields no words.
  avg = med = 0

  unless words.empty?
    sizes = words.map(&:length).sort
    mid = total / 2

    avg = sizes.inject(:+) / total
    # An even count averages the two middle lengths; an odd count takes the middle one.
    med = total.odd? ? sizes[mid] : (sizes[mid - 1] + sizes[mid]) / 2
  end

  [
    total,
    avg,
    med,
    spacer_count,
    classify(words[0]),
    number_count,
    ratio(title_count, total),
    *tag_counts.map { |cnt| ratio(cnt, total) }
  ]
end

Class: AnyStyle::Feature::Words

Constant Summary collapse

Instance Attribute Summary collapse

Attributes inherited from AnyStyle::Feature

Instance Method Summary collapse

Methods inherited from AnyStyle::Feature

Methods included from StringUtils

Constructor Details

#initialize(dictionary:, **opts) ⇒ Words

Instance Attribute Details

#dictionary ⇒ Object (readonly)

Instance Method Details

#classify(word) ⇒ Object

#observe(token, **opts) ⇒ Object

#initialize(dictionary:, **opts) ⇒ `Words`

#dictionary ⇒ `Object` (readonly)

#classify(word) ⇒ `Object`

#observe(token, **opts) ⇒ `Object`