Class: AnyStyle::Feature::Line
- Inherits:
-
AnyStyle::Feature
- Object
- AnyStyle::Feature
- AnyStyle::Feature::Line
- Defined in:
- lib/anystyle/feature/line.rb
Instance Attribute Summary
Attributes inherited from AnyStyle::Feature
Instance Method Summary
Methods inherited from AnyStyle::Feature
#initialize, #next, #prev, #ratio
Methods included from StringUtils
canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate
Constructor Details
This class inherits a constructor from AnyStyle::Feature
Instance Method Details
#classify(chars) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/anystyle/feature/line.rb', line 27
#
# Assigns a coarse category to a line of text, based on the first
# matching rule below (leading whitespace is ignored).
#
# @param chars [String] the text of the line
# @return [Symbol] one of :toc, :list, :title, :cap, :num, :copyright,
#   :http, or :none when no rule matches
def classify(chars)
  line = chars.lstrip

  # Rules are ordered: the first label with a matching pattern wins.
  rules = [
    # Dot leaders / runs of ellipses, or a letter followed by a wide gap
    # and a trailing number.
    [:toc, [/\.\s*\.\s*\.\s*\.|……+/, /\p{L}\s{5,}\d+$/]],
    # Leading number, optionally bracketed or dotted, followed by a word.
    [:list, [/^[\[\(]?\d+\.?[\]\)]?\s+\p{L}+/]],
    # Upper-case letter plus dotted numbering, followed by a word.
    [:title, [/^(\p{Lu}\.?)\s*(\d+\.)+\s+\p{L}+/]],
    # Table / figure / equation / graph / Abbildung keywords at the start,
    # optionally preceded by a single word.
    [:cap, [/^(\w+\s)?(tab(le|elle|\.)|fig(ure|\.)|equation|graph|abb(ildung)?)/i]],
    # A bare number (optionally wrapped in dashes) or a string of i/v/x.
    [:num, [/^\p{Pd}?\d+\p{Pd}?$/, /^[ivx]+$/i]],
    [:copyright, [/copyright|©|rights reserved/i]],
    [:http, [/https?:\/\//i]]
  ]

  label, = rules.find do |_, patterns|
    patterns.any? { |pattern| pattern === line }
  end

  label || :none
end
#observe(token, page:, seq:, **opts) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/anystyle/feature/line.rb', line 4
#
# Builds the feature vector for one line of text.
#
# @param token the line to observe; it is passed through display_chars
#   before any measurement is taken
# @param page an object responding to #width
# @param seq an object responding to #line_counts, #nnum_counts and #pages
# @param opts additional keyword options (accepted but not used here)
# @return [Array] ten features, in this order: letter count, length,
#   upper-case/letter ratio, letter/length ratio, whitespace/length ratio,
#   punctuation/length ratio, length/page-width ratio, the classify label,
#   and two page_ratio buckets (one keyed by the text itself, one keyed by
#   nnum(text))
def observe(token, page:, seq:, **opts)
  text = display_chars(token)

  letters     = count(text, /\p{L}/)
  capitals    = count(text, /\p{Lu}/)
  punctuation = count(text, /[\p{Pd}:.,&\(\)"'”„’‚´«「『‘“`»」』]/)
  spaces      = count(text, /\s/)
  length      = text.length

  [
    letters,
    length,
    ratio(capitals, letters),
    ratio(letters, length),
    ratio(spaces, length),
    ratio(punctuation, length),
    ratio(length, page.width),
    classify(text),
    # presumably how often this exact line recurs relative to the number
    # of pages -- verify against the code that fills seq.line_counts
    page_ratio(seq.line_counts[text], seq.pages.length),
    # same lookup, keyed by the nnum-normalized form of the text
    page_ratio(seq.nnum_counts[nnum(text)], seq.pages.length)
  ]
end
#page_ratio(a, b) ⇒ Object
48 49 50 51 |
# File 'lib/anystyle/feature/line.rb', line 48
#
# Buckets the ratio a / b into a small discrete feature value.
#
# @param a [Numeric, nil] the numerator; nil is treated as 0 (via to_f)
# @param b [Numeric] the denominator
# @return [String, Integer] '=' when the ratio is exactly 1, '+' when it
#   is greater than 1, otherwise the ratio scaled to tenths and rounded
#   (0..10)
def page_ratio(a, b)
  quotient = a.to_f / b

  # 0 / 0 yields NaN, and rounding NaN raises FloatDomainError; treat the
  # undefined ratio as the lowest bucket instead of crashing.
  return 0 if quotient.nan?

  if quotient == 1
    '='
  elsif quotient > 1
    '+'
  else
    (quotient * 10).round
  end
end