Class: AnyStyle::Feature::Line

Inherits:
AnyStyle::Feature show all
Defined in:
lib/anystyle/feature/line.rb

Instance Attribute Summary

Attributes inherited from AnyStyle::Feature

#precision

Instance Method Summary collapse

Methods inherited from AnyStyle::Feature

#initialize, #next, #prev, #ratio

Methods included from StringUtils

canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate

Constructor Details

This class inherits a constructor from AnyStyle::Feature

Instance Method Details

#classify(chars) ⇒ Object


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/anystyle/feature/line.rb', line 27

# Assigns a coarse layout category to a single line of text.
#
# Leading whitespace is stripped first; the patterns are then tried in a
# fixed order and the first one that matches decides the result, so a
# line that fits several categories gets the earliest one listed here:
#
# * +:toc+       - four dots (optionally separated by whitespace), two or
#                  more ellipsis characters, or a letter followed by at
#                  least five whitespace characters and trailing digits
# * +:list+      - an optionally bracketed number followed by whitespace
#                  and letters
# * +:title+     - an uppercase letter, one or more dotted numbers,
#                  whitespace and letters
# * +:cap+       - an optional leading word followed by a table / figure /
#                  equation / graph / "Abb(ildung)" keyword
# * +:num+       - only digits (optionally wrapped in dashes) or only the
#                  characters i, v and x
# * +:copyright+ - contains "copyright", "©" or "rights reserved"
# * +:http+      - contains an http:// or https:// URL prefix
# * +:none+      - nothing above matched
#
# @param chars [String] the line to inspect
# @return [Symbol] one of the category symbols listed above
def classify(chars)
  text = chars.lstrip

  return :toc if text =~ /\.\s*\.\s*\.\s*\.|……+/ || text =~ /\p{L}\s{5,}\d+$/
  return :list if text =~ /^[\[\(]?\d+\.?[\]\)]?\s+\p{L}+/
  return :title if text =~ /^(\p{Lu}\.?)\s*(\d+\.)+\s+\p{L}+/
  return :cap if text =~ /^(\w+\s)?(tab(le|elle|\.)|fig(ure|\.)|equation|graph|abb(ildung)?)/i
  return :num if text =~ /^\p{Pd}?\d+\p{Pd}?$/ || text =~ /^[ivx]+$/i
  return :copyright if text =~ /copyright|©|rights reserved/i
  return :http if text =~ /https?:\/\//i

  :none
end

#observe(token, page:, seq:, **opts) ⇒ Object


4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/anystyle/feature/line.rb', line 4

# Builds the feature vector for one line.
#
# The line is first passed through +display_chars+; all counts and
# ratios below are computed on that result. The returned array holds,
# in order:
#
# 1.  number of letters
# 2.  length of the line in characters
# 3.  ratio of uppercase letters to letters
# 4.  ratio of letters to line length
# 5.  ratio of whitespace to line length
# 6.  ratio of punctuation (dashes, colon, period, comma, ampersand,
#     parentheses and assorted quotation marks) to line length
# 7.  ratio of line length to +page.width+
# 8.  the category returned by +classify+
# 9.  +page_ratio+ of +seq.line_counts+ for this line vs. the page count
# 10. +page_ratio+ of +seq.nnum_counts+ for the +nnum+ form of this line
#     vs. the page count
#
# @param token [String] the raw line
# @param page [#width] the page the line belongs to
# @param seq [#line_counts, #nnum_counts, #pages] the enclosing sequence
# @param opts [Hash] additional keyword options; accepted but not used here
# @return [Array] the ten features described above
def observe(token, page:, seq:, **opts)
  chars = display_chars(token)
  width = chars.length

  letters     = count(chars, /\p{L}/)
  capitals    = count(chars, /\p{Lu}/)
  punctuation = count(chars, /[\p{Pd}:.,&\(\)"'”„’‚´«「『‘“`»」』]/)
  spaces      = count(chars, /\s/)

  # Both repetition features are measured against the same page total.
  total_pages = seq.pages.length

  [
    letters,
    width,
    ratio(capitals, letters),
    ratio(letters, width),
    ratio(spaces, width),
    ratio(punctuation, width),
    ratio(width, page.width),
    classify(chars),
    page_ratio(seq.line_counts[chars], total_pages),
    page_ratio(seq.nnum_counts[nnum(chars)], total_pages)
  ]
end

#page_ratio(a, b) ⇒ Object


48
49
50
51
# File 'lib/anystyle/feature/line.rb', line 48

# Expresses +a+ relative to +b+ as a compact feature value.
#
# +a+ is converted with +to_f+ before dividing, so +nil+ counts as zero.
#
# @param a [#to_f] the numerator (e.g. an occurrence count)
# @param b [Numeric] the denominator (e.g. a number of pages)
# @return [String, Integer] '=' when the quotient is exactly 1, '+' when
#   it is greater than 1, otherwise the quotient in tenths rounded to the
#   nearest integer
def page_ratio(a, b)
  quotient = a.to_f / b

  return '=' if quotient == 1
  return '+' if quotient > 1

  (quotient * 10).round
end