Module: AnyStyle::StringUtils

Included in:
Document, Feature, Page, ParserCore, Refs
Defined in:
lib/anystyle/utils.rb

Class Method Summary

Class Method Details

.canonize(string) ⇒ Object


24
25
26
# File 'lib/anystyle/utils.rb', line 24

# Produces a canonical form of +string+: the text is passed through
# transliterate, the result through scrub (with its default blacklist),
# and the outcome is lower-cased.
#
# @param string [String] the text to canonicalize
# @return [String] the lower-cased, scrubbed transliteration
def canonize(string)
  plain = transliterate(string)
  scrub(plain).downcase
end

.count(string, pattern) ⇒ Object


48
49
50
# File 'lib/anystyle/utils.rb', line 48

# Counts how many times +pattern+ matches in +string+.
#
# The matches are consumed through an enumerator over String#scan, so the
# full list of matches is never collected into an array.
#
# @param string [String] the text to search
# @param pattern [Regexp, String] the pattern handed to String#scan
# @return [Integer] the number of matches
def count(string, pattern)
  string.to_enum(:scan, pattern).count
end

.display_chars(string) ⇒ Object


40
41
42
43
44
45
46
# File 'lib/anystyle/utils.rb', line 40

# Reduces +string+ to the characters that are kept for display purposes.
#
# @param string [String] the raw text
# @return [String] the text with tabs expanded to four spaces, marks and
#   control characters dropped, space separators turned into plain spaces
#   and trailing whitespace removed
def display_chars(string)
  # Tabs are expanded first: a tab is itself a control character (\p{Cc})
  # and would otherwise be deleted by the next step.
  expanded = string.gsub(/\t/, '    ')

  # Drop non-spacing marks, enclosing marks and control characters.
  visible = expanded.gsub(/\p{Mn}|\p{Me}|\p{Cc}/, '')

  # Replace every Unicode space separator with an ASCII space.
  spaced = visible.gsub(/\p{Zs}/, ' ')

  spaced.rstrip
end

.display_width(string) ⇒ Object


36
37
38
# File 'lib/anystyle/utils.rb', line 36

# Measures +string+ by the number of characters that remain after it has
# been passed through display_chars.
#
# @param string [String] the raw text
# @return [Integer] the length of the display_chars result
def display_width(string)
  visible = display_chars(string)
  visible.length
end

.indent(token) ⇒ Object


52
53
54
# File 'lib/anystyle/utils.rb', line 52

# Measures the leading whitespace of +token+ after it has been passed
# through display_chars.
#
# @param token [String] the text to inspect
# @return [Integer] the length of the leading whitespace run (0 if none)
def indent(token)
  # The pattern also matches an empty run, so the lookup never yields nil.
  leading = display_chars(token)[/^(\s*)/]
  leading.length
end

.nnum(string, symbol = '#') ⇒ Object


28
29
30
# File 'lib/anystyle/utils.rb', line 28

# Masks the digits in +string+: the text is Unicode-normalized (with the
# default form of String#unicode_normalize) and every match of \d is then
# replaced by +symbol+.
#
# @param string [String] the text to mask
# @param symbol [String] the replacement used for each digit
# @return [String] the normalized text with digits replaced
def nnum(string, symbol = '#')
  normalized = string.unicode_normalize
  normalized.gsub(/\d/, symbol)
end

.page_break?(string) ⇒ Boolean

Returns:

  • (Boolean)

32
33
34
# File 'lib/anystyle/utils.rb', line 32

# Tells whether +string+ contains a form feed character.
#
# @param string [String, nil] the text to inspect
# @return [Boolean] true if a form feed ("\f") occurs anywhere in
#   +string+, false otherwise (including for nil)
def page_break?(string)
  # Regexp#match? returns a strict true/false rather than a match index
  # or nil, accepts nil as input, and does not allocate MatchData.
  /\f/.match?(string)
end

.scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/) ⇒ Object


14
15
16
# File 'lib/anystyle/utils.rb', line 14

# Cleans +string+ by first repairing invalid byte sequences with
# String#scrub and then deleting every match of +blacklist+.
#
# @param string [String] the text to clean
# @param blacklist [Regexp] pattern of characters to delete; by default
#   anything that is not alphanumeric, plus modifier letters
# @return [String] the cleaned text
def scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/)
  # Invalid bytes are repaired first; gsub would raise on them otherwise.
  valid = string.scrub
  valid.gsub(blacklist, '')
end

.strip_html(string) ⇒ Object


56
57
58
59
# File 'lib/anystyle/utils.rb', line 56

# Removes a fixed set of inline HTML tags from +string+ while keeping the
# text between them.
#
# Only opening and closing italic, i, strong, b, span and div tags are
# removed (case-insensitively), either bare or with a single non-empty
# style="..." attribute; all other markup is left untouched.
#
# @param string [String] the text that may contain markup
# @return [String] the text without the matched tags
def strip_html(string)
  tag = /<\/?(italic|i|strong|b|span|div)(\s+style="[^"]+")?>/i
  string.gsub(tag, '')
end

.transliterate(string, form: :nfkd) ⇒ Object


18
19
20
21
22
# File 'lib/anystyle/utils.rb', line 18

# Strips combining marks from +string+ after Unicode normalization.
#
# @param string [String] the text to transliterate
# @param form [Symbol] the normalization form passed to
#   String#unicode_normalize (:nfkd by default)
# @return [String] the normalized text with every \p{Mark} character
#   removed
def transliterate(string, form: :nfkd)
  normalized = string.unicode_normalize(form)
  normalized.gsub(/\p{Mark}/, '')
end