Module: AnyStyle::StringUtils
- Included in: Document, Feature, Page, ParserCore, Refs
- Defined in: lib/anystyle/utils.rb
Class Method Summary
- .canonize(string) ⇒ Object
- .count(string, pattern) ⇒ Object
- .display_chars(string) ⇒ Object
- .display_width(string) ⇒ Object
- .indent(token) ⇒ Object
- .nnum(string, symbol = '#') ⇒ Object
- .page_break?(string) ⇒ Boolean
- .scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/) ⇒ Object
- .strip_html(string) ⇒ Object
- .transliterate(string, form: :nfkd) ⇒ Object
Class Method Details
.canonize(string) ⇒ Object
24 25 26 |
# File 'lib/anystyle/utils.rb', line 24
# Builds a canonical form of +string+: the text is run through
# #transliterate, then #scrub (both with their default options), and the
# result is lower-cased.
#
# @param string [String] the text to canonicalize
# @return [String] the downcased, scrubbed transliteration
def canonize(string)
  plain = transliterate(string)
  scrub(plain).downcase
end
.count(string, pattern) ⇒ Object
48 49 50 |
# File 'lib/anystyle/utils.rb', line 48
# Counts how many times +pattern+ matches in +string+, using the same
# matching rules as String#scan.
#
# @param string [String] the text to search
# @param pattern [Regexp, String] anything accepted by String#scan
# @return [Integer] the number of matches (0 when there are none)
def count(string, pattern)
  # Enumerating the block form of #scan counts matches without building
  # the intermediate array of results.
  string.to_enum(:scan, pattern).count
end
.display_chars(string) ⇒ Object
40 41 42 43 44 45 46 |
# File 'lib/anystyle/utils.rb', line 40
# Reduces +string+ to the characters that occupy a display column.
#
# @param string [String] the raw text
# @return [String] a new string with tabs replaced, non-spacing marks,
#   enclosing marks and control characters removed, Unicode space
#   separators turned into plain spaces, and trailing whitespace stripped
def display_chars(string)
  # NOTE(review): the tab replacement is reproduced exactly as shown here;
  # if this listing was whitespace-collapsed, confirm the intended width
  # against the original file.
  text = string.gsub(/\t/, ' ')
  # Tabs must be handled first: they are control characters (Cc) and would
  # otherwise be dropped by this step.
  text = text.gsub(/\p{Mn}|\p{Me}|\p{Cc}/, '')
  text = text.gsub(/\p{Zs}/, ' ')
  text.rstrip
end
.display_width(string) ⇒ Object
36 37 38 |
# File 'lib/anystyle/utils.rb', line 36
# Returns the number of characters left in +string+ after it has been
# filtered by #display_chars.
#
# @param string [String] the raw text
# @return [Integer] length of the filtered string
def display_width(string)
  visible = display_chars(string)
  visible.length
end
.indent(token) ⇒ Object
52 53 54 |
# File 'lib/anystyle/utils.rb', line 52
# Measures the leading whitespace of +token+ after it has been filtered
# by #display_chars.
#
# @param token [String] the raw text
# @return [Integer] number of leading whitespace characters
def indent(token)
  # The pattern can match the empty string, so the slice is never nil.
  leading = display_chars(token)[/^(\s*)/]
  leading.length
end
.nnum(string, symbol = '#') ⇒ Object
28 29 30 |
# File 'lib/anystyle/utils.rb', line 28
# Masks the digits in +string+: the text is Unicode-normalized (default
# form) and every character matched by +\d+ is replaced with +symbol+.
#
# @param string [String] the text to mask
# @param symbol [String] replacement for each digit (defaults to '#')
# @return [String] a new string with digits replaced
def nnum(string, symbol = '#')
  normalized = string.unicode_normalize
  normalized.gsub(/\d/, symbol)
end
.page_break?(string) ⇒ Boolean
32 33 34 |
# File 'lib/anystyle/utils.rb', line 32
# Tells whether +string+ contains a form feed character.
#
# @param string [String, nil] the text to inspect
# @return [Boolean] true when a form feed (+\f+) is present, false otherwise
def page_break?(string)
  # `=~` yields a match index or nil; convert that to a strict boolean so
  # the predicate returns what its signature promises. Keeping `=~` means a
  # nil argument still answers false instead of raising.
  !(string =~ /\f/).nil?
end
.scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/) ⇒ Object
14 15 16 |
# File 'lib/anystyle/utils.rb', line 14
# Cleans +string+ by first repairing invalid byte sequences (String#scrub)
# and then deleting every character matched by +blacklist+.
#
# @param string [String] the text to clean
# @param blacklist [Regexp] characters to delete; by default everything
#   that is not alphanumeric, plus modifier letters
# @return [String] a new, cleaned string
def scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/)
  well_formed = string.scrub
  well_formed.gsub(blacklist, '')
end
.strip_html(string) ⇒ Object
56 57 58 59 |
# File 'lib/anystyle/utils.rb', line 56
# Removes a fixed set of inline/formatting tags from +string+, keeping the
# text between them. Opening and closing forms of italic, i, strong, b,
# span and div are matched case-insensitively, optionally carrying a
# single non-empty style="..." attribute; any other markup is left as is.
#
# @param string [String] text that may contain markup
# @return [String] a new string without the matched tags
def strip_html(string)
  tag = /<\/?(italic|i|strong|b|span|div)(\s+style="[^"]+")?>/i
  string.gsub(tag, '')
end
.transliterate(string, form: :nfkd) ⇒ Object
18 19 20 21 22 |
# File 'lib/anystyle/utils.rb', line 18
# Strips combining marks from +string+: the text is Unicode-normalized
# using +form+ and every character in the Mark category is then removed.
#
# @param string [String] the text to transliterate
# @param form [Symbol] normalization form passed to
#   String#unicode_normalize (defaults to :nfkd)
# @return [String] a new string without Mark characters
def transliterate(string, form: :nfkd)
  decomposed = string.unicode_normalize(form)
  decomposed.gsub(/\p{Mark}/, '')
end