Module: TextUtils::UnicodeHelper
- Included in:
- TextUtils
- Defined in:
- lib/textutils/helper/unicode_helper.rb
Constant Summary collapse
- U_HYPHEN =
unambiguous hyphen
NB: U_HYPHEN_MINUS is the standard ASCII hyphen/minus, e.g. - (see en.wikipedia.org/wiki/Dash)
"\u2010"
- U_NON_BREAKING_HYPHEN =
unambiguous non-breaking hyphen
"\u2011"
- U_MINUS =
unambiguous minus sign (html => &minus;)
"\u2212"
- U_NDASH =
ndash (html => &ndash;, ascii => --)
"\u2013"
- U_MDASH =
mdash (html => &mdash;, ascii => ---)
"\u2014"
Instance Method Summary collapse
-
#convert_unicode_dashes_to_plain_ascii(text, opts = {}) ⇒ Object
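Replaces every Unicode hyphen, non-breaking hyphen, minus, ndash and mdash in text with the plain ASCII hyphen-minus (-) and prints a warning for each replacement.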
Instance Method Details
#convert_unicode_dashes_to_plain_ascii(text, opts = {}) ⇒ Object
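Replaces every Unicode hyphen, non-breaking hyphen, minus, ndash and mdash in text with the plain ASCII hyphen-minus (-). For each replaced character a "*** warning: ..." line is printed to standard output; when opts[:path] is given, the warning also names that file. Returns the converted copy of text.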
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/textutils/helper/unicode_helper.rb', line 18

# Replaces the Unicode dash look-alikes U_HYPHEN, U_NON_BREAKING_HYPHEN,
# U_MINUS, U_NDASH and U_MDASH with the plain ASCII hyphen-minus ('-').
#
# One "*** warning: ..." line is printed to standard output for every
# character that gets replaced, so that the offending input can be cleaned up.
#
# @param text [String] text to scan; the receiver itself is not modified
# @param opts [Hash] optional settings
# @option opts [String] :path file name to mention in the warning
# @return [String] copy of +text+ with every listed dash replaced by '-'
def convert_unicode_dashes_to_plain_ascii(text, opts = {})
  # Label printed in the warning for each dash character handled here.
  labels = {
    U_HYPHEN              => 'hyphen U+2010',
    U_NON_BREAKING_HYPHEN => 'non_breaking_hyphen U+2011',
    U_MINUS               => 'minus U+2212',
    U_NDASH               => 'ndash U+2013',
    U_MDASH               => 'mdash U+2014'
  }

  # The pattern is derived from the table, so only labelled characters match.
  text.gsub(Regexp.union(labels.keys)) do |dash|
    # Built by interpolation (no in-place append) so it also works when
    # string literals are frozen.
    msg = "found #{labels[dash]} (#{dash})"
    msg = "#{msg} in file >#{opts[:path]}<" if opts[:path]
    puts "*** warning: #{msg}; converting to plain ascii hyphen_minus (-)"
    '-' # replacement for the matched dash
  end
end