Class: Mechanize::Util
- Inherits:
-
Object
- Object
- Mechanize::Util
- Defined in:
- lib/mechanize/util.rb
Constant Summary collapse
- CODE_DIC =
{ :JIS => "ISO-2022-JP", :EUC => "EUC-JP", :SJIS => "SHIFT_JIS", :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
- NKF_TO_ICONV =
{ 'ASCII-8BIT' => 'CP1252', 'SHIFT_JIS' => 'CP932', }
Class Method Summary collapse
- .build_query_string(parameters, enc = nil) ⇒ Object
- .detect_charset(src) ⇒ Object
- .from_native_charset(s, code) ⇒ Object
- .html_unescape(s) ⇒ Object
- .to_native_charset(s, code = nil) ⇒ Object
Class Method Details
.build_query_string(parameters, enc = nil) ⇒ Object
17 18 19 20 21 22 |
# File 'lib/mechanize/util.rb', line 17
#
# Builds a URL query string ("k1=v1&k2=v2") from +parameters+, an enumerable
# that yields key/value pairs (for example a Hash or an Array of two-element
# Arrays).
#
# Keys and values are converted with +to_s+ and escaped with CGI.escape.
# Pairs whose key is nil or false are left out of the result.
#
# +enc+ is accepted for interface compatibility; this method does not read it.
#
# Returns the assembled String (empty when no pair is emitted).
def build_query_string(parameters, enc=nil)
  encoded_pairs = []

  parameters.each do |name, value|
    next unless name

    # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
    encoded_pairs << "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
  end

  encoded_pairs.join('&')
end
.detect_charset(src) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/mechanize/util.rb', line 57
#
# Returns a charset name for +src+, derived from NKF.guess.
#
# When +src+ is nil, the literal "<html></html>" is passed to NKF.guess
# instead. On Ruby >= 1.9.0 the guess result is stringified and upcased; on
# older Rubies the NKF constant whose value equals the guess is looked up and
# translated through CODE_DIC.
#
# If NKF_TO_ICONV has an entry for the resulting name, that entry is returned
# instead. "CP1252" is the fallback when no name could be determined.
def detect_charset(src)
  guess = NKF.guess(src || "<html></html>")

  charset =
    if RUBY_VERSION >= "1.9.0"
      guess.to_s.upcase
    else
      constant_name = NKF.constants.find { |c| NKF.const_get(c) == guess }
      CODE_DIC[constant_name.intern]
    end

  # Prefer the NKF_TO_ICONV spelling when one is registered for this name.
  charset = NKF_TO_ICONV[charset] || charset
  charset || "CP1252"
end
.from_native_charset(s, code) ⇒ Object
34 35 36 37 38 39 40 41 |
# File 'lib/mechanize/util.rb', line 34
#
# Converts +s+ from UTF-8 to the charset named by +code+ using Iconv.
#
# The conversion only happens when Mechanize.html_parser is Nokogiri::HTML;
# with any other parser +s+ is returned untouched. Under Nokogiri::HTML a
# nil/false +s+ yields nil.
#
# NOTE(review): Iconv is not part of the standard library on Ruby 2.0 and
# later -- confirm which Ruby versions this file must support.
def from_native_charset(s, code)
  # Other parsers get the string back exactly as given.
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return nil unless s

  # Iconv.iconv returns an Array of converted chunks; glue them together.
  chunks = Iconv.iconv(code, "UTF-8", s)
  chunks.join("")
end
.html_unescape(s) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/mechanize/util.rb', line 43
#
# Replaces HTML character references in +s+ with the characters they denote.
#
# Three forms are recognised:
# * named references, resolved via Mechanize.html_parser::NamedCharacters
# * decimal numeric references such as "&#38;"
# * hexadecimal numeric references such as "&#x26;" or "&#X26;"
#
# A reference is left in the output verbatim when its name has no entry in
# NamedCharacters, or when its code point cannot be packed as a UTF-8
# character.
#
# Returns +s+ itself when it is nil or false, otherwise a new String.
def html_unescape(s)
  return s unless s

  s.gsub(/&(?:(\w+)|#([0-9]+)|#[xX]([0-9a-fA-F]+));/) { |match|
    # Exactly one of the three capture groups is set for any given match.
    name, decimal, hexadecimal = $1, $2, $3

    number = if name
               Mechanize.html_parser::NamedCharacters[name]
             elsif decimal
               decimal.to_i
             else
               hexadecimal.hex
             end

    if number
      begin
        [number].pack('U')
      rescue StandardError
        # pack('U') rejects values it cannot encode; keep the original text.
        match
      end
    else
      match
    end
  }
end
.to_native_charset(s, code = nil) ⇒ Object
24 25 26 27 28 29 30 31 32 |
# File 'lib/mechanize/util.rb', line 24
#
# Converts +s+ from the charset named by +code+ to UTF-8 using Iconv.
#
# The conversion only happens when Mechanize.html_parser is Nokogiri::HTML;
# with any other parser +s+ is returned untouched. Under Nokogiri::HTML a
# nil/false +s+ yields nil. When +code+ is nil or false, the source charset
# is obtained from detect_charset(s).
#
# NOTE(review): Iconv is not part of the standard library on Ruby 2.0 and
# later -- confirm which Ruby versions this file must support.
def to_native_charset(s, code=nil)
  # Other parsers get the string back exactly as given.
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return nil unless s

  source_charset = code || detect_charset(s)

  # Iconv.iconv returns an Array of converted chunks; glue them together.
  chunks = Iconv.iconv("UTF-8", source_charset, s)
  chunks.join("")
end