Class: Mechanize::Util

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize/util.rb

Constant Summary collapse

# Charset names keyed by Symbol. detect_charset looks NKF constant names up
# in this table on Rubies older than 1.9.0.
CODE_DIC = {
  :JIS   => "ISO-2022-JP",
  :EUC   => "EUC-JP",
  :SJIS  => "SHIFT_JIS",
  :UTF8  => "UTF-8",
  :UTF16 => "UTF-16",
  :UTF32 => "UTF-32"
}
NEW_RUBY_ENCODING =

True if RUBY_VERSION is 1.9.0 or later.

# RUBY_VERSION is a String, so this is a String comparison, not a numeric version comparison.
RUBY_VERSION >= '1.9.0'
ENCODING_ERRORS =

Contains the encoding error classes that charset conversion can raise (they are rescued in from_native_charset).

# With the newer encoding support the list is just EncodingError; otherwise
# it is the two Iconv error classes.
NEW_RUBY_ENCODING ? [EncodingError] : [Iconv::InvalidEncoding, Iconv::IllegalSequence]

Class Method Summary collapse

Class Method Details

.build_query_string(parameters, enc = nil) ⇒ Object



20
21
22
23
24
25
# File 'lib/mechanize/util.rb', line 20

# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# +parameters+ is any collection whose #map yields key/value pairs (for
# example a Hash or an Array of two-element Arrays). Keys and values are
# converted with #to_s and escaped with CGI.escape. Pairs whose key is nil
# or false are dropped. +enc+ is accepted but not used by this method.
#
# Returns the pairs joined with "&"; an empty collection yields "".
def self.build_query_string(parameters, enc=nil)
  pairs = parameters.map do |key, value|
    next unless key
    # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
    escaped = [key, value].map { |part| CGI.escape(part.to_s) }
    escaped.join("=")
  end
  pairs.compact.join('&')
end

.detect_charset(src) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/mechanize/util.rb', line 80

# Guesses the character encoding of +src+ with NKF.guess and returns its
# name as a String.
#
# A nil/false +src+ is replaced by the empty document "<html></html>".
# On Ruby 1.9.0 and later the guess is returned as an upper-cased String.
# On older Rubies the guess is matched against the NKF constants and the
# matching constant name is translated through CODE_DIC; when no constant
# or no CODE_DIC entry matches, "ISO-8859-1" is returned.
def self.detect_charset(src)
  guess = NKF.guess(src || "<html></html>")
  if RUBY_VERSION >= "1.9.0"
    enc = guess.to_s.upcase
  else
    const_name = NKF.constants.find { |c| NKF.const_get(c) == guess }
    # No NKF constant may equal the guess; skip the lookup rather than
    # calling nil.intern so the ISO-8859-1 fallback below is reached.
    enc = CODE_DIC[const_name.intern] if const_name
  end
  enc || "ISO-8859-1"
end

.from_native_charset(s, code, ignore_encoding_error = false, log = nil) ⇒ Object

Converts string s from code to UTF-8.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/mechanize/util.rb', line 40

# Re-encodes +s+ using the charset +code+.
#
# +s+ is returned untouched when +s+ or +code+ is nil/false, or when
# Mechanize.html_parser is not Nokogiri::HTML. Otherwise the conversion is
# delegated to encode_to(code, s), which is defined outside this excerpt.
#
# If the conversion raises one of ENCODING_ERRORS, the failure is written
# to +log+ at debug level (when a log is given). The original +s+ is then
# returned if +ignore_encoding_error+ is true; otherwise the error is
# re-raised.
def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
  return s unless s && code
  return s unless Mechanize.html_parser == Nokogiri::HTML

  begin
    encode_to(code, s)
  rescue *ENCODING_ERRORS => error
    if log
      log.debug("from_native_charset: #{error.class}: form encoding: #{code.inspect} string: #{s}")
    end
    # A bare raise re-raises the error currently being rescued.
    raise unless ignore_encoding_error
    s
  end
end

.html_unescape(s) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/mechanize/util.rb', line 66

# Replaces HTML character references in +s+ with the characters they name.
#
# Handles decimal references (&#65;), hexadecimal references (&#x41;) and
# named references, which are looked up in
# Mechanize.html_parser::NamedCharacters. A reference is left unchanged
# when its name is unknown or when its code point cannot be packed as a
# UTF-8 character.
#
# Returns +s+ itself when it is nil/false, otherwise a new String.
def self.html_unescape(s)
  return s unless s
  s.gsub(/&(#[xX][0-9a-fA-F]+|#[0-9]+|\w+);/) { |match|
    entity = $1
    number = if entity =~ /\A#[xX]/
               entity[2..-1].to_i(16)
             elsif entity[0, 1] == '#'
               entity[1..-1].to_i
             else
               Mechanize.html_parser::NamedCharacters[entity]
             end

    begin
      number ? [number].pack('U') : match
    rescue StandardError
      # pack('U') rejects values it cannot encode; keep the original text.
      match
    end
  }
end

.to_native_charset(s, code = nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/mechanize/util.rb', line 27

# Deprecated charset conversion helper.
#
# Always emits a deprecation warning prefixed with the caller's location.
# When Mechanize.html_parser is Nokogiri::HTML, returns nil for a nil/false
# +s+; otherwise passes +s+ through Iconv.iconv("UTF-8", code, s) and joins
# the result, guessing +code+ with detect_charset when it is not given.
# With any other parser, +s+ is returned unchanged.
def self.to_native_charset(s, code=nil)
  caller_location = Gem.location_of_caller.join ':'
  warn "#{caller_location}: Mechanize::Util::to_native_charset is deprecated and will be removed October 2011"

  return s unless Mechanize.html_parser == Nokogiri::HTML
  return unless s

  charset = code || detect_charset(s)
  Iconv.iconv("UTF-8", charset, s).join("")
end

.uri_escape(str) ⇒ Object



93
94
95
96
97
98
99
100
101
# File 'lib/mechanize/util.rb', line 93

# Escapes +str+ by calling #escape on a memoized parser object.
#
# The parser is a new URI::Parser when that constant exists and the URI
# module itself when referencing it raises NameError. It is cached in
# @parser, the same variable uri_unescape uses.
def self.uri_escape str
  @parser ||=
    begin
      URI::Parser.new
    rescue NameError
      URI
    end

  escaper = @parser
  escaper.escape(str)
end

.uri_unescape(str) ⇒ Object



103
104
105
106
107
108
109
110
111
# File 'lib/mechanize/util.rb', line 103

# Unescapes +str+ by calling #unescape on a memoized parser object.
#
# The parser is a new URI::Parser when that constant exists and the URI
# module itself when referencing it raises NameError. It is cached in
# @parser, the same variable uri_escape uses.
def self.uri_unescape str
  @parser ||=
    begin
      URI::Parser.new
    rescue NameError
      URI
    end

  unescaper = @parser
  unescaper.unescape(str)
end