Class: Mechanize::Util

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize/util.rb

Constant Summary collapse

# Maps NKF encoding constants to charset name strings. detect_charset looks
# up integer results of NKF.guess in this table.
CODE_DIC =
{
  NKF::JIS => "ISO-2022-JP",
  NKF::EUC => "EUC-JP",
  NKF::SJIS => "SHIFT_JIS",
  NKF::UTF8 => "UTF-8",
  NKF::UTF16 => "UTF-16",
  NKF::UTF32 => "UTF-32",
}
NEW_RUBY_ENCODING =

true if RUBY_VERSION is 1.9.0 or later

RUBY_VERSION >= '1.9.0'
ENCODING_ERRORS =

Contains the encoding error classes that a conversion may raise (rescued by from_native_charset).

if NEW_RUBY_ENCODING
  [EncodingError]
else
  [Iconv::InvalidEncoding, Iconv::IllegalSequence]
end
DefaultMimeTypes =

Default MIME type data for Page::Image#mime_type. You can substitute another Apache-compatible mimetab, for example:

mimetab = WEBrick::HTTPUtils.load_mime_types('/etc/mime.types')
Mechanize::Util::DefaultMimeTypes.replace(mimetab)
WEBrick::HTTPUtils::DefaultMimeTypes

Class Method Summary collapse

Class Method Details

.build_query_string(parameters, enc = nil) ⇒ Object



30
31
32
33
34
35
# File 'lib/mechanize/util.rb', line 30

# Builds a URL query string from key/value pairs.
#
# Each pair is rendered as "key=value" with both sides converted via to_s and
# passed through CGI.escape; pairs are joined with "&". Pairs whose key is
# nil or false are skipped.
#
# @param parameters [#each] key/value pairs (e.g. a Hash or an Array of
#   two-element arrays)
# @param enc [Object, nil] accepted but not used by this method
# @return [String] the escaped query string ("" when there are no pairs)
def self.build_query_string(parameters, enc=nil)
  pairs = []
  parameters.each do |name, value|
    next unless name
    # CGI.escape is used here because WEBrick::HTTP.escape* has some problems
    # about m17n on ruby-1.9.*.
    pairs << [CGI.escape(name.to_s), CGI.escape(value.to_s)].join("=")
  end
  pairs.join('&')
end

.detect_charset(src) ⇒ Object



78
79
80
81
82
83
84
85
86
87
# File 'lib/mechanize/util.rb', line 78

# Guesses the charset name of +src+ using NKF.guess.
#
# @param src [String, nil] the text to inspect; nil/false skips the guess
# @return [String] a charset name; "ISO-8859-1" when nothing could be
#   determined
def self.detect_charset(src)
  guess = NKF.guess(src) if src

  charset =
    if guess.is_a?(Integer)
      # Ruby <= 1.8: NKF.guess yields an integer code, translated via CODE_DIC
      CODE_DIC[guess]
    elsif guess
      # Ruby >= 1.9: the guess is stringified and upper-cased
      guess.to_s.upcase
    end

  charset || "ISO-8859-1"
end

.from_native_charset(s, code, ignore_encoding_error = false, log = nil) ⇒ Object

Converts string s from code to UTF-8.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/mechanize/util.rb', line 38

# Re-encodes +s+ by delegating to encode_to(code, s).
#
# The string is returned untouched when +s+ or +code+ is nil/false, or when
# Mechanize.html_parser is not Nokogiri::HTML.
#
# NOTE(review): the published description says "from code to UTF-8", but the
# call is encode_to(code, s); encode_to is not visible here, so confirm the
# actual conversion direction against its definition.
#
# @param s [String, nil] the string to convert
# @param code [Object, nil] the encoding handed to encode_to
# @param ignore_encoding_error [Boolean] when truthy, an encoding failure
#   returns +s+ unchanged instead of propagating
# @param log [#debug, nil] optional logger that receives a debug line on
#   encoding failure
# @return [Object] the result of encode_to, or +s+ itself
# @raise one of ENCODING_ERRORS when conversion fails and
#   +ignore_encoding_error+ is falsy
def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
  return s unless s && code && Mechanize.html_parser == Nokogiri::HTML

  begin
    encode_to(code, s)
  rescue *ENCODING_ERRORS => ex
    if log
      log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}")
    end
    # Re-raise the current exception unless the caller opted to ignore it.
    raise unless ignore_encoding_error
    s
  end
end

.html_unescape(s) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/mechanize/util.rb', line 64

# Replaces HTML character references in +s+ with the characters they denote.
#
# Three forms are recognised:
# * named references (&name;), looked up in
#   Mechanize.html_parser::NamedCharacters
# * decimal numeric references (&#65;)
# * hexadecimal numeric references (&#x41; / &#X41;)
#
# A reference that has no known code point, or whose code point cannot be
# packed as UTF-8, is left in the output unchanged.
#
# @param s [String, nil] the text to unescape
# @return [String, nil] the unescaped text; +s+ itself when it is nil/false
def self.html_unescape(s)
  return s unless s
  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    number = case match
             when /\A&(\w+);\z/
               Mechanize.html_parser::NamedCharacters[$1]
             when /\A&#([0-9]+);\z/
               $1.to_i
             when /\A&#[xX]([0-9a-fA-F]+);\z/
               $1.to_i(16)
             end

    if number
      begin
        [number].pack('U')
      rescue StandardError
        # e.g. a code point out of range for pack('U'): keep the original text
        match
      end
    else
      match
    end
  }
end

.uri_escape(str, unsafe = nil) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/mechanize/util.rb', line 89

# Escapes +str+ using the memoized URI parser.
#
# The parser is created once and cached in @parser: a URI::Parser instance
# when that class exists, otherwise the URI module itself.
#
# @param str [String] the text to escape
# @param unsafe [Regexp, String, nil] pattern of characters to escape; when
#   nil/false it defaults to URI::UNSAFE (URI-module fallback) or the
#   parser's regexp[:UNSAFE] entry
# @return [String] the escaped string
def self.uri_escape str, unsafe = nil
  unless @parser
    @parser = begin
      URI::Parser.new
    rescue NameError
      URI
    end
  end

  # Choose the default character class matching whichever parser is in use.
  unsafe ||= (URI == @parser ? URI::UNSAFE : @parser.regexp[:UNSAFE])

  @parser.escape(str, unsafe)
end

.uri_unescape(str) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/mechanize/util.rb', line 105

# Unescapes percent-encoded text in +str+ using the memoized URI parser.
#
# The parser is created once and cached in @parser: a URI::Parser instance
# when that class exists, otherwise the URI module itself.
#
# @param str [String] the text to unescape
# @return [String] the unescaped string
def self.uri_unescape str
  unless @parser
    @parser = begin
      URI::Parser.new
    rescue NameError
      URI
    end
  end

  @parser.unescape(str)
end