Module: Murlsh::UriAsk
- Defined in:
- lib/murlsh/uri_ask.rb
Overview
URI mixin.
Constant Summary
# Matches content-type values that should be treated as HTML.
#
# The alternation is grouped so the start-of-string anchor applies to both
# media types; parameters such as "; charset=utf-8" may follow the match.
HtmlContentTypeRe = %r{\A(?:text/html|application/xhtml\+xml)}
Instance Method Summary
-
#content_length(options = {}) ⇒ Object
Get the content length.
-
#content_type(options = {}) ⇒ Object
Get the content type.
-
#decode(s) ⇒ Object
Convert from the character set of this url to utf-8 and decode HTML entities.
-
#default_headers ⇒ Object
Default headers sent with the request.
-
#description(options = {}) ⇒ Object
Get the HTML meta description.
-
#doc(options = {}) ⇒ Object
Get the parsed Nokogiri doc at this url.
-
#get_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP GET for this URI.
-
#head_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP HEAD for this URI.
-
#header(header_name, options = {}) ⇒ Object
Get the value of a response header.
-
#html?(options = {}) ⇒ Boolean
Return true if the content type is HTML.
-
#title(options = {}) ⇒ Object
Get the HTML title.
Instance Method Details
#content_length(options = {}) ⇒ Object
Get the content length.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
20 |
# File 'lib/murlsh/uri_ask.rb', line 20
#
# Get the content length.
#
# Options (forwarded unchanged to #header):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
#
# Returns whatever #header returns for 'content-length'.
def content_length(options = {})
  header('content-length', options)
end
#content_type(options = {}) ⇒ Object
Get the content type.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
27 |
# File 'lib/murlsh/uri_ask.rb', line 27
#
# Get the content type.
#
# Options (forwarded unchanged to #header):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
#
# Returns whatever #header returns for 'content-type'.
def content_type(options = {})
  header('content-type', options)
end
#decode(s) ⇒ Object
Convert from the character set of this url to utf-8 and decode HTML entities.
112 113 114 |
# File 'lib/murlsh/uri_ask.rb', line 112
#
# Convert from the character set of this url to utf-8 and decode HTML
# entities.
#
# s - string whose bytes are interpreted in the encoding reported by
#     doc.encoding
#
# Iconv was removed from the standard library in Ruby 2.0, so String#encode
# is used whenever the string supports it; Iconv is kept only as the fallback
# for interpreters without String#encode.
#
# Returns the result of HTMLEntities#decode applied to the utf-8 string.
def decode(s)
  source_encoding = doc.encoding
  utf8 =
    if s.respond_to?(:encode)
      s.encode('utf-8', source_encoding)
    else
      Iconv.conv('utf-8', source_encoding, s)
    end
  HTMLEntities.new.decode(utf8)
end
#default_headers ⇒ Object
Default headers sent with the request.
93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/murlsh/uri_ask.rb', line 93
#
# Default headers sent with the request.
#
# Always sets a browser-like User-Agent. When the host begins with
# www.nytimes.com a Referer of http://news.google.com/ is added as well.
# The prefix test uses start_with? so it is anchored to the beginning of the
# host string rather than to the beginning of any line within it.
#
# Returns a new Hash on every call.
def default_headers
  headers = {
    'User-Agent' =>
      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.126 Safari/535.1'
  }
  headers['Referer'] = 'http://news.google.com/' if host.to_s.start_with?('www.nytimes.com')
  headers
end
#description(options = {}) ⇒ Object
Get the HTML meta description.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/murlsh/uri_ask.rb', line 51
#
# Get the HTML meta description.
#
# Options (forwarded unchanged to #doc):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
#
# The result is cached in @description after the first call, so later calls
# ignore their options. Returns '' when there is no doc or the doc has no
# non-empty description; otherwise the description passed through #decode.
def description(options = {})
  return @description if defined?(@description)

  @description = ''
  d = doc(options)
  text = d && d.description
  @description = decode(text) if text && !text.empty?
  @description
end
#doc(options = {}) ⇒ Object
Get the parsed Nokogiri doc at this url.
Options:
-
:failproof - if true hide all exceptions and return nil on failure
-
:headers - hash of headers to send in request
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/murlsh/uri_ask.rb', line 70
#
# Get the parsed Nokogiri doc at this url.
#
# Options:
# * :failproof - passed to Murlsh.failproof; documented as hiding all
#   exceptions when true
# * :headers - hash of headers to send in request; merged over
#   #default_headers
#
# The caller's options hash is never modified: a merged copy is built and
# used for the rest of the method.
#
# The result is cached in @doc after the first call. Returns nil when the
# content type is not HTML (per #html?) or when no document was parsed.
def doc(options = {})
  return @doc if defined?(@doc)

  request_headers = default_headers.merge(options.fetch(:headers, {}))
  options = options.merge(:headers => request_headers)

  @doc = nil
  if html?(options)
    Murlsh.failproof(options) do
      self.open(options[:headers]) do |f|
        data = f.read

        @doc = Nokogiri(data, to_s)
        # encoding unknown, reparse with f.charset, default to utf-8
        @doc = Nokogiri(data, to_s, f.charset || 'utf-8') unless @doc.encoding
        @doc.extend(Murlsh::Doc)
      end
    end
  end
  @doc
end
#get_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP GET for this URI.
Return hash values are single strings.
Options:
-
:failproof - if true hide all exceptions and return empty hash on failure
-
:headers - hash of headers to send in request
162 163 164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/murlsh/uri_ask.rb', line 162
#
# Get and cache response headers returned by HTTP GET for this URI.
#
# Return hash values are single strings.
#
# Options:
# * :failproof - if true hide all exceptions and return empty hash on failure
# * :headers - hash of headers to send in request; merged over
#   #default_headers
#
# The result is cached in @get_headers after the first call, so later calls
# ignore their options.
def get_headers(options = {})
  return @get_headers if defined?(@get_headers)

  request_headers = default_headers.merge(options.fetch(:headers, {}))

  response_headers = {}

  # use open-uri instead of Net::HTTP because it handles redirects
  Murlsh.failproof(options) do
    # f.meta is open-uri's hash of response header fields
    response_headers = self.open(request_headers) { |f| f.meta }
  end

  @get_headers = response_headers
end
#head_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP HEAD for this URI.
Return hash values are lists.
Options:
-
:failproof - if true hide all exceptions and return empty hash on failure
-
:headers - hash of headers to send in request
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/murlsh/uri_ask.rb', line 134
#
# Get and cache response headers returned by HTTP HEAD for this URI.
#
# Return hash values are lists.
#
# Options:
# * :failproof - if true hide all exceptions and return empty hash on failure
# * :headers - hash of headers to send in request; merged over
#   #default_headers
#
# Only a successful response (Net::HTTPSuccess) contributes headers; any
# other response leaves the result as an empty hash. The result is cached in
# @head_headers after the first call, so later calls ignore their options.
def head_headers(options = {})
  return @head_headers if defined?(@head_headers)

  request_headers = default_headers.merge(options.fetch(:headers, {}))

  response_headers = {}

  Murlsh.failproof(options) do
    http = Net::HTTP.new(host, port)
    http.use_ssl = (scheme == 'https')

    # provides get_path_query for this URI
    extend(Murlsh::URIGetPathQuery)

    resp = http.request_head(get_path_query, request_headers)
    response_headers = resp.to_hash if resp.is_a?(Net::HTTPSuccess)
  end

  @head_headers = response_headers
end
#header(header_name, options = {}) ⇒ Object
Get the value of a response header.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
121 122 123 124 125 |
# File 'lib/murlsh/uri_ask.rb', line 121
#
# Get the value of a response header.
#
# header_name - key to look up in the response header hashes
#
# Options (forwarded unchanged to #head_headers and #get_headers):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
#
# The first value from the HEAD response is tried first; when that is missing
# or empty the GET response is consulted. Always returns a String, '' when
# neither response has the header.
def header(header_name, options = {})
  result = Array(head_headers(options)[header_name]).first
  result = get_headers(options)[header_name] if result.to_s.empty?
  result.to_s
end
#html?(options = {}) ⇒ Boolean
Return true if the content type is HTML.
108 |
# File 'lib/murlsh/uri_ask.rb', line 108
#
# Return true if the content type is HTML.
#
# Options are forwarded unchanged to #content_type.
#
# The value returned is the portion of the content type matched by
# HtmlContentTypeRe (truthy) or nil when there is no match.
def html?(options = {})
  content_type(options)[HtmlContentTypeRe]
end
#title(options = {}) ⇒ Object
Get the HTML title.
Options:
-
:failproof - if true hide all exceptions and return url on failure
-
:headers - hash of headers to send in request
34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/murlsh/uri_ask.rb', line 34
#
# Get the HTML title.
#
# Options (forwarded unchanged to #doc):
# * :failproof - if true hide all exceptions and return url on failure
# * :headers - hash of headers to send in request
#
# The result is cached in @title after the first call, so later calls ignore
# their options. Falls back to this url (to_s) when there is no doc or the
# doc has no non-empty title; otherwise the title passed through #decode.
def title(options = {})
  return @title if defined?(@title)

  @title = to_s
  d = doc(options)
  text = d && d.title
  @title = decode(text) if text && !text.empty?
  @title
end