Class: SkinnyJeans::StringParser
- Inherits:
-
Object
- Object
- SkinnyJeans::StringParser
- Defined in:
- lib/skinny_jeans/string_parser.rb
Instance Attribute Summary collapse
-
#string_value ⇒ Object
Returns the value of attribute string_value.
Class Method Summary collapse
- .extract_search_query(_url) ⇒ Object
-
.extract_search_query_from_valid_url(url) ⇒ Object
Pre: a referring URL from Google, Yahoo, AOL, Bing, Ask or search-results.com. Post: the search query carried by that URL, or nil when the query contains characters outside the accepted ASCII whitelist.
-
.return_param_from_valid_url_or_path(url_or_path, param_name) ⇒ Object
Pre: a URL or path such as example.org?q=cool&fun=no, plus a parameter name such as “fun”. Post: the value of that parameter, here “no”.
Instance Method Summary collapse
-
#all_urls ⇒ Object
find all URLs in a string that are at beginning or end of string or are tokenized by spaces.
-
#get_search_keyword ⇒ Object
iterate through any URLs we find in a string and return a search query or nil.
-
#initialize(string_value) ⇒ StringParser
constructor
A new instance of StringParser.
Constructor Details
#initialize(string_value) ⇒ StringParser
Returns a new instance of StringParser.
60 61 62 |
# File 'lib/skinny_jeans/string_parser.rb', line 60
#
# Stores the raw text this parser will work on.
#
# @param string_value [Object] the text to keep; it is stored unchanged in
#   @string_value
def initialize(string_value)
  @string_value = string_value
end
Instance Attribute Details
#string_value ⇒ Object
Returns the value of attribute string_value.
59 60 61 |
# File 'lib/skinny_jeans/string_parser.rb', line 59
#
# Reader for the @string_value instance variable.
#
# @return [Object] the current value of @string_value, unchanged
def string_value
  @string_value
end
Class Method Details
.extract_search_query(_url) ⇒ Object
9 10 11 |
# File 'lib/skinny_jeans/string_parser.rb', line 9
#
# Convenience entry point: builds an instance around the given value and
# returns whatever that instance's #get_search_keyword returns.
#
# @param _url [Object] passed straight to .new
# @return [Object] the result of #get_search_keyword on the new instance
def extract_search_query(_url)
  new(_url).get_search_keyword
end
.extract_search_query_from_valid_url(url) ⇒ Object
Pre: a referring URL from Google, Yahoo, AOL, Bing, Ask or search-results.com. Post: the search query carried by that URL, or nil when the query contains characters outside the accepted ASCII whitelist.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/skinny_jeans/string_parser.rb', line 15
#
# Pulls the search phrase out of a search-engine referrer URL.
#
# @param url [String] the referring URL
# @return [String, nil] whatever return_param_from_valid_url_or_path yields
#   for the engine's query parameter; nil when the URL matches no known
#   engine, when that helper returns nil, or when the value contains a
#   character outside the whitelist
def extract_search_query_from_valid_url(url)
  # Engines name their query parameter differently. Patterns are tried in
  # this order and the first match wins.
  param_name =
    case url
    when /google\.com/ then "q"
    when /search\.yahoo\.com/ then "p"
    when /search\.aol\.com/, /ask\.com/, /bing\.com/, /search\-results\.com/ then "q"
    end
  return nil if param_name.nil?

  val = return_param_from_valid_url_or_path(url, param_name)
  return nil if val.nil?

  # Whitelist of acceptable characters. The previous pattern ended in
  # `+-_=`, which Ruby reads as the range '+'..'_' and therefore also let
  # '/' and '\' through. The accepted set is kept exactly the same here, but
  # every character is now listed explicitly.
  val =~ /[^0-9A-Za-z\s"'!@#\$%\^&\*\(\)\?\<\>\[\]:;,\.+\-_=\/\\]/ ? nil : val
end
.return_param_from_valid_url_or_path(url_or_path, param_name) ⇒ Object
Pre: a URL or path such as example.org?q=cool&fun=no, plus a parameter name such as “fun”. Post: the value of that parameter, here “no”.
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/skinny_jeans/string_parser.rb', line 38 def return_param_from_valid_url_or_path(url_or_path, param_name) _uri = URI.parse(URI.encode(url_or_path)) if _uri.query.present? _cgi = CGI.parse(_uri.query) if _cgi.keys.include?(param_name) begin val = URI.decode(_cgi[param_name].join).strip.downcase rescue ArgumentError => e puts "couldn't parse #{_cgi[param_name]}, must not be utf8" end return "no_keyword_referred" if val == "" return val if !val.nil? # return (!val.nil? && val!='' ? val : nil) end end return nil end |
Instance Method Details
#all_urls ⇒ Object
find all URLs in a string that are at beginning or end of string or are tokenized by spaces
65 66 67 68 69 70 71 |
# File 'lib/skinny_jeans/string_parser.rb', line 65
#
# Collects every whitespace-separated token of string_value that starts with
# http: or https: (optionally preceded by a quote), with one leading and one
# trailing quote removed. The list is memoized in @all_urls.
#
# @return [Array<String>, nil] the URLs found, or nil when there are none
#   (including when string_value is nil)
def all_urls
  # to_s lets a nil string_value behave like an empty string instead of
  # raising NoMethodError on #split.
  @all_urls ||= string_value.to_s.split(/\s+/)
                            .grep(/\A['"]?https?:['"]?/)
                            .map { |token| token.sub(/\A["']/, "").sub(/["']\z/, "") }
  @all_urls.empty? ? nil : @all_urls
end
#get_search_keyword ⇒ Object
iterate through any URLs we find in a string and return a search query or nil
74 75 76 |
# File 'lib/skinny_jeans/string_parser.rb', line 74
#
# Walks the URLs returned by #all_urls and returns the first search query
# that the class-level extract_search_query_from_valid_url finds.
#
# Earlier this mapped every URL and then took element [0], so a search
# referrer that was not the very first URL in the string was ignored.
#
# @return [Object, nil] the first non-nil extraction result, or nil when
#   there are no URLs or none of them yields a query
def get_search_keyword
  urls = all_urls
  return nil if urls.nil?

  urls.each do |url|
    keyword = self.class.extract_search_query_from_valid_url(url)
    return keyword unless keyword.nil?
  end
  nil
end