Class: SkinnyJeans::StringParser

Inherits:
Object
  • Object
show all
Defined in:
lib/skinny_jeans/string_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(string_value) ⇒ StringParser

Returns a new instance of StringParser.



60
61
62
# File 'lib/skinny_jeans/string_parser.rb', line 60

# Builds a parser around a piece of raw text.
#
# @param string_value [String] text to keep for later parsing; stored as-is
#   in @string_value (no validation or copying happens here)
def initialize(string_value)
  @string_value = string_value
end

Instance Attribute Details

#string_value ⇒ Object

Returns the value of attribute string_value.



59
60
61
# File 'lib/skinny_jeans/string_parser.rb', line 59

# Reader for the text handed to the constructor.
#
# @return [Object] whatever is currently held in @string_value
def string_value
  @string_value
end

Class Method Details

.extract_search_query(_url) ⇒ Object



9
10
11
# File 'lib/skinny_jeans/string_parser.rb', line 9

# Convenience entry point: wraps the given text in a new parser instance and
# asks it for the search keyword.
#
# @param _url [String] text handed to the parser's constructor
# @return [Object] whatever #get_search_keyword returns for that text
def extract_search_query(_url)
  parser = new(_url)
  parser.get_search_keyword
end

.extract_search_query_from_valid_url(url) ⇒ Object

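Pre: a referring URL from Google, Yahoo, AOL, Bing, Ask or search-results.com. Post: the search query, or nil if the URL is not from a recognized engine or the query contains characters outside the ASCII whitelist.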



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/skinny_jeans/string_parser.rb', line 15

# Pulls the search phrase out of a referring URL that belongs to one of the
# recognised search engines.
#
# @param url [String] referring URL
# @return [String, nil] the value handed back by
#   return_param_from_valid_url_or_path, or nil when the URL matches no known
#   engine, carries no such parameter, or the value contains a character
#   outside the whitelist below
def extract_search_query_from_valid_url(url)
  # Each engine keeps the phrase in its own query-string parameter; the first
  # pattern that matches anywhere in the URL decides which one is read.
  query_param =
    case url
    when /google\.com/ then "q"
    when /search\.yahoo\.com/ then "p"
    when /search\.aol\.com/ then "q"
    when /ask\.com/ then "q"
    when /bing\.com/ then "q"
    when /search\-results\.com/ then "q"
    end

  keyword = query_param && return_param_from_valid_url_or_path(url, query_param)
  return keyword unless keyword

  # Whitelist of acceptable characters: if stripping everything else changes
  # the string, the whole value is rejected.
  # NOTE: `+-_` inside the class is a range (0x2B..0x5F), so `/` and `\` are
  # accepted as well as the characters spelled out individually.
  cleaned = keyword.gsub(/[^0-9A-Za-z\s"'!@#\$%\^&\*\(\)\?\<\>\[\]:;,\.+-_=]/, '')
  cleaned == keyword ? keyword : nil
end

.return_param_from_valid_url_or_path(url_or_path, param_name) ⇒ Object

Pre: a URL or path such as example.org?q=cool&fun=no, and a parameter name such as “fun”. Post: that parameter's value, here “no”.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/skinny_jeans/string_parser.rb', line 38

# Reads one query-string parameter out of a URL or path.
#
# The input is percent-escaped before parsing so that raw spaces or non-ASCII
# bytes do not make URI.parse fail; the value is therefore unescaped twice on
# the way out (once per form-decoding, once to undo that initial escaping).
#
# URI.encode / URI.decode were removed in Ruby 3.0, so the equivalent
# URI::RFC2396_Parser#escape / #unescape are used instead, and the
# ActiveSupport-only `present?` check is replaced by a plain nil/empty test.
#
# @param url_or_path [String] e.g. "example.org?q=cool&fun=no"
# @param param_name [String] name of the parameter to read, e.g. "fun"
# @return [String, nil] the stripped, downcased value (repeated parameters
#   are concatenated); "no_keyword_referred" when the parameter is present
#   but blank; nil when there is no query string, the parameter is absent, or
#   the value cannot be processed
# @raise [URI::InvalidURIError] if the escaped input still cannot be parsed
def return_param_from_valid_url_or_path(url_or_path, param_name)
  rfc2396 = URI::RFC2396_Parser.new
  query = URI.parse(rfc2396.escape(url_or_path)).query
  return nil if query.nil? || query.empty?

  # Pairs are separated by "&" or ";". A bare key ("?q") counts as present
  # with no values, which ends up as the blank case below.
  values = nil
  query.split(/[&;]/).each do |pair|
    key, value = pair.split('=', 2).map { |part| URI.decode_www_form_component(part) }
    next unless key == param_name
    values ||= []
    values << value if value
  end
  return nil if values.nil?

  begin
    val = rfc2396.unescape(values.join).strip.downcase
  rescue ArgumentError
    # strip/downcase reject byte sequences that are invalid in the string's encoding
    puts "couldn't parse #{values}, must not be utf8"
    return nil
  end

  val.empty? ? "no_keyword_referred" : val
end

Instance Method Details

#all_urls ⇒ Object

Finds all URLs in the string that sit at the beginning or end of the string or are delimited by whitespace.



65
66
67
68
69
70
71
# File 'lib/skinny_jeans/string_parser.rb', line 65

# Lists the URLs contained in string_value.
#
# The text is split on whitespace; tokens that begin with http: or https:
# (optionally preceded by a quote) are kept, and one enclosing quote is
# trimmed from each end. The result is memoised in @all_urls.
#
# @return [Array<String>, nil] the URLs in order of appearance, or nil when
#   there are none (including when string_value is nil)
def all_urls
  # to_s lets a nil string_value behave like empty text instead of raising.
  @all_urls ||= string_value.to_s.split(/\s+/)
                            .select { |token| token =~ /\A['"]?https?:['"]?/ }
                            .map { |token| token.gsub(/\A["']|["']\z/, '') }
  @all_urls.empty? ? nil : @all_urls
end

#get_search_keyword ⇒ Object

Iterates through any URLs found in the string and returns a search query, or nil.



74
75
76
# File 'lib/skinny_jeans/string_parser.rb', line 74

# Walks the URLs found by all_urls and returns the first search query that
# the class-level extract_search_query_from_valid_url yields.
#
# Previously every URL was processed but only the first URL's result was
# kept, so a search referral that followed a non-search URL was lost. The
# scan now stops at the first URL that produces a query.
#
# @return [String, nil] the first non-nil search query, or nil when there are
#   no URLs or none of them yields one
def get_search_keyword
  urls = all_urls
  return nil if urls.nil?

  urls.each do |candidate|
    keyword = self.class.extract_search_query_from_valid_url(candidate)
    return keyword unless keyword.nil?
  end
  nil
end