Class: StyleMoonCat::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/stylemooncat/scraper.rb

Overview

This class extracts data, using XPath selectors to get item attributes.

Constant Summary collapse

BASE_URL =
'http://www.stylemooncat.com.tw'
BASE_SCRAPE_URL =
"#{BASE_URL}/PDList.asp?"
SEARCH_URI =
"#{BASE_URL}item1=00"
LATEST_URI =
"#{BASE_SCRAPE_URL}recommand=1312090001"
POPULAR_URI =
"#{BASE_SCRAPE_URL}/recommand=1312090003"
TOPS_URI =
"#{BASE_SCRAPE_URL}p1=01"
PANTS_URI =
"#{BASE_SCRAPE_URL}p1=02&p2=01"
ACCESSORIES_URI =
"#{BASE_SCRAPE_URL}p1=06"
ITEM_SELECTOR =

XPath selectors that will be used to scrape data

"//div[@class='goodsBox']/div[@class='goodl']"
TITLE_SELECTOR =
"div[@class='pd_info_l']//text()[not(parent::span)]"
IMAGE_SELECTOR =
"a/img/@src"
PRICE_SELECTOR =
"div[@class='pd_info_l']/span//text()[not(parent::strike)]"
"a/@href"

Instance Method Summary collapse

Instance Method Details

#accessories(page, options = {}) ⇒ Object



47
48
49
50
# File 'lib/stylemooncat/scraper.rb', line 47

# Processes a request for the URI derived from ACCESSORIES_URI.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def accessories(page, options = {})
  base = build_uri(ACCESSORIES_URI, options)
  process_request(uri_with_options(base, page), options)
end

#latest(page, options = {}) ⇒ Object



27
28
29
30
# File 'lib/stylemooncat/scraper.rb', line 27

# Processes a request for the URI derived from LATEST_URI.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def latest(page, options = {})
  base = build_uri(LATEST_URI, options)
  process_request(uri_with_options(base, page), options)
end

#pants(page, options = {}) ⇒ Object



42
43
44
45
# File 'lib/stylemooncat/scraper.rb', line 42

# Processes a request for the URI derived from PANTS_URI.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def pants(page, options = {})
  base = build_uri(PANTS_URI, options)
  process_request(uri_with_options(base, page), options)
end


#popular(page, options = {}) ⇒ Object



32
33
34
35
# File 'lib/stylemooncat/scraper.rb', line 32

# Processes a request for the URI derived from POPULAR_URI.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def popular(page, options = {})
  base = build_uri(POPULAR_URI, options)
  process_request(uri_with_options(base, page), options)
end

#scrape(type, options = {}) ⇒ Object



57
58
59
60
61
62
63
# File 'lib/stylemooncat/scraper.rb', line 57

# Validates the requested scrape type and hands off to scrape_what.
#
# @param type    anything responding to #to_sym; must convert to one of
#   :tops, :popular, :pants, :accessories, :latest or :search
# @param options [Hash] passed through unchanged to scrape_what
# @return whatever scrape_what returns
# @raise [SystemExit] via Kernel#abort when type is not a supported value
def scrape(type, options = {})
  valid_args = [:tops, :popular, :pants, :accessories, :latest, :search]
  # abort (rather than raise) is kept so callers see the same SystemExit as before.
  abort 'invalid parameter - scrape type' unless valid_args.include?(type.to_sym)
  scrape_what(type, options)
end

#search(page, options = {}) ⇒ Object



52
53
54
55
# File 'lib/stylemooncat/scraper.rb', line 52

# Processes a request for the URI derived from BASE_SCRAPE_URL.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def search(page, options = {})
  base = build_uri(BASE_SCRAPE_URL, options)
  process_request(uri_with_options(base, page), options)
end

#tops(page, options = {}) ⇒ Object



37
38
39
40
# File 'lib/stylemooncat/scraper.rb', line 37

# Processes a request for the URI derived from TOPS_URI.
#
# @param page    forwarded to uri_with_options along with the built URI
# @param options [Hash] forwarded to build_uri and to process_request
# @return whatever process_request returns
def tops(page, options = {})
  base = build_uri(TOPS_URI, options)
  process_request(uri_with_options(base, page), options)
end