Module: LeBonCoin::Search

Defined in:
lib/leboncoin/items.rb,
lib/leboncoin/search.rb

Defined Under Namespace

Modules: SearchItems

Class Method Summary collapse

Class Method Details

.loadHTML(url) ⇒ Object

Load the given URL as a well-formed HTML document



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/leboncoin/search.rb', line 8

# Load the given URL as a well-formed HTML document.
#
# @param url [String] address of the page to fetch
# @return [Object, nil] the document built by Nokogiri::HTML, or nil when
#   fetching or parsing raised a StandardError (best-effort by design)
def loadHTML url
  # Required lazily, so the dependencies are only loaded once a page is
  # actually fetched.
  require 'open-uri'
  require 'nokogiri'

  begin
    # URI.open (Ruby 2.5+) rather than a bare open(url): since Ruby 3.0
    # open-uri no longer patches Kernel#open, so open(url) would treat the
    # URL as a local file path and always end up in the rescue below.
    # The block form closes the underlying handle once parsing is done.
    URI.open(url) { |io| Nokogiri::HTML(io) }
  rescue
    nil
  end
end

.parseItem(url, item = Hash.new) ⇒ Object

Fetch the item page at the given URL and parse its description, postcode and city into the item.



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/leboncoin/search.rb', line 91

# Parse item from a given HTML link.
#
# Loads the page at +url+ through loadHTML and stores three entries in
# +item+: "description", "postcode" and "city". Each extraction is
# best-effort: if it raises a StandardError (for instance because the
# document is nil), the corresponding entry is set to nil.
#
# @param url  address of the item's page, handed to loadHTML
# @param item receiver of the parsed entries (a new Hash by default)
# @return the same +item+ object
def parseItem(url, item = Hash.new)
  page = loadHTML(url)

  # DESCRIPTION: inner HTML of the "lbcAd_text" span, run through HTMLUtils.
  description =
    begin
      ad_text = page.xpath('//span[@class="lbcAd_text"]')
      LeBonCoin::HTMLUtils.convert(ad_text.inner_html)
    rescue StandardError
      nil
    end
  item["description"] = description

  # Stripped text of the <strong> inside the "ad_details_400" span; both the
  # postcode and the city are derived from it.
  location =
    begin
      page.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
    rescue StandardError
      nil
    end

  # POSTCODE: first run of digits in the location text.
  postcode =
    begin
      location[/[0-9]+/]
    rescue StandardError
      nil
    end
  item["postcode"] = postcode

  # CITY: the location text with every "digits + space" run removed.
  city =
    begin
      location.gsub(/[0-9]+ /, "")
    rescue StandardError
      nil
    end
  item["city"] = city

  item
end

.parseItemNode(node, item = Hash.new) ⇒ Object

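Parse an item (date, image, title, link and price) from a given result-row node, then complete it with the details parsed from the item's own page.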



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/leboncoin/search.rb', line 43

# Parse item from a given XML node.
#
# Fills +item+ with "date", "image", "title", "link", "currency" and "price"
# read from the cells of +node+, then hands the item to parseItem together
# with its link. Every extraction is best-effort: a StandardError yields nil
# (or "UNKNOW TITLE" for the title).
#
# @param node object answering #xpath with the row's cells
# @param item receiver of the parsed entries (a new Hash by default)
# @return whatever parseItem returns for the link and the filled item
def parseItemNode(node, item = Hash.new)
  # DATE: first cell, turned into a string DateTime.parse can read.
  item["date"] =
    begin
      require 'date'
      stamp = node.xpath('td[1]')[0].inner_html.strip
      # Rewrite " ao?t" to " aug" so that month is parseable, then flatten the <br>.
      stamp = stamp.gsub(/ ao.t<br>/, " aug<br>")
      stamp = stamp.gsub(/<br>/, " ")
      # "Aujourd'hui" / "Hier" (today / yesterday) become "dd mon" dates.
      stamp = stamp.gsub(/Aujourd'hui/, Date.today.strftime('%d %b').downcase)
      stamp = stamp.gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
      DateTime.parse(stamp)
    rescue StandardError
      nil
    end

  # IMAGE: src attribute of the thumbnail in the second cell.
  item["image"] =
    begin
      thumbnail = node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]
      thumbnail["src"].strip
    rescue StandardError
      nil
    end

  # TITLE: text of the anchor in the third cell.
  item["title"] =
    begin
      caption = node.xpath('td[3]/a')[0].content.strip
      LeBonCoin::HTMLUtils.convert(caption)
    rescue StandardError
      "UNKNOW TITLE"
    end

  # LINK: href of that same anchor.
  item["link"] =
    begin
      anchor = node.xpath('td[3]/a')[0]
      anchor["href"].strip
    rescue StandardError
      nil
    end

  # PRICE: third text node of the third cell, minus its last two characters.
  item["currency"] = "EUR"
  item["price"] =
    begin
      amount = node.xpath('td[3]/text()[3]')[0].content.strip
      amount.gsub(/..$/, "").to_i
    rescue StandardError
      nil
    end

  parseItem(item["link"], item)
end

.parseItems(url, size, items = LeBonCoin::Search::SearchItems.new(url)) ⇒ Object

Parse up to `size` items from the results page at the given URL



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/leboncoin/search.rb', line 23

# Parse items from a given URL, continuing on the next results page until
# +size+ items have been collected or no further page is linked.
#
# @param url   address of the results page, handed to loadHTML
# @param size  maximum number of items to collect
# @param items accumulator; must answer #size and #createItem
#   (a new SearchItems for +url+ by default)
# @return the +items+ accumulator
def parseItems url, size, items = LeBonCoin::Search::SearchItems.new(url)
  doc = loadHTML url
  # loadHTML returns nil when the page cannot be loaded; keep what has been
  # collected so far instead of failing with NoMethodError on nil.
  return items if doc.nil?

  doc.xpath('//table[@id="hl"]/tr').each do |node|
    # Stop at the quota rather than walking the remaining rows for nothing.
    break unless items.size < size

    parseItemNode node, items.createItem
  end

  if items.size < size
    # Follow the "next page" link once only: if the page repeats the link,
    # following every occurrence would parse the same next page again and
    # add its items twice.
    next_link = doc.xpath('//a[starts-with(text(), "Page suivante")]').first
    # Recurse into parseItems itself; the previous call to `parse` named a
    # method that is not among this module's documented methods.
    parseItems next_link['href'], size, items if next_link
  end

  items
end