Class: LCBO::ProductPage

Inherits:
Object
  • Object
show all
Includes:
CrawlKit::Page
Defined in:
lib/lcbo/pages/product_page.rb

Instance Method Summary collapse

Methods included from CrawlKit::Page

#[], #as_hash, #fields, #http_method, included, #initialize, #is_parsed?, #parse, #process, #request, #request_prototype

Instance Method Details

#find_info_line(regexp) ⇒ Object



332
333
334
# File 'lib/lcbo/pages/product_page.rb', line 332

# Returns the first entry of info_cell_lines that matches +regexp+.
#
# @param regexp [Regexp] pattern tested against each line with =~
# @return [String, nil] the first matching line, or nil when none matches
def find_info_line(regexp)
  # find stops at the first hit instead of filtering the whole list.
  info_cell_lines.find { |line| line =~ regexp }
end

#get_info_lines_at_offset(offset) ⇒ Object



321
322
323
324
325
326
# File 'lib/lcbo/pages/product_page.rb', line 321

# Picks the raw (unstripped) info-cell lines whose leading whitespace run is
# exactly +offset+ characters long. Lines with no leading whitespace are
# never selected, even for an offset of 0.
#
# @param offset [Integer] required length of the leading whitespace run
# @return [Array<String>] the matching entries of raw_info_cell_lines
def get_info_lines_at_offset(offset)
  raw_info_cell_lines.select do |raw_line|
    indent = raw_line[/\A[\s]+/]
    !indent.nil? && indent.size == offset
  end
end

#has_package? ⇒ Boolean

Returns:

  • (Boolean)


297
298
299
# File 'lib/lcbo/pages/product_page.rb', line 297

# Reports whether the third info-cell line is something other than the
# price line (presumably a package description -- confirm against the page
# layout).
#
# @return [Boolean] false when the third line contains 'Price:', else true
def has_package?
  third_line = info_cell_lines[2]
  third_line.include?('Price:') ? false : true
end

#info_cell_element ⇒ Object



354
355
356
# File 'lib/lcbo/pages/product_page.rb', line 354

# Looks up the element that the other info_cell_* readers draw from.
#
# @return [Object, nil] the first node in +doc+ matched by the CSS selector
#   below, or nil when nothing matches
def info_cell_element
  selector = 'table[width="478"] td[height="271"] td[colspan="2"].main_font'
  doc.css(selector).first
end

#info_cell_html ⇒ Object



350
351
352
# File 'lib/lcbo/pages/product_page.rb', line 350

# Inner HTML of the info cell, computed on first use and cached in
# @info_cell_html.
#
# @return [Object] the value of info_cell_element.inner_html
#   (presumably a String)
def info_cell_html
  @info_cell_html = info_cell_element.inner_html unless @info_cell_html
  @info_cell_html
end

#info_cell_line_after(item) ⇒ Object



346
347
348
# File 'lib/lcbo/pages/product_page.rb', line 346

# Returns the info-cell line that directly follows +item+.
#
# @param item [String] a line expected to equal an entry of info_cell_lines
# @return [String, nil] the next line; nil when +item+ is absent or is the
#   last line
def info_cell_line_after(item)
  position = info_cell_lines.index(item)
  return nil if position.nil?

  info_cell_lines[position + 1]
end

#info_cell_lines ⇒ Object



340
341
342
343
344
# File 'lib/lcbo/pages/product_page.rb', line 340

# The raw info-cell lines with surrounding whitespace stripped and blank
# lines dropped. Cached in @info_cell_lines after the first call.
#
# @return [Array<String>] non-empty, stripped lines in their original order
def info_cell_lines
  @info_cell_lines ||= raw_info_cell_lines.map(&:strip).reject(&:empty?)
end

#info_cell_text ⇒ Object



328
329
330
# File 'lib/lcbo/pages/product_page.rb', line 328

# All info-cell lines joined with newlines into one string. Cached in
# @info_cell_text after the first call.
#
# @return [String] the entries of info_cell_lines separated by "\n"
def info_cell_text
  return @info_cell_text if @info_cell_text

  @info_cell_text = info_cell_lines.join("\n")
end

#normalize_image_url(url) ⇒ Object



358
359
360
361
362
# File 'lib/lcbo/pages/product_page.rb', line 358

# Turns an image location into an absolute URL.
#
# @param url [String, nil] image location, absolute or relative
# @return [String, nil] nil when +url+ is nil or contains 'default';
#   +url+ unchanged when it already starts with http:// or https://;
#   otherwise +url+ joined onto http://lcbo.com
def normalize_image_url(url)
  return unless url
  # NOTE(review): 'default' presumably marks a placeholder image -- confirm.
  return if url.include?('default')

  # The scheme test is anchored at the start and accepts https, so an https
  # URL is not prefixed with the host and a relative path that merely embeds
  # "http://" (e.g. in a query string) is still made absolute.
  (url =~ %r{\Ahttps?://}i) ? url : File.join('http://lcbo.com', url)
end

#product_details_form(name) ⇒ Object



316
317
318
319
# File 'lib/lcbo/pages/product_page.rb', line 316

# Reads the value of a named input inside the "productdetails" form.
#
# @param name [String] value of the input's name attribute
# @return [String] the input's 'value' attribute converted with to_s
#   (an empty string when the attribute lookup yields nil)
# @raise [NoMethodError] when no such input exists, because the lookup
#   result is nil
def product_details_form(name)
  selector = "form[name=\"productdetails\"] input[name=\"#{name}\"]"
  input = doc.css(selector)[0]
  input.attributes['value'].to_s
end

#raw_info_cell_lines ⇒ Object



336
337
338
# File 'lib/lcbo/pages/product_page.rb', line 336

# Text content of the info cell split into lines, with each line's own
# whitespace left intact. Cached in @raw_info_cell_lines after the first call.
#
# @return [Array<String>] info_cell_element.content split on newlines
def raw_info_cell_lines
  return @raw_info_cell_lines if @raw_info_cell_lines

  @raw_info_cell_lines = info_cell_element.content.split(/\n/)
end

#stock_categories ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
314
# File 'lib/lcbo/pages/product_page.rb', line 301

# Categories read from the info-cell line directly above the alcohol
# content line. A non-nil result is cached in @stock_categories; a nil
# result is recomputed on each call because ||= does not cache nil.
#
# @return [Array<String>, nil] the trimmed, non-empty comma-separated
#   entries of that line; nil when there is no "Alcohol/Vol." line, when it
#   is the first line (nothing above it), or when no entries remain
def stock_categories
  @stock_categories ||= begin
    # Always appears above alcohol content
    idx = info_cell_lines.index { |l| l =~ /Alcohol\/Vol\./ }
    # idx must be positive: at index 0 there is no line above, and idx - 1
    # would wrap around to the last line of the cell.
    if idx && idx > 0
      cats = info_cell_lines[idx - 1].split(',').map(&:strip).reject(&:empty?)
      cats.empty? ? nil : cats
    end
  end
end

#verify_product_details_form ⇒ Object



383
384
385
386
387
# File 'lib/lcbo/pages/product_page.rb', line 383

# Checks that the document contains a form named "productdetails".
#
# @return [nil] when at least one such form is present
# @raise [CrawlKit::MalformedError] when no such form is found
def verify_product_details_form
  if doc.css('form[name="productdetails"]').empty?
    raise CrawlKit::MalformedError,
      "productdetails form not found in doc for product #{id}"
  end
end

#verify_product_name ⇒ Object



377
378
379
380
381
# File 'lib/lcbo/pages/product_page.rb', line 377

# Checks that the product details form carries a non-blank 'itemName'.
#
# @return [nil] when the stripped name is not empty
# @raise [CrawlKit::NotFoundError] when the name is empty or whitespace only
def verify_product_name
  item_name = product_details_form('itemName')
  if item_name.strip.empty?
    raise CrawlKit::NotFoundError,
      "can not locate name for product #{id}"
  end
end

#verify_response_not_blank ⇒ Object



371
372
373
374
375
# File 'lib/lcbo/pages/product_page.rb', line 371

# Checks that the fetched HTML is not empty or whitespace only.
#
# @return [nil] when +html+ has non-whitespace content
# @raise [CrawlKit::NotFoundError] when +html+ is blank
def verify_response_not_blank
  if html.strip.empty?
    raise CrawlKit::NotFoundError,
      "product #{id} does not appear to exist"
  end
end

#verify_third_info_cell ⇒ Object



364
365
366
367
368
369
# File 'lib/lcbo/pages/product_page.rb', line 364

# Checks the shape of the third info-cell line: when has_package? is true,
# that line must begin with a bar character ('|').
#
# @return [nil] when has_package? is false or the line starts with '|'
# @raise [CrawlKit::MalformedError] when has_package? is true and the line
#   does not start with '|'
def verify_third_info_cell
  if has_package? && !info_cell_lines[2].start_with?('|')
    raise CrawlKit::MalformedError,
      "Expected third line in info cell to begin with bar. LCBO No: " \
      "#{id}, Dump: #{info_cell_lines[2].inspect}"
  end
end

#volume_helper ⇒ Object



293
294
295
# File 'lib/lcbo/pages/product_page.rb', line 293

# CrawlKit::VolumeHelper built from +package+, created on first use and
# cached in @volume_helper.
#
# @return [CrawlKit::VolumeHelper] the cached helper instance
def volume_helper
  return @volume_helper if @volume_helper

  @volume_helper = CrawlKit::VolumeHelper.new(package)
end