Class: BudgetBytesCli::Scraper

Inherits:

Object

Object
BudgetBytesCli::Scraper

show all

Defined in: lib/budget_bytes_cli/scraper.rb

Class Method Summary collapse

.create_categories ⇒ Object
.create_page_url(num, url) ⇒ Object
.get_recipes(url) ⇒ Object

Scrapes all recipes within a category, across every page of results.
.get_recipes_from(page_url) ⇒ Object
.locate_categories ⇒ Object
.open_page ⇒ Object

Fetches and parses the recipe index page used for getting categories.
.scrape_recipe(url) ⇒ Object

Scrapes the ingredients and instructions from a recipe page.

Class Method Details

.create_categories ⇒ `Object`

# File 'lib/budget_bytes_cli/scraper.rb', line 12

# Creates a BudgetBytesCli::Category for each item yielded by
# locate_categories. The category URL is the "href" of the item's link and
# the title is the text of that link's first child node.
#
# Returns the result of calling `each` on the located items.
def self.create_categories
    locate_categories.each do |category_node|
        anchor = category_node.css("a")
        BudgetBytesCli::Category.new(
            anchor.attribute("href").value,
            anchor.children[0].text
        )
    end
end

.create_page_url(num, url) ⇒ `Object`

# File 'lib/budget_bytes_cli/scraper.rb', line 45

# Builds the URL for page +num+ of a paginated listing rooted at +url+.
#
# Page 1 is the listing URL itself and is returned unchanged. Later pages
# are "<url>/page/<num>/". A trailing slash is added to +url+ when it is
# missing, so both ".../category" and ".../category/" yield a well-formed
# page URL instead of ".../categorypage/2/".
#
# @param num [Integer] 1-based page number
# @param url [String] URL of the first page of the listing
# @return [String] URL of the requested page
def self.create_page_url(num, url)
    return url if num == 1

    base = url.end_with?("/") ? url : "#{url}/"
    "#{base}page/#{num}/"
end

.get_recipes(url) ⇒ `Object`

functions for scraping recipes within a category

# File 'lib/budget_bytes_cli/scraper.rb', line 21

# Collects the recipes of the category at +url+ across all of its pages.
#
# The first page is fetched to read the pagination links (".page-numbers").
# Every page from 1 up to the largest page number found is then scraped with
# get_recipes_from, and the per-page results are combined into one array.
#
# @param url [String] URL of the first page of the category
# @return [Array] the combined results of get_recipes_from for each page
def self.get_recipes(url)
    # URI.open (provided by open-uri) is used instead of Kernel#open:
    # Kernel#open no longer fetches URLs on Ruby 3.0+, and it would run a
    # command if handed a string starting with "|".
    first_page = Nokogiri::HTML(URI.open(url))

    # Link text that is not a number becomes 0 via to_i. Appending 1 makes
    # sure the first page is always scraped, whether there are no pagination
    # links at all or none of them is numeric.
    pages_total = first_page.css(".page-numbers").map { |link| link.text.to_i }.push(1).max

    (1..pages_total).flat_map do |page_num|
        get_recipes_from(create_page_url(page_num, url))
    end
end

.get_recipes_from(page_url) ⇒ `Object`

# File 'lib/budget_bytes_cli/scraper.rb', line 34

# Scrapes a single listing page and builds a BudgetBytesCli::Recipe for each
# link matched by ".archive-post a", using the link's "href" as the recipe
# URL and its "title" attribute as the recipe title.
#
# @param page_url [String] URL of the listing page to scrape
# @return [Array] one BudgetBytesCli::Recipe per matched link
def self.get_recipes_from(page_url)
    # URI.open (provided by open-uri) is used instead of Kernel#open:
    # Kernel#open no longer fetches URLs on Ruby 3.0+, and it would run a
    # command if handed a string starting with "|".
    listing = Nokogiri::HTML(URI.open(page_url))

    listing.css(".archive-post a").map do |link|
        recipe_title = link.attribute("title").value
        recipe_url = link.attribute("href").value
        BudgetBytesCli::Recipe.new(recipe_url, recipe_title)
    end
end

.locate_categories ⇒ `Object`



8
9
10

# File 'lib/budget_bytes_cli/scraper.rb', line 8

# Returns the elements of the page from open_page that match the
# ".cat-item" CSS selector.
def self.locate_categories
    index_page = open_page
    index_page.css(".cat-item")
end

.open_page ⇒ `Object`

functions for getting categories



4
5
6

# File 'lib/budget_bytes_cli/scraper.rb', line 4

# Fetches https://www.budgetbytes.com/recipes/ and returns it parsed by
# Nokogiri::HTML.
def self.open_page
    # URI.open (provided by open-uri) is used instead of Kernel#open, which
    # no longer fetches URLs on Ruby 3.0+.
    Nokogiri::HTML(URI.open("https://www.budgetbytes.com/recipes/"))
end

.scrape_recipe(url) ⇒ `Object`

scrape instructions, ingredients from recipe page

# File 'lib/budget_bytes_cli/scraper.rb', line 54

# Scrapes the ingredients and instructions from the recipe page at +url+.
#
# Both page layouts are looked for on every page and whatever is found is
# combined: the newer "wprm-*" classes, and the older layout (ingredients in
# table rows, instructions in paragraphs that start with "STEP").
#
# @param url [String] URL of the recipe page
# @return [Array<String>] two strings, [ingredients, instructions], each with
#   one entry per line
def self.scrape_recipe(url)
    # URI.open (provided by open-uri) is used instead of Kernel#open:
    # Kernel#open no longer fetches URLs on Ruby 3.0+, and it would run a
    # command if handed a string starting with "|".
    page = Nokogiri::HTML(URI.open(url))

    # Ingredients, new site: amount, unit and name live in separate elements.
    amounts = page.css('.wprm-recipe-ingredient-amount').map(&:text)
    units = page.css('.wprm-recipe-ingredient-unit').map(&:text)
    names = page.css('.wprm-recipe-ingredient-name').map(&:text)

    # NOTE(review): the three lists are paired purely by position, so an
    # ingredient that lacks one of the three elements would presumably shift
    # the pairing for every later ingredient - confirm against live markup.
    ingredient_array = amounts.zip(units, names).map { |parts| parts.join(' ').strip }

    # Ingredients, old site (before the css switched to the classes above):
    # one table row per ingredient.
    old_rows = page.css("tr")

    # get rid of first, last rows in table (header and total cost)
    old_rows.shift
    old_rows.pop

    old_rows.each do |row|
        cells = row.text.split("\n")

        # get rid of first blank item, last cost item
        cells.shift
        cells.pop

        ingredient_array << cells.join(" ")
    end

    recipe_steps = []

    # Instructions, old site: paragraphs whose first word is "STEP".
    page.css("p").each do |paragraph|
        words = paragraph.text.split(" ")
        next unless words.first == "STEP"

        # Drop the first two words (the "STEP" marker and the word after it).
        # drop returns [] for a paragraph with fewer than two words, where
        # slice(2, ...) returned nil and made the following join raise.
        recipe_steps << words.drop(2).join(" ")
    end

    # Instructions, new site: each step has its own element.
    page.css(".wprm-recipe-instruction-text").each { |node| recipe_steps << node.text }

    [ingredient_array.join("\n"), recipe_steps.join("\n")]
end

Class: BudgetBytesCli::Scraper

Class Method Summary collapse

Class Method Details

.create_categories ⇒ Object

.create_page_url(num, url) ⇒ Object

.get_recipes(url) ⇒ Object

.get_recipes_from(page_url) ⇒ Object

.locate_categories ⇒ Object

.open_page ⇒ Object

.scrape_recipe(url) ⇒ Object

.create_categories ⇒ `Object`

.create_page_url(num, url) ⇒ `Object`

.get_recipes(url) ⇒ `Object`

.get_recipes_from(page_url) ⇒ `Object`

.locate_categories ⇒ `Object`

.open_page ⇒ `Object`

.scrape_recipe(url) ⇒ `Object`