Class: JustTheRecipe::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/just-the-recipe/scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Scraper

Returns a new instance of Scraper.



5
6
7
# File 'lib/just-the-recipe/scraper.rb', line 5

# Creates a scraper for one page.
#
# @param url [Object] page address; stored unchanged in @url
def initialize(url)
    @url = url
end

Instance Attribute Details

#url ⇒ Object

Returns the value of attribute url.



3
4
5
# File 'lib/just-the-recipe/scraper.rb', line 3

# Reader for the address this scraper was built with.
#
# @return [Object] the current value of @url (nil if it was never set)
def url
    @url
end

Instance Method Details

#create_new_recipe(title, description, ingredients, steps, url) ⇒ Object



53
54
55
# File 'lib/just-the-recipe/scraper.rb', line 53

# Wraps the given fields in a new JustTheRecipe::Recipe.
#
# The five arguments are forwarded positionally, in the order received.
#
# @return [JustTheRecipe::Recipe] the newly constructed recipe
def create_new_recipe(title, description, ingredients, steps, url)
    recipe_fields = [title, description, ingredients, steps, url]
    JustTheRecipe::Recipe.new(*recipe_fields)
end

#get_recipe_by_schema ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/just-the-recipe/scraper.rb', line 9

# Builds a recipe from the schema.org Recipe node returned by get_schema.
#
# "name" and "description" default to nil and "recipeIngredient" to [] when
# the key is absent. "recipeInstructions" may be:
# * absent or nil                       -> no steps
# * a single non-array value            -> that value as the only step, unchanged
# * an array of strings, hashes with "text", hashes with "itemListElement",
#   nested arrays, or any mix of those  -> flattened step texts, newlines removed
#
# @return [Object] the result of create_new_recipe
# @raise [NoMethodError] if get_schema returns nil (no Recipe node found)
def get_recipe_by_schema
    schema = get_schema

    title = schema["name"]
    description = schema["description"]
    ingredients = schema.fetch("recipeIngredient", [])

    instructions = schema["recipeInstructions"]
    steps =
        case instructions
        when nil then []
        when Array then instruction_texts(instructions)
        else [instructions]
        end

    create_new_recipe(title, description, ingredients, steps, @url)
end

# Recursively collects step texts from one recipeInstructions node.
# Entries that carry no usable text are skipped instead of raising.
private def instruction_texts(node)
    case node
    when Array
        node.flat_map { |child| instruction_texts(child) }
    when Hash
        if node.key?("itemListElement")
            # Section-style entry: its steps live one level down.
            instruction_texts(node["itemListElement"])
        elsif node["text"].is_a?(String)
            [node["text"].delete("\n")]
        else
            []
        end
    when String
        # Plain-text step; indexing it with "text" would be a substring lookup.
        [node.delete("\n")]
    else
        []
    end
end

#get_schema ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/just-the-recipe/scraper.rb', line 33

# Downloads the page at @url and returns its schema.org Recipe node.
#
# JSON-LD <script> tags with the "yoast-schema-graph" class are preferred;
# otherwise every "application/ld+json" script is considered. When several
# scripts are present, those that are not valid JSON are skipped. Top-level
# arrays and "@graph" members are searched, and "@type" may be a string or
# an array of strings.
#
# @return [Hash, nil] the first node typed "Recipe", or nil if there is none
# @raise [ArgumentError] if @url does not parse to a URI that open-uri can open
# @raise [JSON::ParserError] if the Yoast script, or the only JSON-LD script,
#   is not valid JSON
def get_schema
    uri = URI.parse(@url)
    # Kernel#open would run a command for a string starting with "|" and would
    # read local paths; only URIs that open-uri knows how to open are accepted.
    raise ArgumentError, "cannot open #{@url.inspect} as a URL" unless uri.respond_to?(:open)

    # Block form closes the downloaded stream once the document is parsed.
    noko = uri.open { |page| Nokogiri::HTML(page) }

    yoast = noko.css('script[type*="application/ld+json"].yoast-schema-graph')
    if yoast.length > 0
        documents = [JSON.parse(yoast.text)]
    else
        scripts = noko.css('script[type*="application/ld+json"]')
        if scripts.length == 1
            documents = [JSON.parse(scripts.text)]
        else
            # Parse each script once; an unparsable one becomes nil and is
            # ignored by the type check below.
            documents = scripts.map do |script|
                begin
                    JSON.parse(script.text)
                rescue JSON::ParserError
                    nil
                end
            end
        end
    end

    # A document may be a node, an array of nodes, or a wrapper with "@graph".
    candidates = documents.flatten.flat_map do |node|
        graph = node.is_a?(Hash) ? node["@graph"] : nil
        graph.is_a?(Array) ? [node, *graph] : [node]
    end

    candidates.find { |node| node.is_a?(Hash) && Array(node["@type"]).include?("Recipe") }
end

#valid_json?(json) ⇒ Boolean

Returns:

  • (Boolean)


57
58
59
60
61
62
# File 'lib/just-the-recipe/scraper.rb', line 57

# Reports whether the given text parses as JSON.
#
# @param json [String] candidate JSON text
# @return [Boolean] true when JSON.parse accepts the input; false when it
#   raises JSON::ParserError, or TypeError for a non-String such as nil
def valid_json?(json)
    JSON.parse(json)
    true
rescue JSON::ParserError, TypeError
    false
end

#valid_url? ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
67
68
69
70
# File 'lib/just-the-recipe/scraper.rb', line 64

# Checks whether a schema can be obtained for this scraper.
#
# Note that the result is truthy/falsy rather than strictly boolean: the
# value of get_schema is passed through unchanged, and false is returned
# only when get_schema raises a StandardError.
#
# @return [Object, false] whatever get_schema returns, or false on error
def valid_url?
    get_schema
rescue
    false
end