Class: InstagramCrawler::Parser::Html
- Defined in:
- lib/instagram_crawler/parser/html.rb
Instance Attribute Summary collapse
-
#html ⇒ Object
readonly
Returns the value of attribute html.
Instance Method Summary collapse
-
#initialize(url) ⇒ Html
constructor
A new instance of Html.
- #parsing ⇒ Object
- #parsing_photo_page ⇒ Object
- #parsing_video_page ⇒ Object
Constructor Details
#initialize(url) ⇒ Html
Returns a new instance of Html.
6 7 8 |
# File 'lib/instagram_crawler/parser/html.rb', line 6
#
# Builds a parser whose +@html+ holds whatever +get_html+ returns for +url+.
#
# @param url [Object] passed straight to +get_html+; presumably the address of
#   the page to download — +get_html+ is defined outside this excerpt, confirm there
def initialize(url)
  fetched = get_html(url)
  @html = fetched
end
Instance Attribute Details
#html ⇒ Object (readonly)
Returns the value of attribute html.
4 5 6 |
# File 'lib/instagram_crawler/parser/html.rb', line 4 def html @html end |
Instance Method Details
#parsing ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/instagram_crawler/parser/html.rb', line 10
#
# Parses the profile data embedded in the page's `window._sharedData` script,
# passes the timeline edges to +loop_edges+, and returns the values read from
# the first `ProfilePage` entry.
#
# NOTE(review): there is no guard for a page that lacks the `_sharedData`
# script or the expected keys; such a page raises from the lookups below.
#
# @return [Array] two elements: the timeline's `page_info` value and the
#   `logging_page_id` with its `profilePage_` prefix removed
# @raise [JSON::ParserError] if the sliced script text is not valid JSON
def parsing
  doc = Nokogiri::HTML(html)
  js_data = doc.at_xpath("//script[contains(text(),'window._sharedData')]")
  # Skips the first 21 characters and the last one; presumably the
  # "window._sharedData = " assignment prefix and the closing ";".
  json = JSON.parse(js_data.text[21..-2])
  profile = json["entry_data"]["ProfilePage"][0]
  timeline = profile["graphql"]["user"]["edge_owner_to_timeline_media"]
  page_info = timeline["page_info"]
  # String#delete takes a character set, so delete("profilePage_") would also
  # strip those letters from the id itself. Remove only the literal prefix.
  user_id = profile["logging_page_id"].sub(/\AprofilePage_/, "")
  loop_edges(timeline["edges"])
  [page_info, user_id]
end
#parsing_photo_page ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/instagram_crawler/parser/html.rb', line 30
#
# Reads the `window._sharedData` payload of a post page and returns its media
# reference from the first `PostPage` entry's `shortcode_media`.
#
# @return [Object] the `edges` value under `edge_sidecar_to_children` when that
#   key is present and truthy, otherwise the `display_url` value
def parsing_photo_page
  shared_script = Nokogiri::HTML(html).at_xpath("//script[contains(text(),'window._sharedData')]")
  # Same slice as the profile parser: skip the first 21 characters and the last one.
  payload = JSON.parse(shared_script.text[21..-2])
  media = payload["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"]
  sidecar = media["edge_sidecar_to_children"]
  return media["display_url"] unless sidecar

  sidecar["edges"]
end
#parsing_video_page ⇒ Object
24 25 26 27 28 |
# File 'lib/instagram_crawler/parser/html.rb', line 24
#
# Finds the page's `og:video` meta tag and returns its `content` attribute.
#
# NOTE(review): there is no guard for a page without an `og:video` tag.
#
# @return [String, nil] the `content` attribute of the matched tag; nil when
#   the tag has no such attribute
def parsing_video_page
  doc = Nokogiri::HTML(html)
  video_meta = doc.at_xpath("//meta[@property='og:video']")
  # Look the attribute up by name; taking the last attribute node returned the
  # wrong value whenever `content` was not written last in the tag.
  video_meta["content"]
end