Module: RelatonBsi::Scrapper

Defined in:
lib/relaton_bsi/scrapper.rb

Overview

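Scrapper: queries the BSI Shopify storefront GraphQL API for product data and builds a BsiBibliographicItem from a search hit.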

Constant Summary collapse

# HTTP transport handed to the GraphQL client (see `execute: HTTP`).
# It targets the shop-bsi.myshopify.com storefront GraphQL endpoint; the API
# version (2021-04) is part of the URL.
# The block defines `headers`, which supplies the storefront access token
# header.
# NOTE(review): the access token is hard-coded in source — presumably a
# public storefront token; confirm it is intended to be published.
HTTP =
GraphQL::Client::HTTP.new "https://shop-bsi.myshopify.com/api/2021-04/graphql.json" do
  # Returns the extra request headers; the context argument is ignored.
  def headers(_context)
    { "x-shopify-storefront-access-token": "c935c196c0b7d1d86bfb5139006cfd46" }
  end
end
# GraphQL schema loaded from the schema.json file that sits in the same
# directory as this source file.
Schema =
GraphQL::Client.load_schema(File.join(__dir__, "schema.json"))
# GraphQL client that validates against Schema and executes through HTTP.
Client =
GraphQL::Client.new(
  schema: Schema,
  execute: HTTP,
)
# Parsed GraphQL document defining ProductFragment (on Product).
# The fragment selects:
#   * createdAt / publishedAt / updatedAt, productType and description;
#   * metafields from the "global" namespace, aliased as committee,
#     designated (key "designatedStandard"), packContents, summary and
#     corrigendumHandle;
#   * for the first 250 variants, the "version" and "isbn" metafields.
# The heredoc tag is single-quoted ('GRAPHQL'), so the text is passed to
# Client.parse without Ruby interpolation or escape processing.
Product =
Client.parse <<~'GRAPHQL'
  fragment ProductFragment on Product {
    createdAt
    publishedAt
    updatedAt
    productType
    committee: metafield(namespace: "global", key: "committee") {
      value
    }
    designated: metafield(namespace: "global", key: "designatedStandard") {
      value
    }
    packContents: metafield(namespace: "global", key: "packContents") {
      value
    }
    summary: metafield(namespace: "global", key: "summary") {
      value
    }
    corrigendumHandle: metafield(namespace: "global", key: "corrigendumHandle") {
      value
    }
    variants(first: 250) {
      edges {
        node {
          version: metafield(namespace: "global", key: "version") {
            value
          }
          isbn: metafield(namespace: "global", key: "isbn") {
            value
          }
        }
      }
    }
    description
  }
GRAPHQL
# Parsed GraphQL document defining the GetProducts query, used as
# Query::GetProducts. It looks up a single product by its handle (variable
# $h0, a required String) and selects the fields of ProductFragment.
# The fragment spread uses the fragment's full Ruby constant path
# (RelatonBsi::Scrapper::Product::ProductFragment), so it depends on the
# Product constant keeping that name and namespace.
Query =
Client.parse <<~GRAPHQL
  query GetProducts($h0: String!) {
    productByHandle(handle: $h0) {
      ...RelatonBsi::Scrapper::Product::ProductFragment
    }
  }
GRAPHQL

Class Method Summary collapse

Class Method Details

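.parse_page(hit) ⇒ BsiBibliographicItem

Parse page: query the storefront GraphQL API for the product referenced by the hit and build a bibliographic item from the result and the hit's own fields.

Parameters:

  • hit — search hit; its `hit` hash supplies :url, :code, :title, :status and :ics

Returns:

  • (BsiBibliographicItem)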


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/relaton_bsi/scrapper.rb', line 71

# Build a bibliographic item for a search hit.
#
# Runs Query::GetProducts with the hit's :url as the product handle, converts
# the returned product to a hash, and combines it with the fields already
# present on the hit.
#
# @param hit [#hit] search hit whose `hit` hash provides :url, :code,
#   :title, :status and :ics
# @return [BsiBibliographicItem]
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  entry = hit.hit
  response = Client.query(Query::GetProducts, variables: { h0: entry[:url] })
  product = response.data.product_by_handle.to_h
  code = entry[:code]

  # Relations are not populated: the page-scraping path that fed
  # fetch_relations is disabled.
  attrs = {
    fetched: Date.today.to_s,
    type: "standard",
    docid: fetch_docid(code, product),
    docnumber: code[/\d+/].to_s, # first run of digits in the code, "" if none
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(entry[:title]),
    doctype: fetch_doctype(hit),
    docstatus: fetch_status(entry[:status]),
    ics: fetch_ics(entry[:ics]),
    date: fetch_dates(hit),
    contributor: fetch_contributors(hit),
    editorialgroup: fetch_editorialgroup(product),
    structuredidentifier: fetch_structuredid(hit),
    abstract: fetch_abstract(product),
    copyright: fetch_copyright(hit),
    link: fetch_link(entry[:url]),
    place: ["London"],
  }
  BsiBibliographicItem.new(**attrs)
end