Class: Alexandria::BookProviders::ThaliaProvider

Inherits:

Object
AbstractProvider
GenericProvider
WebsiteBasedProvider
Alexandria::BookProviders::ThaliaProvider

Includes: Logging

Defined in: lib/alexandria/book_providers/thalia.rb

Constant Summary collapse

SITE =

'http://www.thalia.de'

BASE_SEARCH_URL = (format arguments, in order: type, term)

"#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s"

Instance Attribute Summary

Attributes inherited from AbstractProvider

#fullname, #name, #prefs

Instance Method Summary collapse

Constructor Details

#initialize ⇒ `ThaliaProvider`

Returns a new instance of ThaliaProvider.

# File 'lib/alexandria/book_providers/thalia.rb', line 39

# Sets up the Thalia provider: passes 'Thalia' and 'Thalia (Germany)' to the
# parent constructor (presumably the internal name and the display name --
# confirm against AbstractProvider) and then reads the provider preferences.
def initialize
  super('Thalia', 'Thalia (Germany)')
  # no preferences for the moment
  prefs.read
end

Instance Method Details

#create_search_uri(search_type, search_term) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 62

# Builds the Thalia search URL for a search type and a search term.
#
# The search type is mapped to the query parameter name used in the URL
# ('sq', 'sa', 'st' or 'ssw'). An unrecognised search type falls back to an
# empty parameter name instead of raising.
#
# @param search_type [Object] one of the SEARCH_BY_* constants
# @param search_term [String] the ISBN, author, title or keyword to look up
# @return [String] BASE_SEARCH_URL filled in with the type code and the term
def create_search_uri(search_type, search_term)
  type_codes = {
    SEARCH_BY_ISBN => 'sq',
    SEARCH_BY_AUTHORS => 'sa', # Autor (author)
    SEARCH_BY_TITLE => 'st', # Titel (title)
    SEARCH_BY_KEYWORD => 'ssw' # Schlagwort (keyword)
  }
  # Default to '' for unknown types so that CGI.escape never receives nil.
  search_type_code = CGI.escape(type_codes.fetch(search_type, ''))
  search_term_encoded = if search_type == SEARCH_BY_ISBN
                          # ISBNs are canonicalised rather than URL-escaped.
                          Library.canonicalise_isbn(search_term) # check this!
                        else
                          CGI.escape(search_term)
                        end
  format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
end

#data_from_label(node, label_text) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 93

# Collects the text found next to a <strong> label inside +node+.
#
# Looks up a <strong> element whose text contains +label_text+, then joins
# the HTML of the text children of that element's parent and strips the
# surrounding whitespace.
#
# @param node [#%] document node that is searched for the label
# @param label_text [String] text the <strong> label has to contain
# @return [String] the stripped text, or '' when the label or its parent is
#   missing
def data_from_label(node, label_text)
  label_node = node % "strong[text()*='#{label_text}']"
  # The label may be absent from the page; answer '' rather than calling
  # #parent on nil.
  item_node = label_node && label_node.parent
  return '' unless item_node

  # Only text children carry the value; element children (such as the label
  # itself) are skipped.
  item_node.children.select(&:text?).map(&:to_html).join.strip
end

#get_book_from_search_result(result) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 106

# Downloads the page behind a single search hit and parses it.
#
# @param result [Hash] a search hit; its :lookup_url entry is fetched
# @return [Object] whatever parse_result_data returns for the fetched page
def get_book_from_search_result(result)
  lookup_url = result[:lookup_url]
  log.debug { "Fetching book from #{lookup_url}" }
  response = transport.get_response(URI.parse(lookup_url))
  # 'noisbn' is passed in place of an ISBN, and recursing is set to true.
  parse_result_data(response.body, 'noisbn', true)
end

#parse_result_data(html, isbn, recursing = false) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 112

# Parses a Thalia response page into a book and its cover image URL.
#
# The page is either a list of search hits or a single book description.
# For a hit list, the hit whose URL carries the requested ISBN is chosen
# (falling back to the first hit); its page is fetched and parsed in turn.
# A hit list met while already recursing yields nil, so the lookup cannot
# loop endlessly.
#
# @param html [String] body of the response to parse
# @param isbn [String] ISBN used to choose among several hits
# @param recursing [Boolean] true when this call is already following a hit
# @return [Array, nil] +[book, image_url]+, or nil when the page has no
#   'div#contentFull' element or a hit list is met while recursing
# @raise [NoResultsError] when extracting the book details raises an error
def parse_result_data(html, isbn, recursing = false)
  doc = html_to_doc(html)

  results_divs = doc / 'div.articlePresentationSearchCH'
  unless results_divs.empty?
    # Second time around *should* lead to a book description, not a result
    # list, so give up instead of recursing again.
    return if recursing

    # An ISBN lookup can yield multiple results (trying to be useful, such
    # as for new editions, e.g. 9780974514055 "Programming Ruby").
    results = parse_search_result_data(html)
    canonical_isbn = Library.canonicalise_isbn(isbn)
    # e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
    matching = results.find do |rslt|
      rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/} &&
        Regexp.last_match(1).delete('-') == canonical_isbn
    end
    chosen = matching || results.first # fallback!
    html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
    return parse_result_data(html_data.body, isbn, true)
  end

  begin
    div = doc % 'div#contentFull'
    return unless div

    title_img = ((div % :h2) / :img).first
    title = title_img['alt']

    # note, the following img also has alt="von Author, Author..."

    # Start from an empty list so that a page without the "Mehr von"
    # heading yields a book with no authors rather than nil.
    authors = []
    if (author_h = doc % 'h3[text()*="Mehr von"]') # "More from..." links
      (author_h.parent / :a).each do |a|
        # 'sa' means search author, there may also be 'ssw' (search
        # keyword) links
        next unless a['href'] =~ %r{BUCH/sa}

        # [0..-2] strips the little >> character that ends the link text
        authors << a.inner_text[0..-2].strip
      end
    end

    item_details = doc % 'ul.itemDataList'
    isbns = [
      data_from_label(item_details, 'EAN'),
      data_from_label(item_details, 'ISBN')
    ]

    # The first four-digit run in the publication date is taken as the year.
    date = data_from_label(item_details, 'Erschienen:')
    year = date =~ /(\d{4})/ ? Regexp.last_match(1).to_i : nil

    cover = data_from_label(item_details, 'Einband')
    publisher = data_from_label(item_details, 'Erschienen bei:')

    book = Book.new(title, authors, isbns.first,
                    publisher, year, cover)

    image_link = doc % 'a[@id=itemPicStart]'
    image_url = image_link && image_link['href']

    [book, image_url]
  rescue StandardError => e
    trace = e.backtrace.join("\n> ")
    log.warn {
      'Failed parsing search results for Thalia ' \
      "#{e.message} #{trace}"
    }
    raise NoResultsError
  end
end

#parse_search_result_data(html) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 79

# Extracts the hits from a search results page.
#
# @param html [String] body of a search results page
# @return [Array<Hash>] one hash per 'div.articlePresentationSearchCH'
#   element, holding :title (inner HTML of the title link) and :lookup_url
#   (the href of that link)
def parse_search_result_data(html)
  hit_divs = html_to_doc(html) / 'div.articlePresentationSearchCH'
  hit_divs.map do |hit_div|
    link = hit_div % 'div.articleText/h2/a'
    { title: link.inner_html, lookup_url: link['href'] }
  end
end

#search(criterion, type) ⇒ `Object`

# File 'lib/alexandria/book_providers/thalia.rb', line 49

# Runs a search against the site and returns the parsed outcome.
#
# An ISBN search hands the response straight to parse_result_data. Any other
# search treats the response as a hit list and resolves every hit through
# get_book_from_search_result.
#
# @param criterion [String] the search term
# @param type [Object] one of the SEARCH_BY_* constants
# @return [Object] the parse_result_data result for an ISBN search, otherwise
#   an Array with one entry per hit
# @raise [NoResultsError] when a non-ISBN search yields no hits
def search(criterion, type)
  request_url = create_search_uri(type, criterion)
  puts request_url if $DEBUG
  response = transport.get_response(URI.parse(request_url))
  return parse_result_data(response.body, criterion) if type == SEARCH_BY_ISBN

  hits = parse_search_result_data(response.body)
  raise NoResultsError if hits.empty?

  hits.map { |hit| get_book_from_search_result(hit) }
end

#url(book) ⇒ `Object`



45
46
47

# File 'lib/alexandria/book_providers/thalia.rb', line 45

# Returns the search URL for the given book, built as an ISBN search on the
# book's isbn.
#
# @param book [#isbn] the book to build the URL for
# @return [String] the result of create_search_uri for SEARCH_BY_ISBN
def url(book)
  create_search_uri(SEARCH_BY_ISBN, book.isbn)
end

Class: Alexandria::BookProviders::ThaliaProvider

Constant Summary collapse

Instance Attribute Summary

Attributes inherited from AbstractProvider

Instance Method Summary collapse

Methods included from Logging

Methods inherited from WebsiteBasedProvider

Methods inherited from AbstractProvider

Constructor Details

#initialize ⇒ ThaliaProvider

Instance Method Details

#create_search_uri(search_type, search_term) ⇒ Object

#data_from_label(node, label_text) ⇒ Object

#get_book_from_search_result(result) ⇒ Object

#parse_result_data(html, isbn, recursing = false) ⇒ Object

#parse_search_result_data(html) ⇒ Object

#search(criterion, type) ⇒ Object

#url(book) ⇒ Object

#initialize ⇒ `ThaliaProvider`

#create_search_uri(search_type, search_term) ⇒ `Object`

#data_from_label(node, label_text) ⇒ `Object`

#get_book_from_search_result(result) ⇒ `Object`

#parse_result_data(html, isbn, recursing = false) ⇒ `Object`

#parse_search_result_data(html) ⇒ `Object`

#search(criterion, type) ⇒ `Object`

#url(book) ⇒ `Object`