Class: Alexandria::BookProviders::ThaliaProvider
- Inherits:
-
WebsiteBasedProvider
- Object
- AbstractProvider
- GenericProvider
- WebsiteBasedProvider
- Alexandria::BookProviders::ThaliaProvider
- Includes:
- Logging
- Defined in:
- lib/alexandria/book_providers/thalia.rb
Constant Summary collapse
- SITE =
'http://www.thalia.de'
- BASE_SEARCH_URL =
Format placeholders, in order: type (search type code), term (search term)
"#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s"
Instance Attribute Summary
Attributes inherited from AbstractProvider
Instance Method Summary collapse
- #create_search_uri(search_type, search_term) ⇒ Object
- #data_from_label(node, label_text) ⇒ Object
- #get_book_from_search_result(result) ⇒ Object
-
#initialize ⇒ ThaliaProvider
constructor
A new instance of ThaliaProvider.
- #parse_result_data(html, isbn, recursing = false) ⇒ Object
- #parse_search_result_data(html) ⇒ Object
- #search(criterion, type) ⇒ Object
- #url(book) ⇒ Object
Methods included from Logging
Methods inherited from WebsiteBasedProvider
Methods inherited from AbstractProvider
#<=>, abstract?, #abstract?, #action_name, #enabled, #reinitialize, #remove, #toggle_enabled, #transport, unabstract, #variable_name
Constructor Details
#initialize ⇒ ThaliaProvider
Returns a new instance of ThaliaProvider.
39 40 41 42 43 |
# File 'lib/alexandria/book_providers/thalia.rb', line 39

# Sets up the provider by handing 'Thalia' and 'Thalia (Germany)' to the
# superclass constructor and then reading the provider preferences.
# NOTE(review): the two strings are presumably the internal name and the
# display name -- confirm against the superclass signature.
def initialize
  super(
    'Thalia',
    'Thalia (Germany)'
  )
  prefs.read # this provider has no preferences of its own for the moment
end
Instance Method Details
#create_search_uri(search_type, search_term) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/alexandria/book_providers/thalia.rb', line 62

# Builds the Thalia search URL for a search type and a search term.
#
# @param search_type one of SEARCH_BY_ISBN, SEARCH_BY_AUTHORS,
#   SEARCH_BY_TITLE or SEARCH_BY_KEYWORD; any other value produces an
#   empty query parameter name
# @param search_term [String] the text (or ISBN) to search for
# @return [String] BASE_SEARCH_URL with the type code and term filled in
def create_search_uri(search_type, search_term)
  type_codes = {
    SEARCH_BY_ISBN => 'sq',
    SEARCH_BY_AUTHORS => 'sa', # author ("Autor")
    SEARCH_BY_TITLE => 'st', # title ("Titel")
    SEARCH_BY_KEYWORD => 'ssw' # keyword ("Schlagwort")
  }
  # Hash#fetch with a default makes the '' fallback actually take effect;
  # previously the fallback was computed and thrown away, so an unknown
  # search type passed nil to CGI.escape and raised.
  search_type_code = CGI.escape(type_codes.fetch(search_type, ''))

  search_term_encoded = if search_type == SEARCH_BY_ISBN
                          # ISBNs are canonicalised rather than URL-escaped.
                          # (An ISBN-13 conversion was considered here; check this!)
                          Library.canonicalise_isbn(search_term)
                        else
                          CGI.escape(search_term)
                        end

  format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
end
#data_from_label(node, label_text) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/alexandria/book_providers/thalia.rb', line 93

# Returns the text that sits next to a <strong> label inside +node+.
#
# The label is located with the selector strong[text()*='<label_text>'];
# the HTML of all text children of the label's parent is concatenated and
# stripped of surrounding whitespace.
#
# @param node a parsed document node supporting the % (first match) operator
# @param label_text [String] text the <strong> label must contain
# @return [String] the stripped text, or '' when the label or its parent
#   cannot be found
def data_from_label(node, label_text)
  label_node = node % "strong[text()*='#{label_text}']"
  # A missing label used to raise NoMethodError on nil; treat it like a
  # missing parent and return the empty string instead.
  item_node = label_node && label_node.parent
  return '' unless item_node

  item_node.children.select(&:text?).map(&:to_html).join.strip
end
#get_book_from_search_result(result) ⇒ Object
106 107 108 109 110 |
# File 'lib/alexandria/book_providers/thalia.rb', line 106

# Downloads the page behind a search result and parses it as a book page.
#
# @param result [Hash] a search result; its :lookup_url entry is fetched
# @return whatever parse_result_data returns for the fetched page
def get_book_from_search_result(result)
  lookup_url = result[:lookup_url]
  log.debug { "Fetching book from #{lookup_url}" }

  response = transport.get_response(URI.parse(lookup_url))
  # 'noisbn' is a placeholder ISBN; recursing = true stops parse_result_data
  # from following yet another result list.
  parse_result_data(response.body, 'noisbn', true)
end
#parse_result_data(html, isbn, recursing = false) ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/alexandria/book_providers/thalia.rb', line 112

# Extracts a book from a Thalia page.
#
# When the page is a search-result list (it contains
# 'div.articlePresentationSearchCH' elements), the result whose lookup URL
# carries the requested ISBN is fetched -- falling back to the first
# result -- and that page is parsed instead. Otherwise the page is parsed
# as a book detail page.
#
# @param html [String] page markup, handed to html_to_doc
# @param isbn [String] ISBN used to choose among several search results
# @param recursing [Boolean] true when parsing a followed result link;
#   prevents following result lists more than once
# @return [Array, nil] [book, image_url]; nil when a result list shows up
#   while already recursing, or when 'div#contentFull' is absent
# @raise [NoResultsError] when parsing the detail page raises an error
def parse_result_data(html, isbn, recursing = false)
  doc = html_to_doc(html)
  results_divs = doc / 'div.articlePresentationSearchCH'
  unless results_divs.empty?
    # Already recursing: the second page *should* be a book description,
    # not a result list, so stop here rather than loop endlessly.
    return if recursing

    # An ISBN lookup can yield multiple results (the site tries to be
    # useful, e.g. new editions for 9780974514055 "Programming Ruby").
    results = parse_search_result_data(html)
    isbn10 = Library.canonicalise_isbn(isbn)
    # Lookup URLs look like .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
    matching = results.find do |rslt|
      rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/} &&
        Regexp.last_match(1).delete('-') == isbn10
    end
    chosen = matching || results.first # fall back to the first result
    html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
    return parse_result_data(html_data.body, isbn, true)
  end

  begin
    if (div = doc % 'div#contentFull')
      title_img = ((div % :h2) / :img).first
      title = title_img['alt']
      # NOTE: the following img also has alt="von Author, Author..."

      # Start from an empty list so the book never receives nil authors
      # when the "Mehr von" heading is missing.
      authors = []
      if (author_h = doc % 'h3[text()*="Mehr von"]') # "More from..." links
        (author_h.parent / :a).each do |a|
          # 'sa' means search author; there may also be 'ssw' (search
          # keyword) links, which are not authors.
          next unless a['href'] =~ %r{BUCH/sa}

          # [0..-2] strips the little ">>" character at the end of the link.
          authors << a.inner_text[0..-2].strip
        end
      end

      item_details = doc % 'ul.itemDataList'
      isbns = [
        data_from_label(item_details, 'EAN'),
        data_from_label(item_details, 'ISBN')
      ]
      # Prefer the first identifier that is actually present.
      book_isbn = isbns.find { |candidate| !candidate.empty? } || isbns.first

      year = nil
      date = data_from_label(item_details, 'Erschienen:')
      year = Regexp.last_match(1).to_i if date =~ /(\d{4})/

      binding = data_from_label(item_details, 'Einband')
      publisher = data_from_label(item_details, 'Erschienen bei:')

      book = Book.new(title, authors, book_isbn, publisher, year, binding)

      image_link = doc % 'a[@id=itemPicStart]'
      image_url = image_link && image_link['href']

      return [book, image_url]
    end
  rescue StandardError => ex
    trace = ex.backtrace.join("\n> ")
    log.warn {
      'Failed parsing search results for Thalia ' \
      "#{ex.message} #{trace}"
    }
    raise NoResultsError
  end
end
#parse_search_result_data(html) ⇒ Object
79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/alexandria/book_providers/thalia.rb', line 79

# Collects the entries of a Thalia search-result page.
#
# @param html [String] page markup, handed to html_to_doc
# @return [Array<Hash>] one hash per 'div.articlePresentationSearchCH'
#   element that has a title link, with :title (the link's inner HTML) and
#   :lookup_url (the link's href); empty when nothing matches
def parse_search_result_data(html)
  doc = html_to_doc(html)
  book_search_results = []
  (doc / 'div.articlePresentationSearchCH').each do |div|
    title_link = div % 'div.articleText/h2/a'
    # Skip entries without the expected title anchor instead of failing
    # with NoMethodError on nil.
    next unless title_link

    book_search_results << {
      title: title_link.inner_html,
      lookup_url: title_link['href']
    }
  end
  book_search_results
end
#search(criterion, type) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/alexandria/book_providers/thalia.rb', line 49

# Runs a search against the Thalia site.
#
# @param criterion [String] the search term (an ISBN for ISBN searches)
# @param type the search type, e.g. SEARCH_BY_ISBN
# @return for ISBN searches, the result of parse_result_data on the
#   response; otherwise an array with one parsed entry per search result
# @raise [NoResultsError] when a non-ISBN search yields no results
def search(criterion, type)
  req = create_search_uri(type, criterion)
  puts req if $DEBUG
  response = transport.get_response(URI.parse(req))

  # ISBN searches are parsed directly as a (possible) book page.
  return parse_result_data(response.body, criterion) if type == SEARCH_BY_ISBN

  found = parse_search_result_data(response.body)
  raise NoResultsError if found.empty?

  found.map { |entry| get_book_from_search_result(entry) }
end
#url(book) ⇒ Object
45 46 47 |
# File 'lib/alexandria/book_providers/thalia.rb', line 45

# Returns the Thalia URL for a book: the ISBN search URI built from the
# book's isbn.
#
# @param book an object responding to #isbn
# @return [String] the search URI produced by create_search_uri
def url(book)
  isbn = book.isbn
  create_search_uri(SEARCH_BY_ISBN, isbn)
end