Class: Biblionet::Extractors::AuthorExtractor
- Defined in:
- lib/bookshark/extractors/author_extractor.rb
Instance Attribute Summary collapse
-
#author ⇒ Object
readonly
Returns the value of attribute author.
Attributes inherited from Base
#biblionet_id, #filepath, #page, #url
Instance Method Summary collapse
-
#extract_author(biblionet_id = @biblionet_id, author_page = @page) ⇒ Object
`def to_json_pretty; JSON.pretty_generate(@author) unless @author.nil?; end`.
-
#initialize(uri = nil) ⇒ AuthorExtractor
constructor
A new instance of AuthorExtractor.
- #load_and_extract_author(uri = nil) ⇒ Object
- #split_name(fullname) ⇒ Object
Methods inherited from Base
#decode_text, decode_text, #load_page, #load_page_from_file, #load_page_from_url, #present?, #save_page
Methods included from FileManager
#list_directories, #list_files, #save_to
Constructor Details
#initialize(uri = nil) ⇒ AuthorExtractor
Returns a new instance of AuthorExtractor.
12 13 14 15 |
# File 'lib/bookshark/extractors/author_extractor.rb', line 12
#
# Builds a new AuthorExtractor.
#
# Forwards +uri+ to the Base initializer and then runs #extract_author,
# but only when a uri was supplied and @page is set.
#
# NOTE(review): the `extract_author` call was missing from the extracted
# listing (four source lines, three statements shown); it is restored
# here -- confirm against the source file.
# NOTE(review): presumably Base#initialize populates @page from the uri --
# confirm in Base.
#
# @param uri [Object, nil] location handed to Base#initialize
def initialize(uri = nil)
  super(uri)
  extract_author unless uri.nil? || @page.nil?
end
Instance Attribute Details
#author ⇒ Object (readonly)
Returns the value of attribute author.
10 11 12 |
# File 'lib/bookshark/extractors/author_extractor.rb', line 10
#
# Read-only accessor for the author attribute.
#
# @return [Object] the current value of @author (nil until it is assigned)
def author
  @author
end
Instance Method Details
#extract_author(biblionet_id = @biblionet_id, author_page = @page) ⇒ Object
# Renders @author as pretty-printed JSON.
#
# @return [String, nil] the JSON text, or nil when @author is nil
def to_json_pretty
  return nil if @author.nil?

  JSON.pretty_generate(@author)
end
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/bookshark/extractors/author_extractor.rb', line 27
#
# Builds the author hash from an author page and stores it in @author.
#
# The page is wrapped in an AuthorDataExtractor, its fullname is broken up
# with #split_name, and the resulting pieces are combined with the page's
# image, bio and awards.
#
# NOTE(review): identifiers containing "author" (method name, the
# author_page parameter, the author_hash local and the constructor
# argument) were missing from the extracted listing and are restored here
# from the signature and the commented-out debug line -- confirm against
# the source file.
#
# @param biblionet_id [Object] id stored under :b_id (defaults to @biblionet_id)
# @param author_page [Object] page handed to AuthorDataExtractor (defaults to @page)
# @return [Hash, nil] the author hash, or nil when neither a lastname nor a
#   firstname could be determined (in which case @author is left untouched)
def extract_author(biblionet_id = @biblionet_id, author_page = @page)
  puts "Extracting author: #{biblionet_id}"
  page = AuthorDataExtractor.new(author_page)

  identity  = split_name(page.fullname)
  lastname  = identity[:lastname]
  firstname = identity[:firstname]

  # The listing used a condition-less `elsif`, which made Ruby treat the
  # following assignment as the condition. The net effect was an `else`
  # branch, so it is spelled as one here.
  name = if present?(lastname) && present?(firstname)
           "#{lastname}, #{firstname}"
         else
           lastname
         end

  author_hash = {
    name: name,
    firstname: firstname,
    lastname: lastname,
    extra_info: identity[:extra_info],
    image: page.image,
    bio: page.bio,
    award: page.awards,
    b_id: biblionet_id
  }

  # puts JSON.pretty_generate(author_hash)

  return nil if author_hash[:lastname].nil? && author_hash[:firstname].nil?

  @author = author_hash
end
#load_and_extract_author(uri = nil) ⇒ Object
18 19 20 21 |
# File 'lib/bookshark/extractors/author_extractor.rb', line 18
#
# Loads a page and extracts the author from it.
#
# Calls load_page with +uri+ and then runs #extract_author, but only when a
# uri was supplied and @page is set.
#
# NOTE(review): the method name and the `extract_author` call were missing
# from the extracted listing (four source lines, three statements shown);
# both are restored here -- confirm against the source file.
#
# @param uri [Object, nil] location handed to load_page
# @return [Object, nil] the result of #extract_author, or nil when it is skipped
def load_and_extract_author(uri = nil)
  load_page(uri)
  extract_author unless uri.nil? || @page.nil?
end
#split_name(fullname) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/bookshark/extractors/author_extractor.rb', line 57
#
# Splits a comma-separated author name into its components.
#
# The first part is always the lastname. With two parts, the second is
# stored as :extra_info when it looks like a year range and as :firstname
# otherwise. With three or more parts, the second is the firstname and
# everything after it is kept (re-joined with ", ") as :extra_info, so no
# trailing part is silently dropped.
#
# A nil fullname is treated like an empty string instead of raising.
#
# @param fullname [String, nil] e.g. "Tolkien, John Ronald Reuel, 1892-1973"
# @return [Hash] always has :lastname (nil for blank input); :firstname and
#   :extra_info are present only when they could be determined
def split_name(fullname)
  # Matches digits-digits or digits- in text like:
  # Tolkien, John Ronald Reuel, 1892-1973
  years_re = /\d+-\d*/

  parts = fullname.to_s.split(',').map(&:strip)
  identity = { lastname: parts[0] }

  if parts.length == 2
    key = parts[1] =~ years_re ? :extra_info : :firstname
    identity[key] = parts[1]
  elsif parts.length >= 3
    identity[:firstname]  = parts[1]
    identity[:extra_info] = parts[2..-1].join(', ')
  end

  identity
end