Class: Aozoragen::Webmysteries
- Inherits: Object
  - Object
  - Aozoragen::Webmysteries
- Includes:
- Util
- Defined in:
- lib/aozoragen/webmysteries.rb
Instance Method Summary
- #each_chapter ⇒ Object
- #each_pages(index) ⇒ Object
- #get_text(xml_id) ⇒ Object
- #initialize(index_uri) ⇒ Webmysteries (constructor): A new instance of Webmysteries.
- #metainfo ⇒ Object
Methods included from Util
Constructor Details
#initialize(index_uri) ⇒ Webmysteries
Returns a new instance of Webmysteries.
14 15 16 17 |
# File 'lib/aozoragen/webmysteries.rb', line 14
#
# Downloads the index page and parses it with Nokogiri.
#
# The page is fetched through URI#open (provided by open-uri) rather than
# Kernel#open: since Ruby 3.0 open-uri no longer overrides Kernel#open, so
# passing a URI to Kernel#open raises instead of performing the request.
# URI#open behaves the same on older Rubies.
#
# @param index_uri [String, URI] address of the index page; converted with URI()
# @return [Webmysteries] a new instance of Webmysteries
def initialize( index_uri )
  @index_uri = URI( index_uri )
  # 'r:UTF-8' asks open-uri to treat the response body as UTF-8.
  @index_html = Nokogiri( @index_uri.open( 'r:UTF-8', &:read ) )
end
Instance Method Details
#each_chapter ⇒ Object
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/aozoragen/webmysteries.rb', line 28
#
# Walks the chapter links found on the index page and yields one Hash per
# chapter.
#
# @yieldparam chapter [Hash] :id is the 1-based position formatted as two
#   digits, :uri is the chapter link as a URI, and :text is a newline
#   followed by everything each_pages yielded for that link
def each_chapter
  chapter_links = @index_html / '#wiki-body h3 + ul li a'
  chapter_links.each_with_index do |anchor, position|
    chapter_uri = URI( anchor.attr( 'href' ) )
    body = "\n"
    each_pages( chapter_uri ) {|page_text| body << page_text }
    chapter_id = format( '%02d', position + 1 )
    yield( {id: chapter_id, uri: chapter_uri, text: body} )
  end
end
#each_pages(index) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/aozoragen/webmysteries.rb', line 39
#
# Yields the text of every entry referenced by a chapter, following the
# pagination links found on the chapter's first page.
#
# For each page, the "entry=<digits>" ids inside the FlashVars <param> of the
# page's <noscript> block are extracted and each one is passed to get_text;
# the result is yielded to the caller's block.
#
# Differences from the earlier implementation, which ended its loop by
# letting open(nil) raise TypeError and rescuing it:
# * the loop now stops explicitly when no page link is left, so a TypeError
#   raised elsewhere (including inside the caller's block) is no longer
#   silently discarded;
# * pages are fetched with URI#open instead of Kernel#open, so scraped hrefs
#   can never be interpreted as local paths or "|command" pipes, and the
#   method keeps working on Ruby 3.0+, where Kernel#open no longer accepts
#   URLs;
# * relative hrefs are resolved against the first page's URL.
#
# @param index [String, URI, nil] URL of the chapter's first page; nil is
#   ignored, as before
# @yieldparam text [String] return value of get_text for one entry id
# @return [nil]
def each_pages( index )
  return nil if index.nil?

  base_uri = URI( index )
  html = Nokogiri( base_uri.open( 'r', &:read ) )

  pages = (html / 'ul.pageNavi a').map {|a| a.attr( 'href' ) }
  pages.shift # delete current page

  while html
    flash_vars = (html / 'noscript param[name="FlashVars"]')[0].attr( 'value' )
    flash_vars.scan( /entry=(\d+)/ ) do |i|
      yield get_text( i[0] )
    end

    # A missing href (nil) ends the walk, exactly like running out of links.
    href = pages.shift
    html = href && Nokogiri( base_uri.merge( href ).open( 'r', &:read ) )
  end

  nil
end
#get_text(xml_id) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/aozoragen/webmysteries.rb', line 57
#
# Downloads one entry's XML document and converts its body to plain text.
#
# The document is fetched with URI#open (open-uri) instead of Kernel#open:
# on Ruby 3.0+ Kernel#open treats the "http://..." string as a file name and
# fails, while URI#open works on old and new Rubies alike.
#
# @param xml_id [String, Integer] entry id interpolated into the XML URL
# @return [Object] the cleaned text after for_tategaki and normalize_char;
#   both are helpers defined outside this listing (presumably String
#   extensions returning a String -- confirm against Util)
def get_text( xml_id )
  result = ''
  entry_uri = URI( "http://www.webmysteries.jp/entry_xml_data/#{xml_id}.xml" )
  entry_uri.open do |fx|
    # The payload is URL-escaped; unescape it, then keep only the contents of
    # each <entryBody> element with any inner tags stripped.
    CGI::unescape( fx.read ).scan( %r|<entryBody>(.*?)</entryBody>|m ) do |entry|
      result << entry[0].gsub( %r|<.*?>|m, "" )
    end
  end
  # 1. replace every line containing "つづく" with the '[#改ページ]' marker
  # 2. wrap each run of hiragana that follows another character in 《》
  #    NOTE(review): the doubled capture group suggests this pattern may have
  #    contained literal full-width parentheses that were lost when this
  #    listing was extracted -- confirm against the repository source.
  # 3. collapse runs of three or more newlines into a single blank line
  result.
    gsub( /^.*(つづく).*$/, '[#改ページ]' ).
    gsub( /(?<=.)(([あ-ん]+))/, '《\1》' ).
    gsub( /\n{3,}/m, "\n\n" ).
    for_tategaki.
    normalize_char
end
#metainfo ⇒ Object
19 20 21 22 23 24 25 26 |
# File 'lib/aozoragen/webmysteries.rb', line 19
#
# Collects the work's metadata from the index URI and the parsed index page.
#
# @return [Hash] with the keys
#   * :id     -- basename of the index URI's path with everything up to and
#                including the first "-" removed
#   * :title  -- text of the first '#wiki-body h1' element
#   * :author -- one-element Array holding the text of the last
#                '#wiki-body h2 + ul li' element; the key is absent when no
#                such element exists
def metainfo
  id = Pathname( @index_uri.path ).basename.sub( %r|.*?-(.*)$|, '\1' ).to_s
  info = {:id => id}
  info[:title] = (@index_html / '#wiki-body h1')[0].text

  # NOTE(review): only the last list item is kept (every earlier one is
  # overwritten), which is what the previous per-item assignment did. If the
  # page can list several authors this probably should collect all of them
  # -- confirm before changing.
  author_items = (@index_html / '#wiki-body h2 + ul li').to_a
  info[:author] = [author_items.last.text] unless author_items.empty?

  info
end