Class: Aozoragen::Webmysteries

Inherits:
Object
  • Object
show all
Includes:
Util
Defined in:
lib/aozoragen/webmysteries.rb

Instance Method Summary collapse

Methods included from Util

#detag

Constructor Details

#initialize(index_uri) ⇒ Webmysteries

Returns a new instance of Webmysteries.



14
15
16
17
# File 'lib/aozoragen/webmysteries.rb', line 14

def initialize( index_uri )
	@index_uri = URI( index_uri )
	@index_html = Nokogiri( open( @index_uri, 'r:UTF-8', &:read ) )
end

Instance Method Details

#each_chapter ⇒ Object



28
29
30
31
32
33
34
35
36
37
# File 'lib/aozoragen/webmysteries.rb', line 28

# Walks every chapter link found on the index page and hands the caller the
# assembled chapter.
#
# For each link the text of all of its pages (as produced by #each_pages) is
# appended, in order, to a string that starts with a single newline.
#
# @yield [Hash] one hash per chapter with the keys
#   :id (two-digit, 1-based position), :uri (URI of the chapter) and
#   :text (the concatenated page text)
def each_chapter
	anchors = @index_html / '#wiki-body h3 + ul li a'
	anchors.each_with_index do |anchor, position|
		chapter_uri = URI( anchor.attr( 'href' ) )
		body = "\n"
		each_pages( chapter_uri ) { |page_text| body << page_text }
		chapter = {id: format( '%02d', position + 1 ), uri: chapter_uri, text: body}
		yield( chapter )
	end
end

#each_pages(index) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/aozoragen/webmysteries.rb', line 39

# Yields the text of every entry on every page of one chapter.
#
# The first page is fetched from +index+. The links inside 'ul.pageNavi' on
# that first page give the remaining pages; the first link is discarded
# because it is treated as the page already loaded. On each page the
# 'FlashVars' parameter is scanned for "entry=<digits>" and each id is passed
# to #get_text.
#
# Changes from the previous implementation:
# * The loop ends when the link list is exhausted instead of relying on a
#   rescued TypeError from open(nil). A TypeError raised by #get_text or by
#   the caller's block is therefore no longer silently swallowed.
# * Pages are fetched with URI#open (open-uri), never Kernel#open. Kernel#open
#   does not open URLs on Ruby 3.0+, and handing it strings taken from remote
#   HTML would let a crafted href open local files or "|command" pipes.
# * Links are resolved against +index+, so relative hrefs work too.
#
# @param index [String, URI] absolute address of the chapter's first page
# @yield the value returned by #get_text for each entry id, in page order
# @return [nil]
def each_pages( index )
	base = URI( index )
	html = Nokogiri( base.open( 'r', &:read ) )

	hrefs = (html / 'ul.pageNavi a').map { |a| a.attr( 'href' ) }
	hrefs.shift	# delete current page

	loop do
		(html / 'noscript param[name="FlashVars"]')[0].attr( 'value' ).scan( /entry=(\d+)/ ) do |i|
			yield get_text( i[0] )
		end

		# As before, a missing link (nil href) ends the walk.
		href = hrefs.shift
		break unless href
		html = Nokogiri( base.merge( href ).open( 'r', &:read ) )
	end
end

#get_text(xml_id) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/aozoragen/webmysteries.rb', line 57

# Downloads one entry's XML and converts its body to plain text.
#
# The document is fetched with URI#open (open-uri) rather than Kernel#open,
# because on Ruby 3.0+ Kernel#open treats an "http://..." string as a local
# file name and fails.
#
# Processing steps, in order:
# 1. URL-decode the whole response and take the contents of every
#    <entryBody> element, with anything that looks like a tag removed.
# 2. Replace each line containing "つづく" with the marker '[#改ページ]'.
# 3. Wrap hiragana runs that follow another character in 《 》.
#    NOTE(review): as written this matches *any* such hiragana run; if the
#    intent is to convert parenthesised readings, the pattern may need
#    full-width parentheses as literals -- confirm against the repository.
# 4. Collapse runs of three or more newlines into two.
# 5. Apply #for_tategaki and #normalize_char (defined outside this method).
#
# @param xml_id [String, Integer] numeric id of the entry
# @return the value returned by #normalize_char (presumably a String)
def get_text( xml_id )
	result = ''
	URI( "http://www.webmysteries.jp/entry_xml_data/#{xml_id}.xml" ).open do |fx|
		CGI::unescape( fx.read ).scan( %r|<entryBody>(.*?)</entryBody>|m ) do |entry|
			result << entry[0].gsub( %r|<.*?>|m, "" )
		end
	end
	result.
		gsub( /^.*(つづく).*$/, '[#改ページ]' ).
		gsub( /(?<=.)(([あ-ん]+))/, '《\1》' ).
		gsub( /\n{3,}/m, "\n\n" ).
		for_tategaki.
		normalize_char
end

#metainfo ⇒ Object



19
20
21
22
23
24
25
26
# File 'lib/aozoragen/webmysteries.rb', line 19

# Collects bibliographic data from the index page fetched by the constructor.
#
# * :id     -- last path segment of the index URI with everything up to and
#              including its first hyphen removed
# * :title  -- text of the first '#wiki-body h1' element
# * :author -- one-element array holding the text of the LAST
#              '#wiki-body h2 + ul li' element; the key is absent when no such
#              element exists
#
# @return [Hash]
def metainfo
	slug = File.basename( @index_uri.path ).sub( %r|.*?-(.*)$|, '\1' )
	heading = (@index_html / '#wiki-body h1').first
	meta = {:id => slug, :title => heading.text}

	# Each matching item replaces the previous one, so only the final item counts.
	final_item = (@index_html / '#wiki-body h2 + ul li' ).to_a.last
	meta[:author] = [final_item.text] if final_item
	meta
end