Module: Wikipedia

Defined in:
lib/wikipedia.rb

Constant Summary collapse

# Endpoint template for the Wikipedia "parse" API. article() substitutes the
# language code for %LANG% and appends the escaped page title after "page=".
URL =
"http://%LANG%.wikipedia.org/w/api.php?action=parse&page="

Class Method Summary collapse

Class Method Details

.article(n, lang = :en) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/wikipedia.rb', line 29

# Fetches a page through the Wikipedia parse API and returns its paragraphs.
#
# n    - page title; it is passed through escape() and appended to URL.
# lang - value substituted for the %LANG% placeholder in URL (default :en).
#
# Returns an Array with one String per <p> element found in the response,
# each cleaned by escape_text().
def self.article( n, lang = :en )

	address = URL.gsub("%LANG%", lang.to_s) + escape(n)

	# Read through URI#read (mixed in by open-uri, which the previous
	# Kernel#open call already relied on). Kernel#open no longer accepts URLs
	# on Ruby 3+, would treat a string starting with "|" as a command, and
	# left the returned IO unclosed.
	raw_data = URI.parse(address).read

	he = HTMLEntities.new

	# Entities are decoded twice and newlines removed before parsing
	# (presumably because the API response wraps already-escaped HTML; confirm).
	raw_data = he.decode( he.decode( raw_data ) ).gsub("\n", "")

	Hpricot(raw_data).search('p').map { |ph| escape_text( ph.inner_text ) }

end

.escape(s) ⇒ Object



49
50
51
52
53
54
55
# File 'lib/wikipedia.rb', line 49

# Prepares a page title for use in the query string.
#
# s - the title String. It is left untouched: the capitalisation is applied to
#     a copy, so callers keep their original value and frozen strings work.
#
# Returns the title after String#capitalize_every_word! (defined elsewhere in
# the project) and CGI.escape have been applied.
def self.escape(s)

	title = s.dup

	# The bang method's return value is deliberately ignored; only its
	# in-place effect on the copy is used.
	title.capitalize_every_word!

	CGI.escape( title )

end

.escape_text(s) ⇒ Object



57
58
59
60
61
62
63
64
65
# File 'lib/wikipedia.rb', line 57

# Strips bracketed cite-note markers such as [0], [1] from paragraph text.
#
# s - a mutable String; it is modified in place.
#
# Returns the same String object with every "[...]" group removed.
def self.escape_text(s)

	# Lazy quantifier: each bracket pair is removed on its own. The former
	# greedy "\[(.*)\]" swallowed all the text between the first "[" and the
	# last "]" of the paragraph.
	s.gsub!( /\[.*?\]/, '' )

	# gsub! returns nil when nothing matched, so return the string explicitly.
	s

end