Module: Wikipedia
- Defined in:
- lib/wikipedia.rb
Constant Summary collapse
- URL =
"http://%LANG%.wikipedia.org/w/api.php?action=parse&page="
Class Method Summary collapse
Class Method Details
.article(n, lang = :en) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/wikipedia.rb', line 29

# Downloads the parsed page for +n+ and returns the text of its paragraphs.
#
# @param n [String] article title; it is passed through +escape+ before
#   being appended to URL
# @param lang [Symbol, String] value substituted for "%LANG%" in URL
#   (defaults to :en)
# @return [Array<String>] one entry per <p> element found in the response,
#   each passed through +escape_text+
def self.article(n, lang = :en)
  endpoint = URL.gsub("%LANG%", lang.to_s) + escape(n)

  # The block form closes the handle as soon as the body has been read,
  # instead of leaving it open until garbage collection.
  # NOTE(review): this relies on Kernel#open accepting URLs (open-uri);
  # newer Rubies require URI.open instead -- confirm the target version.
  raw_data = open(endpoint) { |io| io.read }

  # The response is entity-decoded twice, exactly as before, and newlines
  # are stripped before the markup is handed to the parser.
  decoder = HTMLEntities.new
  markup = decoder.decode(decoder.decode(raw_data)).gsub("\n", "")

  Hpricot(markup).search('p').map { |ph| escape_text(ph.inner_text) }
end
.escape(s) ⇒ Object
49 50 51 52 53 54 55 |
# File 'lib/wikipedia.rb', line 49

# Builds the URL-safe form of an article title.
#
# @param s [String] article title; left untouched by this method
# @return [String] the title after +capitalize_every_word!+ and CGI.escape
def self.escape(s)
  # Work on a copy so the caller's string is not modified and frozen
  # strings are accepted.
  # NOTE(review): capitalize_every_word! is defined outside this listing;
  # presumably it capitalizes each word in place -- confirm.
  title = s.dup
  title.capitalize_every_word!
  CGI.escape(title)
end
.escape_text(s) ⇒ Object
57 58 59 60 61 62 63 64 65 |
# File 'lib/wikipedia.rb', line 57

# Removes bracketed cite-note markers such as [0] or [1] from +s+.
#
# Hpricot's inner_text() already strips the markup; this only drops the
# bracketed markers that remain in the text.
#
# @param s [String] paragraph text; modified in place
# @return [String] the same string object, with every "[...]" span removed
def self.escape_text(s)
  # Non-greedy on purpose: a greedy ".*" would match from the first "[" to
  # the last "]" and delete all the text between two cite notes.
  s.gsub!(/\[.*?\]/, '')
  s
end