Class: Wikipedia::Page
- Inherits:
-
Object
- Object
- Wikipedia::Page
- Defined in:
- lib/wikipedia/page.rb
Class Method Summary
- .sanitize(s) ⇒ Object
Instance Method Summary collapse
- #categories ⇒ Object
- #content ⇒ Object
- #coordinates ⇒ Object
- #editurl ⇒ Object
- #extlinks ⇒ Object
- #fullurl ⇒ Object
- #image_descriptionurl ⇒ Object
- #image_descriptionurls ⇒ Object
- #image_metadata ⇒ Object
- #image_url ⇒ Object
- #image_urls ⇒ Object
- #images ⇒ Object
-
#initialize(json) ⇒ Page
constructor
A new instance of Page.
- #json ⇒ Object
- #links ⇒ Object
- #page ⇒ Object
- #raw_data ⇒ Object
- #redirect? ⇒ Boolean
- #redirect_title ⇒ Object
- #sanitized_content ⇒ Object
- #summary ⇒ Object
- #templates ⇒ Object
- #text ⇒ Object
- #title ⇒ Object
Constructor Details
#initialize(json) ⇒ Page
Returns a new instance of Page.
3 4 5 6 7 |
# File 'lib/wikipedia/page.rb', line 3

# Wraps a raw JSON response string.
#
# The untouched string is kept in @json and its parsed form in @data.
#
# @param json [String] JSON text to parse
def initialize(json)
  # Loaded lazily here, as in the original implementation.
  require 'json'
  @json = json
  # JSON.load is retained for its lenient input handling, but object
  # instantiation through "json_class" keys is switched off explicitly
  # because the payload comes from outside the program.
  @data = JSON.load(json, nil, create_additions: false)
end
Class Method Details
.sanitize(s) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/wikipedia/page.rb', line 109

# Reduces wiki markup to lightly formatted text.
#
# Works on a copy of the input: removes {{...}} templates, the first
# {| ... |} table block, image/file links, <ref> elements and HTML
# comments; replaces internal links by their label; converts quote-based
# bold/italic markup to <b>/<i>; and wraps blank-line separated sections
# in <p> tags when there is more than one section.
#
# @param s [String, nil] markup to clean
# @return [String, nil] cleaned text, or nil when +s+ is nil
def self.sanitize(s)
  return unless s

  text = s.dup

  # Templates may nest, so remove innermost {{...}} pairs until none remain
  # (gsub! returns nil once nothing was replaced).
  loop { break unless text.gsub!(/\{\{[^\{\}]+?\}\}/, '') }

  # strip info box
  text.sub!(/^\{\|[^\{\}]+?\n\|\}\n/, '')

  strip_media = lambda do
    text.gsub!(/\[\[Image:[^\[\]]+?\]\]/, '')
    text.gsub!(/\[\[File:[^\[\]]+?\]\]/, '')
  end

  # Media links with zero or one pipe also match the internal-link patterns
  # below, which would leave their target or caption behind as text, so
  # they are removed first.
  strip_media.call

  # strip internal links, keeping the label (or the target when unlabeled)
  text.gsub!(/\[\[([^\]\|]+?)\|([^\]\|]+?)\]\]/, '\2')
  text.gsub!(/\[\[([^\]\|]+?)\]\]/, '\1')

  # Second pass: media links whose caption contained internal links only
  # become matchable once those inner links have been flattened.
  strip_media.call

  # convert bold/italic to html
  text.gsub!(/'''''(.+?)'''''/, '<b><i>\1</i></b>')
  text.gsub!(/'''(.+?)'''/, '<b>\1</b>')
  text.gsub!(/''(.+?)''/, '<i>\1</i>')

  # misc
  text.gsub!(/<ref[^<>]*>[\s\S]*?<\/ref>/, '')
  text.gsub!(/<!--[^>]+?-->/, '')
  text.gsub!('&nbsp;', ' ')
  text.strip!

  # create paragraphs
  paragraphs = text.split("\n\n")
  return text unless paragraphs.size > 1

  paragraphs.map { |paragraph| "<p>#{paragraph.strip}</p>" }.join("\n")
end
Instance Method Details
#categories ⇒ Object
51 52 53 |
# File 'lib/wikipedia/page.rb', line 51

# Collects the 'title' of every entry under the page's 'categories' key.
#
# @return [Array, nil] titles, or nil when the page has no 'categories'
def categories
  entries = page['categories']
  return unless entries

  entries.map { |category| category['title'] }
end
#content ⇒ Object
13 14 15 |
# File 'lib/wikipedia/page.rb', line 13

# Reads the '*' field of the first entry under the page's 'revisions' key.
#
# @return [Object, nil] that field, or nil when the page has no 'revisions'
def content
  revisions = page['revisions']
  return unless revisions

  revisions.first['*']
end
#coordinates ⇒ Object
83 84 85 |
# File 'lib/wikipedia/page.rb', line 83

# Returns the values of the first entry under the page's 'coordinates' key.
#
# @return [Array, nil] the values, or nil when the page has no 'coordinates'
def coordinates
  entries = page['coordinates']
  return unless entries

  entries.first.values
end
#editurl ⇒ Object
39 40 41 |
# File 'lib/wikipedia/page.rb', line 39

# Returns the value stored under the page's 'editurl' key.
#
# @return [Object, nil] nil when the key is absent
def editurl
  details = page
  details['editurl']
end
#extlinks ⇒ Object
59 60 61 |
# File 'lib/wikipedia/page.rb', line 59

# Collects the '*' field of every entry under the page's 'extlinks' key.
#
# @return [Array, nil] the fields, or nil when the page has no 'extlinks'
def extlinks
  entries = page['extlinks']
  return unless entries

  entries.map { |link| link['*'] }
end
#fullurl ⇒ Object
35 36 37 |
# File 'lib/wikipedia/page.rb', line 35

# Returns the value stored under the page's 'fullurl' key.
#
# @return [Object, nil] nil when the key is absent
def fullurl
  details = page
  details['fullurl']
end
#image_descriptionurl ⇒ Object
71 72 73 |
# File 'lib/wikipedia/page.rb', line 71

# Reads 'descriptionurl' from the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] that field, or nil when the page has no 'imageinfo'
def image_descriptionurl
  info = page['imageinfo']
  return unless info

  info.first['descriptionurl']
end
#image_descriptionurls ⇒ Object
79 80 81 |
# File 'lib/wikipedia/page.rb', line 79

# Collects #image_descriptionurl from every object returned by
# #image_metadata.
#
# The receiver of +map+ was missing in this listing; it is restored here
# as +image_metadata+, the method in this class that yields the per-image
# objects.
#
# @return [Array] one description URL per image metadata entry
def image_descriptionurls
  image_metadata.map { |img| img.image_descriptionurl }
end
#image_metadata ⇒ Object
91 92 93 94 95 96 97 98 99 |
# File 'lib/wikipedia/page.rb', line 91

# Looks up metadata for the page's images via Wikipedia.find_image.
#
# Only titles that end in a jpg/jpeg/png/gif/svg extension (case
# insensitive) and do not contain "LinkFA-star" are looked up. The result
# is memoized in @cached_image_metadata once a lookup has happened.
#
# @return [Array] results of Wikipedia.find_image, or [] when #images is nil
def image_metadata
  return @cached_image_metadata if @cached_image_metadata

  titles = images
  # Nothing is cached in this case, matching the original control flow.
  return [] unless titles

  wanted = titles.select do |title|
    title =~ /:.+\.(jpg|jpeg|png|gif|svg)$/i && !title.include?("LinkFA-star")
  end
  @cached_image_metadata = wanted.map { |title| Wikipedia.find_image(title) }
end
#image_url ⇒ Object
67 68 69 |
# File 'lib/wikipedia/page.rb', line 67

# Reads 'url' from the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] that field, or nil when the page has no 'imageinfo'
def image_url
  info = page['imageinfo']
  return unless info

  info.first['url']
end
#image_urls ⇒ Object
75 76 77 |
# File 'lib/wikipedia/page.rb', line 75

# Collects #image_url from every object returned by #image_metadata.
#
# The receiver of +map+ was missing in this listing; it is restored here
# as +image_metadata+, the method in this class that yields the per-image
# objects.
#
# @return [Array] one URL per image metadata entry
def image_urls
  image_metadata.map { |img| img.image_url }
end
#images ⇒ Object
63 64 65 |
# File 'lib/wikipedia/page.rb', line 63

# Collects the 'title' of every entry under the page's 'images' key.
#
# @return [Array, nil] titles, or nil when the page has no 'images'
def images
  entries = page['images']
  return unless entries

  entries.map { |image| image['title'] }
end
#json ⇒ Object
105 106 107 |
# File 'lib/wikipedia/page.rb', line 105

# Reader for the @json instance variable.
#
# @return [Object] the current value of @json
def json
  @json
end
#links ⇒ Object
55 56 57 |
# File 'lib/wikipedia/page.rb', line 55

# Collects the 'title' of every entry under the page's 'links' key.
#
# @return [Array, nil] titles, or nil when the page has no 'links'
def links
  entries = page['links']
  return unless entries

  entries.map { |link| link['title'] }
end
#page ⇒ Object
9 10 11 |
# File 'lib/wikipedia/page.rb', line 9

# Picks the first value of the hash found at @data['query']['pages'].
#
# @return [Object] the first page entry
def page
  pages = @data['query']['pages']
  pages.values.first
end
#raw_data ⇒ Object
87 88 89 |
# File 'lib/wikipedia/page.rb', line 87

# Reader for the @data instance variable.
#
# @return [Object] the current value of @data
def raw_data
  @data
end
#redirect? ⇒ Boolean
21 22 23 |
# File 'lib/wikipedia/page.rb', line 21

# Checks the content for a "#REDIRECT [[target]]" directive (case insensitive).
#
# The value is truthy/falsy rather than strictly boolean: on a match the
# MatchData is returned, with the link target in capture group 1.
#
# @return [MatchData, nil] nil when there is no content or no directive
def redirect?
  body = content
  return body unless body

  body.match(/\#REDIRECT\s*\[\[(.*?)\]\]/i)
end
#redirect_title ⇒ Object
25 26 27 28 29 |
# File 'lib/wikipedia/page.rb', line 25

# Extracts capture group 1 from the value returned by #redirect?.
#
# @return [String, nil] the captured text, or nil when #redirect? is falsy
def redirect_title
  found = redirect?
  return unless found

  found[1]
end
#sanitized_content ⇒ Object
17 18 19 |
# File 'lib/wikipedia/page.rb', line 17

# Passes #content through the class-level sanitize method.
#
# @return [Object] whatever sanitize returns for the content
def sanitized_content
  raw = content
  self.class.sanitize(raw)
end
#summary ⇒ Object
47 48 49 |
# File 'lib/wikipedia/page.rb', line 47

# Returns the part of the page's 'extract' that precedes the first "=="
# marker, with surrounding whitespace stripped.
#
# @return [String, nil] the leading text; nil when the page has no
#   'extract'; an empty string when the extract is empty
def summary
  extract = page['extract']
  return unless extract

  # split returns [] for an empty string, so fall back to '' before strip.
  (extract.split('==').first || '').strip
end
#templates ⇒ Object
101 102 103 |
# File 'lib/wikipedia/page.rb', line 101

# Collects the 'title' of every entry under the page's 'templates' key.
#
# @return [Array, nil] titles, or nil when the page has no 'templates'
def templates
  entries = page['templates']
  return unless entries

  entries.map { |template| template['title'] }
end
#text ⇒ Object
43 44 45 |
# File 'lib/wikipedia/page.rb', line 43

# Returns the value stored under the page's 'extract' key.
#
# @return [Object, nil] nil when the key is absent
def text
  details = page
  details['extract']
end
#title ⇒ Object
31 32 33 |
# File 'lib/wikipedia/page.rb', line 31

# Returns the value stored under the page's 'title' key.
#
# @return [Object, nil] nil when the key is absent
def title
  details = page
  details['title']
end