Class: Worldfootball::Page
- Inherits:
-
Object
- Object
- Worldfootball::Page
- Defined in:
- lib/worldfootball/page.rb,
lib/worldfootball/page_team.rb,
lib/worldfootball/page_report.rb,
lib/worldfootball/page_schedule.rb
Defined Under Namespace
Classes: Report, Schedule, Team
Constant Summary collapse
- GENERATED_RE =
e.g. <!-- [generated 2020-06-30 22:30:19] -->
%r{ <!-- [ ]+ \[generated [ ]+ (?<date>\d+-\d+-\d+) [ ]+ (?<time>\d+:\d+:\d+) \] [ ]+ --> }x
Class Method Summary collapse
Instance Method Summary collapse
- #assert(cond, msg) ⇒ Object
-
#debug? ⇒ Boolean
helper methods.
- #doc ⇒ Object
- #generated ⇒ Object
-
#generated_in_days_ago ⇒ Object
convenience helper / formatter.
-
#initialize(html) ⇒ Page
constructor
A new instance of Page.
- #keywords ⇒ Object
-
#log(msg) ⇒ Object
append to log.
- #squish(str) ⇒ Object
- #title ⇒ Object
-
#url ⇒ Object
<meta property="og:url" content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />.
Constructor Details
#initialize(html) ⇒ Page
Returns a new instance of Page.
10 11 12 13 14 |
# File 'lib/worldfootball/page.rb', line 10
# Wraps a raw HTML string.
#
# The markup is kept in normalized Unicode form (nfc - ruby's default
# norm form for String#unicode_normalize).
# todo/fix - fix upstream in wget!!!! why? why not?
#
# @param html [String] raw page markup
def initialize(html)
  normalized = html.unicode_normalize
  @html = normalized
end
Class Method Details
.from_file(path) ⇒ Object
5 6 7 8 |
# File 'lib/worldfootball/page.rb', line 5
# Alternate constructor: reads the file at +path+ as UTF-8 text and hands
# the contents to +new+.
#
# @param path [String] location of the HTML file on disk
# @return [Object] whatever +new+ returns for the file contents
def self.from_file(path)
  markup = File.read(path, mode: 'r:utf-8')
  new(markup)
end
Instance Method Details
#assert(cond, msg) ⇒ Object
102 103 104 105 106 107 108 109 |
# File 'lib/worldfootball/page.rb', line 102
# Aborts the program when a page-parsing expectation does not hold.
#
# If +cond+ is falsy, prints a diagnostic via puts and exits the process
# with status 1; otherwise does nothing.
#
# @param cond [Object] the condition that must be truthy
# @param msg [String] detail appended to the failure message
# @return [nil] when the condition holds
def assert(cond, msg)
  return if cond

  puts "!!! assert failed (in parse page) - #{msg}"
  exit 1
end
#debug? ⇒ Boolean
helper methods
92 |
# File 'lib/worldfootball/page.rb', line 92
# helper methods
#
# Reports the debug setting by delegating to Worldfootball.debug?.
#
# @return [Object] whatever Worldfootball.debug? returns
def debug?
  Worldfootball.debug?
end
#doc ⇒ Object
16 17 18 19 |
# File 'lib/worldfootball/page.rb', line 16
# Parses the stored HTML on first use and caches the result in @doc.
#
# note: a document (and NOT a fragment) is parsed on purpose - with a
# fragment there is no access to the page head (and meta elements and such).
#
# @return [Object] the value produced by Nokogiri::HTML for the page markup
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML(@html)
end
#generated ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/worldfootball/page.rb', line 66
# Extracts the "generated" timestamp comment from the page markup, e.g.
#
#   <!-- [generated 2020-06-30 22:30:19] -->
#
# The lookup runs once; the outcome (timestamp or nil) is cached in
# @generated. When no timestamp is present a warning is printed via puts.
#
# @return [DateTime, nil] the parsed timestamp, or nil if the page has none
def generated
  # memoize with defined? (not ||=) so that a missing timestamp is cached
  # too - otherwise every call would re-scan the page and repeat the warning
  return @generated if defined?(@generated)

  m = GENERATED_RE.match(@html)
  @generated =
    if m
      DateTime.strptime("#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
    else
      puts "!! WARN - no generated timestamp found in page"
      nil
    end
end
#generated_in_days_ago ⇒ Object
convenience helper / formatter
79 80 81 82 83 84 85 86 |
# File 'lib/worldfootball/page.rb', line 79
# convenience helper / formatter
#
# Formats the age of the page as the difference between today's Julian
# day number and that of the generated timestamp.
#
# @return [String] e.g. "3d", or '?' when #generated returns nothing
def generated_in_days_ago
  stamp = generated
  return '?' unless stamp

  age = Date.today.jd - stamp.jd
  "#{age}d"
end
#keywords ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/worldfootball/page.rb', line 27
# Returns the content attribute of the page's keywords meta element, e.g.
#
#   <meta name="keywords"
#     content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
#
# The first matching element is cached in @keywords.
# (alternative noted by the original author:
#  doc.xpath( '//meta[@name="keywords"]' ).first)
#
# @return [String, nil] the content attribute, or nil when the page has no
#   keywords meta element
def keywords
  @keywords ||= doc.css('meta[name="keywords"]').first
  # a page without the element yields nil here; return nil instead of
  # failing with NoMethodError on nil
  @keywords && @keywords[:content]
end
#log(msg) ⇒ Object
append to log
112 113 114 115 116 117 |
# File 'lib/worldfootball/page.rb', line 112
# append to log
#
# Writes +msg+ followed by a newline to the end of ./logs.txt (relative to
# the current working directory), opened in UTF-8 append mode.
#
# @param msg [Object] the text to append (written with IO#write)
# @return [Integer] result of the final newline write
def log(msg)
  logfile = File.open('./logs.txt', 'a:utf-8')
  begin
    logfile.write(msg)
    logfile.write("\n")
  ensure
    # always release the handle, mirroring the block form of File.open
    logfile.close
  end
end
#squish(str) ⇒ Object
95 96 97 98 99 100 |
# File 'lib/worldfootball/page.rb', line 95
# Normalizes whitespace in +str+: converts NO-BREAK SPACE (U+00A0) to a
# plain space, folds runs of spaces/tabs/newlines to a single space and
# removes leading and trailing whitespace.
#
# @param str [String] text to clean up (not modified)
# @return [String] a new, cleaned-up string
def squish(str)
  str = str.gsub("\u{00A0}", ' ')   # Unicode Character 'NO-BREAK SPACE' (U+00A0)
  str = str.gsub(/[ \t\n]+/, ' ')   ## fold whitespace to one max.
  # strip last: String#strip does not treat U+00A0 as whitespace, so
  # stripping before the conversion left stray spaces at the edges
  str.strip
end
#title ⇒ Object
21 22 23 24 25 |
# File 'lib/worldfootball/page.rb', line 21
# Returns the text content of the page's title element, e.g. for
#
#   <title>Bundesliga 2010/2011 » Spielplan</title>
#
# The first matching element is cached in @title.
#
# @return [String, nil] the element's text content, or nil when the page
#   has no title element
def title
  @title ||= doc.css('title').first
  # a page without the element yields nil here; return nil instead of
  # failing with NoMethodError on nil
  @title && @title.text
end
#url ⇒ Object
<meta property="og:url"
content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
43 44 45 46 |
# File 'lib/worldfootball/page.rb', line 43
# Returns the content attribute of the page's og:url meta element, e.g.
#
#   <meta property="og:url"
#     content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
#
# The first matching element is cached in @url.
#
# @return [String, nil] the content attribute, or nil when the page has no
#   og:url meta element
def url
  @url ||= doc.css('meta[property="og:url"]').first
  # a page without the element yields nil here; return nil instead of
  # failing with NoMethodError on nil
  @url && @url[:content]
end