Class: Worldfootball::Page
- Inherits:
-
Object
- Object
- Worldfootball::Page
show all
- Defined in:
- lib/webget-football/worldfootball/page.rb,
lib/webget-football/worldfootball/page_report.rb,
lib/webget-football/worldfootball/page_schedule.rb
Defined Under Namespace
Classes: Report, Schedule
Constant Summary
collapse
- GENERATED_RE =
<!-- [generated 2020-06-30 22:30:19] -->
<!-- [generated 2020-06-30 22:30:19] -->
# Matches the HTML comment that records when a page was generated,
# e.g. <!-- [generated 2020-06-30 22:30:19] -->.
# Named captures: +date+ (digits-digits-digits) and +time+ (digits:digits:digits);
# the digit runs are not width-limited.
# The x (extended) flag makes whitespace in the pattern insignificant, which is
# why every literal run of spaces is written as [ ]+.
%r{
<!--
[ ]+
\[generated
[ ]+
(?<date>\d+-\d+-\d+)
[ ]+
(?<time>\d+:\d+:\d+)
\]
[ ]+
-->
}x
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(html) ⇒ Page
Returns a new instance of Page.
10
11
12
|
# File 'lib/webget-football/worldfootball/page.rb', line 10
# Keeps the raw HTML text of the page for the other methods to work on.
#
# @param html [String] raw HTML source of the page
def initialize(html)
  @html = html
end
|
Class Method Details
.from_file(path) ⇒ Object
5
6
7
8
|
# File 'lib/webget-football/worldfootball/page.rb', line 5
# Builds a page from an HTML file on disk.
#
# @param path [String] location of the HTML file; its contents are read as UTF-8
# @return [Object] whatever +new+ returns for the file's contents
def self.from_file( path )
  new( File.read( path, encoding: 'utf-8' ) )
end
|
Instance Method Details
#assert(cond, msg) ⇒ Object
96
97
98
99
100
101
102
103
|
# File 'lib/webget-football/worldfootball/page.rb', line 96
# Hard assertion used while parsing a page.
#
# Does nothing (and returns nil) when +cond+ is truthy. Otherwise prints the
# failure message to standard output and terminates the process with exit
# status 1.
#
# @param cond [Object] condition that is expected to be truthy
# @param msg [String] detail appended to the failure message
def assert( cond, msg )
  return if cond

  puts "!!! assert failed (in parse page) - #{msg}"
  exit 1
end
|
#doc ⇒ Object
14
15
16
17
|
# File 'lib/webget-football/worldfootball/page.rb', line 14
# Parsed document for the page's HTML.
#
# The HTML is handed to Nokogiri::HTML on the first call only; later calls
# return the same cached object.
#
# @return [Object] the value returned by Nokogiri::HTML for @html
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML( @html )
end
|
#generated ⇒ Object
64
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/webget-football/worldfootball/page.rb', line 64
# Timestamp taken from the page's "[generated ...]" HTML comment.
#
# The result is memoized, including the "not found" case: a page without a
# timestamp is scanned (and warned about) once, not on every call.
#
# @return [DateTime, nil] the parsed timestamp, or nil when GENERATED_RE
#   does not match the page's HTML (a warning is printed in that case)
def generated
  # defined? (rather than ||=) so that a cached nil counts as "already computed"
  return @generated if defined?( @generated )

  m = GENERATED_RE.match( @html )
  @generated = if m
                 DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S' )
               else
                 puts "!! WARN - no generated timestamp found in page"
                 nil
               end
end
|
#generated_in_days_ago ⇒ Object
convenience helper / formatter
77
78
79
80
81
82
83
84
|
# File 'lib/webget-football/worldfootball/page.rb', line 77
# Formats the age of the page as a whole number of days followed by "d".
#
# The age is the difference between today's Julian day number and that of
# the +generated+ timestamp.
#
# @return [String] e.g. "3d", or "?" when +generated+ returns nothing
def generated_in_days_ago
  return '?' unless generated

  elapsed = Date.today.jd - generated.jd
  "#{elapsed}d"
end
|
#keywords ⇒ Object
25
26
27
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/webget-football/worldfootball/page.rb', line 25
# Content of the page's <meta name="keywords"> tag.
#
# The first matching node is cached in @keywords once found.
#
# @return [String, nil] the tag's +content+ attribute, or nil when the page
#   has no keywords meta tag
def keywords
  @keywords ||= doc.css( 'meta[name="keywords"]' ).first
  # guard: .first is nil when nothing matched the selector
  @keywords[:content] if @keywords
end
|
#squish(str) ⇒ Object
89
90
91
92
93
94
|
# File 'lib/webget-football/worldfootball/page.rb', line 89
# Normalizes whitespace in a string.
#
# Non-breaking spaces (U+00A0) are turned into plain spaces, every run of
# spaces, tabs and newlines is collapsed into a single space, and leading /
# trailing whitespace is removed. The input string is not modified.
#
# @param str [String] text to clean up
# @return [String] a new, normalized string
def squish( str )
  # strip goes last: stripping first would leave edge spaces behind
  # whenever the text starts or ends with a non-breaking space
  str.gsub( "\u{00A0}", ' ' )
     .gsub( /[ \t\n]+/, ' ' )
     .strip
end
|
#title ⇒ Object
19
20
21
22
23
|
# File 'lib/webget-football/worldfootball/page.rb', line 19
# Text of the page's <title> element.
#
# The first matching node is cached in @title once found.
#
# @return [String, nil] the element's text, or nil when the page has no
#   <title> element
def title
  @title ||= doc.css( 'title' ).first
  # guard: .first is nil when nothing matched the selector
  @title.text if @title
end
|
#url ⇒ Object
<meta property="og:url"
content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
41
42
43
44
|
# File 'lib/webget-football/worldfootball/page.rb', line 41
# Content of the page's <meta property="og:url"> tag.
#
# The first matching node is cached in @url once found.
#
# @return [String, nil] the tag's +content+ attribute, or nil when the page
#   has no og:url meta tag
def url
  @url ||= doc.css( 'meta[property="og:url"]' ).first
  # guard: .first is nil when nothing matched the selector
  @url[:content] if @url
end
|