Class: LameSitemapper::Page

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/page.rb

Constant Summary collapse

NON_SCRAPED_DEPTH =
1
NON_SCRAPED_DOMAIN =
2
NON_SCRAPED_ROBOTS =
4
NON_SCRAPED_NO_HTML =
8
NON_SCRAPED_NOT_ACCESSIBLE =
16

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Page

Returns a new instance of Page.



24
25
26
27
28
29
30
31
32
# File 'lib/page.rb', line 24

# Sets up a page for +path+ with empty collections and no non-scraped
# flags set.
#
# @param path [Object] stored unchanged in @path
def initialize(path)
  @path = path
  # The block form of Array.new builds a separate empty array per slot, so
  # the five collections never share storage.
  @sub_pages, @anchors, @images, @links, @scripts = Array.new(5) { [] }
  @non_scraped_code = 0
end

Instance Attribute Details

#anchors ⇒ Object (readonly)

Returns the value of attribute anchors.



13
14
15
# File 'lib/page.rb', line 13

# Reader for @anchors, which the constructor initializes to an empty array.
#
# @return [Object] the current value of @anchors
def anchors
  @anchors
end

#images ⇒ Object (readonly)

Returns the value of attribute images.



14
15
16
# File 'lib/page.rb', line 14

# Reader for @images, which the constructor initializes to an empty array.
#
# @return [Object] the current value of @images
def images
  @images
end

#links ⇒ Object

Returns the value of attribute links.



15
16
17
# File 'lib/page.rb', line 15

# Reader for @links, which the constructor initializes to an empty array.
#
# @return [Object] the current value of @links
def links
  @links
end

#path ⇒ Object

Returns the value of attribute path.



11
12
13
# File 'lib/page.rb', line 11

# Reader for @path, the value passed to the constructor.
#
# @return [Object] the current value of @path
def path
  @path
end

#scripts ⇒ Object (readonly)

Returns the value of attribute scripts.



16
17
18
# File 'lib/page.rb', line 16

# Reader for @scripts, which the constructor initializes to an empty array.
#
# @return [Object] the current value of @scripts
def scripts
  @scripts
end

#sub_pages ⇒ Object (readonly)

Returns the value of attribute sub_pages.



12
13
14
# File 'lib/page.rb', line 12

# Reader for @sub_pages, the array that #<< appends to and #each walks.
#
# @return [Object] the current value of @sub_pages
def sub_pages
  @sub_pages
end

Instance Method Details

#<<(page) ⇒ Object



34
35
36
37
38
# File 'lib/page.rb', line 34

# Appends +page+ to the end of @sub_pages.
#
# @param page [Object] the child to add
# @return [self] the receiver, so appends can be chained
def <<(page)
  tap { @sub_pages.push(page) }
end

#depth_reached=(value) ⇒ Object



72
73
74
75
76
77
78
# File 'lib/page.rb', line 72

# Sets or clears the NON_SCRAPED_DEPTH bit of @non_scraped_code, leaving
# every other bit untouched.
#
# @param value [Object] truthy sets the bit; false or nil clears it
def depth_reached=(value)
  bit = Page::NON_SCRAPED_DEPTH
  @non_scraped_code = value ? (@non_scraped_code | bit) : (@non_scraped_code & ~bit)
end

#depth_reached? ⇒ Boolean

Returns:

  • (Boolean)


68
69
70
# File 'lib/page.rb', line 68

# Reports whether the NON_SCRAPED_DEPTH bit is set in @non_scraped_code.
#
# @return [Boolean]
def depth_reached?
  (@non_scraped_code & Page::NON_SCRAPED_DEPTH).positive?
end

#each {|_self| ... } ⇒ Object

Yields:

  • (_self)

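Yield Parameters:

  • _self (LameSitemapper::Page) — the page the method was called on; the same block is then passed on to each sub-page's #each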



116
117
118
119
120
121
# File 'lib/page.rb', line 116

# Yields this page, then passes the same block to #each on every entry of
# @sub_pages in order, so a page is always visited before its sub-pages.
#
# @yieldparam _self [Object] the page currently being visited
# @return [Enumerator] when called without a block
# @return [Array] @sub_pages when called with a block
def each(&block)
  if block
    yield self
    @sub_pages.each { |child| child.each(&block) }
  else
    enum_for(:each)
  end
end

#external_domain=(value) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/page.rb', line 60

# Sets or clears the NON_SCRAPED_DOMAIN bit of @non_scraped_code, leaving
# every other bit untouched.
#
# @param value [Object] truthy sets the bit; false or nil clears it
def external_domain=(value)
  bit = Page::NON_SCRAPED_DOMAIN
  @non_scraped_code = value ? (@non_scraped_code | bit) : (@non_scraped_code & ~bit)
end

#external_domain? ⇒ Boolean

Returns:

  • (Boolean)


56
57
58
# File 'lib/page.rb', line 56

# Reports whether the NON_SCRAPED_DOMAIN bit is set in @non_scraped_code.
#
# @return [Boolean]
def external_domain?
  (@non_scraped_code & Page::NON_SCRAPED_DOMAIN).positive?
end

#format_codes ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
# File 'lib/page.rb', line 104

# Builds a "|"-separated list of short labels for the non-scraped flags
# that are currently set, followed by a single trailing space. With no
# flags set the result is just that space.
#
# @return [String]
def format_codes
  # Pairs are listed in output order; each predicate is evaluated once.
  checks = [
    ["depth", depth_reached?],
    ["robots", robots_forbidden?],
    ["ext", external_domain?],
    ["nohtml", no_html?],
    ["noacc", not_accessible?]
  ]
  labels = checks.select(&:last).map(&:first)

  "#{labels.join('|')} "
end

#no_html=(value) ⇒ Object



84
85
86
87
88
89
90
# File 'lib/page.rb', line 84

# Sets or clears the NON_SCRAPED_NO_HTML bit of @non_scraped_code, leaving
# every other bit untouched.
#
# @param value [Object] truthy sets the bit; false or nil clears it
def no_html=(value)
  bit = Page::NON_SCRAPED_NO_HTML
  @non_scraped_code = value ? (@non_scraped_code | bit) : (@non_scraped_code & ~bit)
end

#no_html? ⇒ Boolean

Returns:

  • (Boolean)


80
81
82
# File 'lib/page.rb', line 80

# Reports whether the NON_SCRAPED_NO_HTML bit is set in @non_scraped_code.
#
# @return [Boolean]
def no_html?
  (@non_scraped_code & Page::NON_SCRAPED_NO_HTML).positive?
end

#not_accessible=(value) ⇒ Object



96
97
98
99
100
101
102
# File 'lib/page.rb', line 96

# Sets or clears the NON_SCRAPED_NOT_ACCESSIBLE bit of @non_scraped_code,
# leaving every other bit untouched.
#
# @param value [Object] truthy sets the bit; false or nil clears it
def not_accessible=(value)
  bit = Page::NON_SCRAPED_NOT_ACCESSIBLE
  @non_scraped_code = value ? (@non_scraped_code | bit) : (@non_scraped_code & ~bit)
end

#not_accessible? ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
# File 'lib/page.rb', line 92

# Reports whether the NON_SCRAPED_NOT_ACCESSIBLE bit is set in
# @non_scraped_code.
#
# @return [Boolean]
def not_accessible?
  (@non_scraped_code & Page::NON_SCRAPED_NOT_ACCESSIBLE).positive?
end

#robots_forbidden=(value) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/page.rb', line 48

# Sets or clears the NON_SCRAPED_ROBOTS bit of @non_scraped_code, leaving
# every other bit untouched.
#
# @param value [Object] truthy sets the bit; false or nil clears it
def robots_forbidden=(value)
  bit = Page::NON_SCRAPED_ROBOTS
  @non_scraped_code = value ? (@non_scraped_code | bit) : (@non_scraped_code & ~bit)
end

#robots_forbidden? ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
# File 'lib/page.rb', line 44

# Reports whether the NON_SCRAPED_ROBOTS bit is set in @non_scraped_code.
#
# @return [Boolean]
def robots_forbidden?
  (@non_scraped_code & Page::NON_SCRAPED_ROBOTS).positive?
end

#scraped? ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/page.rb', line 40

# Reports whether no non-scraped flag is set, i.e. @non_scraped_code is 0.
#
# @return [Boolean]
def scraped?
  @non_scraped_code.zero?
end