Class: ValidateWebsite::Core

Inherits:
Object
  • Object
show all
Includes:
ColorfulMessages
Defined in:
lib/validate_website/core.rb

Constant Summary collapse

EXIT_SUCCESS =
0
EXIT_FAILURE_MARKUP =
64
EXIT_FAILURE_NOT_FOUND =
65
EXIT_FAILURE_MARKUP_NOT_FOUND =
66

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from ColorfulMessages

#color, #error, #info, #note, #success, #warning

Constructor Details

#initialize(options = {}, validation_type = :crawl) ⇒ Core

Returns a new instance of Core.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/validate_website/core.rb', line 25

def initialize(options={}, validation_type=:crawl)
  @markup_error = nil
  @not_found_error = nil

  @options = Parser.parse(options, validation_type)

  @file = @options[:file]
  if @file
    # truncate file
    open(@file, 'w').write('')
  end

  @site = @options[:site]
end

Instance Attribute Details

#anemone ⇒ Object (readonly)

Returns the value of attribute anemone.



16
17
18
# File 'lib/validate_website/core.rb', line 16

# Reader for the @anemone instance variable.
#
# @return [Object, nil] the value #crawl stored from Anemone.crawl; nil if
#   it has not been assigned (e.g. before #crawl has run)
def anemone
  @anemone
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



16
17
18
# File 'lib/validate_website/core.rb', line 16

# Reader for the @options instance variable.
#
# @return [Object] the value #initialize stored from
#   Parser.parse(options, validation_type)
def options
  @options
end

#site ⇒ Object

Returns the value of attribute site.



15
16
17
# File 'lib/validate_website/core.rb', line 15

# Reader for the @site instance variable.
#
# @return [Object, nil] the value #initialize took from @options[:site]
def site
  @site
end

Instance Method Details

#crawl(opts = {}) ⇒ Object

Parameters:

  • opts (Hash)

    :quiet [Boolean] no output (true, false)
    :color [Boolean] color output (true, false)
    :exclude [String] a String used by Regexp.new
    :markup_validation [Boolean] Check the markup validity
    :not_found [Boolean] Check for not found pages (404)



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/validate_website/core.rb', line 49

# Crawls @site through Anemone, optionally validating markup and reporting
# pages that were not found.
#
# @param opts [Hash] per-call options, merged over the instance options.
#   Keys read here: :quiet, :color, :exclude, :markup_validation, :not_found.
# @return [Object] the result of Anemone.crawl, which is also kept in @anemone
def crawl(opts={})
  opts = @options.merge(opts)
  puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]

  @anemone = Anemone.crawl(@site, opts) do |anemone|
    anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]

    # Add the URLs referenced by each page (img/script/iframe/link tags,
    # CSS urls) to the links the crawler follows.
    anemone.focus_crawl do |page|
      extra_links = []
      extra_links.concat extract_urls_from_img_script_iframe_link(page) if page.html?
      extra_links.concat extract_urls_from_css(page) if page.content_type == 'text/css'
      extra_links.uniq!
      page.links.concat(extra_links)
    end

    anemone.on_every_page do |page|
      page_url = page.url.to_s

      # Only fetched html/xhtml pages go through markup validation.
      if opts[:markup_validation] && page.html? && page.fetched?
        validate(page.doc, page.body, page_url, opts)
      end

      if opts[:not_found] && page.not_found?
        @not_found_error = true
        message = "%s linked in %s but not exist" % [page_url, page.referer]
        puts color(:error, message, opts[:color])
        to_file(page_url)
      end

      # throw away the page (hope this saves memory)
      page = nil
    end
  end
end

#crawl_static(opts = {}) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/validate_website/core.rb', line 91

# Validates local files instead of crawling a live site: every regular file
# matching opts[:pattern] is wrapped in an Anemone::Page and then checked.
#
# @param opts [Hash] per-call options, merged over the instance options.
#   Keys read here: :quiet, :color, :pattern, :site, :markup_validation,
#   :not_found.
# @return [Array<String>] the paths returned by Dir.glob (result of #each)
def crawl_static(opts={})
  opts = @options.merge(opts)
  # Honour :quiet the same way #crawl does.
  puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]

  files = Dir.glob(opts[:pattern])
  files.each do |f|
    next unless File.file?(f)

    # File.read closes the file once it has been read; the previous
    # open(f).read left one handle open per file until garbage collection.
    page = Anemone::Page.new(URI.parse(opts[:site] + URI.encode(f)),
                             :body => File.read(f),
                             :headers => {'content-type' => ['text/html', 'application/xhtml+xml']})

    if opts[:markup_validation]
      validate(page.doc, page.body, f)
    end
    if opts[:not_found]
      # Check both the page's own links and the URLs extracted from
      # img/script/iframe/link tags.
      links = page.links
      links.concat extract_urls_from_img_script_iframe_link(page)
      check_static_not_found(links.uniq)
    end
  end
end

#exit_status ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
# File 'lib/validate_website/core.rb', line 114

# Maps the two error flags onto a process exit code.
#
# @return [Integer] EXIT_SUCCESS when neither flag is set,
#   EXIT_FAILURE_MARKUP_NOT_FOUND when both are set, otherwise
#   EXIT_FAILURE_MARKUP or EXIT_FAILURE_NOT_FOUND for the single flag set
def exit_status
  return EXIT_SUCCESS unless @markup_error || @not_found_error
  return EXIT_FAILURE_MARKUP_NOT_FOUND if @markup_error && @not_found_error

  # Exactly one flag is set at this point.
  @markup_error ? EXIT_FAILURE_MARKUP : EXIT_FAILURE_NOT_FOUND
end