Class: LameSitemapper::Cli

Inherits:
Object
  • Object
show all
Defined in:
lib/cli.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(out = nil, args = [], run_file = File.basename(__FILE__)) ⇒ Cli

Returns a new instance of Cli.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/cli.rb', line 15

# Builds the command-line front end: stores the output stream and argument
# list, seeds the option defaults from LameSitemapper::SETTINGS, and prepares
# the OptionParser that is later applied to the arguments.
#
# Every option that takes a number is validated strictly: the value must be a
# plain decimal integer inside the documented range, otherwise the usage text
# is printed (when an output stream is present) and the process exits.
#
# @param out [#puts, nil] stream for usage/version text; nothing is printed when nil
# @param args [Array<String>] command-line arguments to be parsed
# @param run_file [String] script name shown in the usage line
def initialize(out = nil, args = [], run_file = File.basename(__FILE__))
  @out = out
  @args = args

  @options = OpenStruct.new
  @options.use_robots = LameSitemapper::SETTINGS[:use_robots]
  @options.max_page_depth = LameSitemapper::SETTINGS[:max_page_depth]
  @options.log_level = LameSitemapper::SETTINGS[:log_level].to_i
  @options.report_type = LameSitemapper::SETTINGS[:report_type]
  @options.frequency_type = LameSitemapper::SETTINGS[:sitemap_frequency_type]
  @options.scraper_threads = LameSitemapper::SETTINGS[:scraper_threads].to_i

  Thread.current[:name] = "**"

  @opt_parser = OptionParser.new do |opts|
    # Returns +raw+ as an Integer when it is a plain decimal number inside
    # +range+; otherwise prints the usage text and exits. String#to_i alone
    # would turn "abc" into 0 and "3abc" into 3, silently accepting typos.
    bounded_int = lambda do |raw, range|
      value = raw.to_i if raw =~ /\A[+-]?\d+\z/
      unless value && range.cover?(value)
        @out.puts opts if @out
        exit
      end
      value
    end

    # Renders a list of symbols as 'a', 'b', 'c' for the help text.
    quoted_list = ->(values) { values.map { |v| "'#{v}'" }.join(", ") }

    opts.banner = "Generate sitemap.xml for a given url."
    opts.separator ""
    opts.separator "Usage: ruby #{run_file} [options] <uri>"
    opts.separator "url needs to be in the form of e.g. http://www.nisdom.com"
    opts.separator ""
    opts.separator "Specific options:"

    opts.on("--[no-]robots", "Run with robots.txt") do |r|
      @options.use_robots = r
    end

    opts.on("-l", "--log-level LEVEL", "Set log level from 0 to 4, 0 is most verbose (default 1)") do |level|
      # Keep the stored option in sync with the logger so the options dump
      # reflects the level actually in effect.
      @options.log_level = bounded_int.call(level, 0..4)
      LOGGER.level = @options.log_level
    end

    opts.on("-d", "--depth DEPTH", "Set maximum page traversal depth from 1 to 10 (default 10)") do |depth|
      @options.max_page_depth = bounded_int.call(depth, 1..10)
    end

    report_types = [:text, :sitemap, :html, :graph, :test_yml]
    opts.on("-r", "--report-type TYPE", report_types, "Set report type #{quoted_list.call(report_types)} (default 'text')") do |type|
      @options.report_type = type
    end

    change_frequency = [:none, :always, :hourly, :daily, :weekly, :monthly, :yearly, :never]
    opts.on("--change-frequency FREQ", change_frequency, "Set sitemap's page change frequency #{quoted_list.call(change_frequency)} (default 'daily')") do |freq|
      @options.frequency_type = freq
    end

    opts.on("-t", "--scraper-threads NUM", "Set number of scraper threads from 1 to 10 (default 1)") do |num|
      @options.scraper_threads = bounded_int.call(num, 1..10)
    end

    opts.separator ""
    opts.separator "Common options:"

    opts.on_tail("-h", "--help", "Display this screen") do
      @out.puts opts if @out
      exit
    end

    opts.on_tail("-v", "--version", "Show version") do
      @out.puts LameSitemapper::VERSION if @out
      exit
    end
  end
end

Instance Attribute Details

#opt_parser ⇒ Object (readonly)

Returns the value of attribute opt_parser.



13
14
15
# File 'lib/cli.rb', line 13

# Read-only accessor for the @opt_parser instance variable.
def opt_parser; @opt_parser; end

Instance Method Details

#run ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/cli.rb', line 90

# Parses the stored arguments, crawls from the given start URL and writes the
# selected report to the output stream.
#
# Prints the usage text and exits when no URL argument remains after option
# parsing, when either URL normalization step yields nil, or when option
# parsing raises InvalidArgument, InvalidOption or MissingArgument (the error
# message is printed first in that case). Returns early, without a report,
# when the crawl yields no root.
def run
  # Shared "show help and quit" step; output is skipped when no stream was given.
  usage_and_exit = lambda do
    @out.puts(@opt_parser) if @out
    exit
  end

  @opt_parser.parse!(@args)
  usage_and_exit.call if @args.empty?

  target = @args.shift
  host = UrlHelper.get_normalized_host(target)
  entry_url = UrlHelper.get_normalized_url(host, target)
  usage_and_exit.call if [host, entry_url].any?(&:nil?)

  LOGGER.info("starting with #{entry_url}, options #{@options.inspect}")

  began_at = Time.now
  # The crawl hands back the root and a start URL, which replaces the local one.
  root, entry_url = Core.new(@out, @options).start(host, entry_url)
  return unless root

  LOGGER.info("found #{root.count} pages in #{Time.now - began_at}s")

  if @out
    # The report method is chosen by name from the configured report type.
    report = ReportGenerator.new(@options, entry_url).send("to_#{@options.report_type}", root)
    @out.puts(report)
  end
rescue OptionParser::InvalidArgument, OptionParser::InvalidOption, OptionParser::MissingArgument => parse_error
  @out.puts(parse_error) if @out
  usage_and_exit.call
end