Class: Websitary::Configuration

Inherits:
Object
  • Object
show all
Defined in:
lib/websitary/configuration.rb

Overview

This class defines the scope in which profiles are evaluated. Most of its methods are suitable for use in profiles.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(app, args = []) ⇒ Configuration

Returns a new instance of Configuration.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/websitary/configuration.rb', line 32

# Create the configuration: choose the configuration directory, install the
# built-in defaults, load the 'config.rb' profile and then apply the
# command-line arguments.
#
# app::  The application object; stored in @app.
# args:: Array of command-line arguments handed to parse_command_line_args.
def initialize(app, args=[])
    @logger = Websitary::AppLog.new
    $logger.debug "Configuration#initialize"
    @app    = app
    @cfgdir = ENV['HOME'] ? File.join(ENV['HOME'], '.websitary') : '.'
    # The first existing candidate replaces the HOME-based default. compact
    # removes the USERPROFILE entry when that variable is unset; handing nil
    # to File.exist? would raise a TypeError.
    [
        ENV['USERPROFILE'] && File.join(ENV['USERPROFILE'], 'websitary'),
        File.join(RbConfig::CONFIG['sysconfdir'], 'websitary')
    ].compact.each do |dir|
        if File.exist?(dir)
            @cfgdir = dir
            break
        end
    end

    @cmd_edit          = 'vi "%s"'
    @execute           = 'downdiff'
    @quicklist_profile = 'quicklist'
    @view              = 'w3m "%s"'

    @allow             = {}
    @default_options   = {}
    @default_profiles  = [@quicklist_profile]
    @done              = []
    @mtimes            = Websitary::FileMTimes.new(self)
    @options           = {}
    @outfile           = {}
    @profiles          = []
    @robots            = {}
    @todo              = []
    @exclude           = [/^\s*(javascript|mailto):/]
    @urlencmap         = {}
    @urls              = {}

    @suffix = {
        'text' => 'txt'
        # 'rss'  => 'xml'
    }

    migrate
    initialize_options
    profile 'config.rb'
    parse_command_line_args(args)

    # Only applies when neither a profile nor the command line chose a format.
    @output_format   ||= ['html']
    @output_title      = %{#{Websitary::APPNAME}: #{@profiles.join(", ")}}
end

Instance Attribute Details

#cfgdirObject

The user configuration directory



19
20
21
# File 'lib/websitary/configuration.rb', line 19

# Reader for @cfgdir, the user configuration directory.
def cfgdir
  return @cfgdir
end

#doneObject

Array of downloaded urls.



17
18
19
# File 'lib/websitary/configuration.rb', line 17

# Reader for @done, the array of downloaded urls.
def done
  return @done
end

#executeObject

What to do



21
22
23
# File 'lib/websitary/configuration.rb', line 21

# Reader for @execute, the name of the command to run.
def execute
  return @execute
end

#mtimesObject

Cached mtimes



25
26
27
# File 'lib/websitary/configuration.rb', line 25

# Reader for @mtimes, the cached mtimes.
def mtimes
  return @mtimes
end

#optionsObject

Global Options



23
24
25
# File 'lib/websitary/configuration.rb', line 23

# Reader for @options, the global options hash.
def options
  return @options
end

#quicklist_profileObject

The name of the quicklist profile



27
28
29
# File 'lib/websitary/configuration.rb', line 27

# Reader for @quicklist_profile, the name of the quicklist profile.
def quicklist_profile
  return @quicklist_profile
end

#todoObject (readonly)

Array of urls to be downloaded.



15
16
17
# File 'lib/websitary/configuration.rb', line 15

# Reader for @todo, the array of urls still to be downloaded.
def todo
  return @todo
end

#urlsObject

Hash (key = URL, value = Hash of options)



13
14
15
# File 'lib/websitary/configuration.rb', line 13

# Reader for @urls, a hash mapping each URL to its hash of options.
def urls
  return @urls
end

Instance Method Details

#call_cmd(cmd, cmdargs, args = {}) ⇒ Object

Apply the argument to cmd (a format String or a Proc). If a String, execute the command.



532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
# File 'lib/websitary/configuration.rb', line 532

# Apply cmdargs to cmd (a format String or a Proc) via eval_arg. A String
# result is executed as a shell command and its output is returned.
#
# args[:default]:: value passed to eval_arg as default and returned on timeout
# args[:url]::     when given, the url's :timeout option limits the run time
def call_cmd(cmd, cmdargs, args={})
    fallback = args[:default]
    url      = args[:url]
    limit    = url ? url_get(url, :timeout) : nil
    run      = lambda { eval_arg(cmd, cmdargs, fallback) {|shell_cmd| `#{shell_cmd}`} }

    # No timeout configured: run unguarded.
    return run.call unless limit

    begin
        Timeout::timeout(limit) { run.call }
    rescue Timeout::Error
        $logger.error "Timeout #{limit}: #{url}"
        fallback
    end
end

#canonic_filename(filename) ⇒ Object



1053
1054
1055
# File 'lib/websitary/configuration.rb', line 1053

# Run filename through the global :canonic_filename option (via call_cmd),
# using filename itself as the default.
def canonic_filename(filename)
    canonizer = optval_get(:global, :canonic_filename)
    call_cmd(canonizer, [filename], :default => filename)
end

#canonic_url(url) ⇒ Object

Strip the url’s last part (after #).



823
824
825
# File 'lib/websitary/configuration.rb', line 823

# Return url without its fragment, i.e. everything from the first '#' to the
# end of that line is removed.
def canonic_url(url)
    fragment_rx = /#.*$/
    return url.sub(fragment_rx, '')
end

#clean_url(url) ⇒ Object



817
818
819
# File 'lib/websitary/configuration.rb', line 817

# Strip surrounding whitespace from url; a nil/false url is returned as is.
def clean_url(url)
    return url unless url
    url.strip
end

#comment_element(doc, elt) ⇒ Object



934
935
936
937
# File 'lib/websitary/configuration.rb', line 934

# Wrap elt in an HTML comment by inserting the opening marker before it and
# the closing marker after it.
def comment_element(doc, elt)
    opening, closing = '<!-- WEBSITARY: ', '-->'
    doc.insert_before(elt, opening)
    doc.insert_after(elt, closing)
end

#default(*profile_names) ⇒ Object

Configuration command: Set the default profiles



286
287
288
# File 'lib/websitary/configuration.rb', line 286

# Configuration command: remember profile_names as the default profiles.
def default(*profile_names)
    @default_profiles = profile_names
    profile_names
end

#diff(diff) ⇒ Object

Configuration command: Set the default diff program.



472
473
474
# File 'lib/websitary/configuration.rb', line 472

# Configuration command: store diff as the :default entry of the :diff
# options.
def diff(diff)
    diff_options = @options[:diff]
    diff_options[:default] = diff
end

#diffname(url, ensure_dir = false) ⇒ Object

Get the diff filename.



714
715
716
# File 'lib/websitary/configuration.rb', line 714

# Filename of the diff for url: an entry in the 'diff' directory, always
# using the 'md5' cache type.
def diffname(url, ensure_dir=false)
    cache_dir, cache_type = 'diff', 'md5'
    encoded_filename(cache_dir, url, ensure_dir, cache_type)
end

#diffprocess(&block) ⇒ Object

Configuration command: Set the default diff processor. The block takes the diff text (STRING) as argument.



434
435
436
# File 'lib/websitary/configuration.rb', line 434

# Configuration command: Set the default diff processor. The block takes the
# diff text (STRING) as argument.
#
# The block is stored under the :diffprocess options (mirroring
# downloadprocess, which stores under :downloadprocess). It must not be
# written to @options[:diff][:default], because that entry holds the diff
# program set by #diff.
def diffprocess(&block)
    # NOTE(review): assumes the :diffprocess option group is normally created
    # elsewhere; it is created on demand here so the call cannot fail on nil.
    (@options[:diffprocess] ||= {})[:default] = block
end

#download(download) ⇒ Object

Configuration command: Set the default downloader.



479
480
481
# File 'lib/websitary/configuration.rb', line 479

# Configuration command: store download as the :default entry of the
# :download options.
def download(download)
    download_options = @options[:download]
    download_options[:default] = download
end

#downloadprocess(&block) ⇒ Object

Configuration command: Set the default download processor. The block takes the downloaded text (STRING) as argument.



426
427
428
# File 'lib/websitary/configuration.rb', line 426

# Configuration command: store the given block as the :default entry of the
# :downloadprocess options. The block takes the downloaded text as argument.
def downloadprocess(&block)
    process_options = @options[:downloadprocess]
    process_options[:default] = block
end

#edit(cmd) ⇒ Object

Configuration command: Set the editor.



441
442
443
# File 'lib/websitary/configuration.rb', line 441

# Configuration command: remember cmd as the editor command (@cmd_edit).
def edit(cmd)
    @cmd_edit = cmd
    cmd
end

#edit_profile(profile = nil) ⇒ Object



1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
# File 'lib/websitary/configuration.rb', line 1019

# Open a profile in the configured editor (@cmd_edit). Without an argument
# the current @profiles are used; an Array is edited element by element.
def edit_profile(profile=nil)
    target = profile || @profiles
    return target.each {|name| edit_profile name} if Array === target

    path = profile_filename(target)
    $logger.debug "edit: #{path}"
    `#{@cmd_edit % path}`
end

#eligible_path?(url, path0, path) ⇒ Boolean

Check whether path is eligible on the basis of url or path0. This checks either for a :match option for url or the extensions of path0 and path.

Returns:

  • (Boolean)


855
856
857
858
859
860
861
862
# File 'lib/websitary/configuration.rb', line 855

# Decide whether path is eligible. If url has a :match option, path is
# matched against it (the result of =~ is returned); otherwise the file
# extensions of path0 and path must be equal.
def eligible_path?(url, path0, path)
    pattern = url_get(url, :match)
    return path =~ pattern if pattern
    File.extname(path0) == File.extname(path)
end

#encoded_basename(url, type = 'tree') ⇒ Object



761
762
763
764
765
766
767
768
769
# File 'lib/websitary/configuration.rb', line 761

# Dispatch to the encoded_basename_<type> method for the given cache type.
# An unknown type is logged as fatal and terminates with exit code 5.
def encoded_basename(url, type='tree')
    encoder = "encoded_basename_#{type}"
    unless respond_to?(encoder)
        $logger.fatal "Unknown cache type: #{type}"
        exit 5
    end
    send(encoder, url)
end

#encoded_basename_flat(url) ⇒ Object



777
778
779
# File 'lib/websitary/configuration.rb', line 777

# Basename for the 'flat' cache type: the result of encode(url).
def encoded_basename_flat(url)
    flat_name = encode(url)
    flat_name
end

#encoded_basename_md5(url) ⇒ Object



782
783
784
# File 'lib/websitary/configuration.rb', line 782

# Basename for the 'md5' cache type: the hexadecimal MD5 digest of url.
def encoded_basename_md5(url)
    digest = Digest::MD5.new
    digest.update(url)
    digest.hexdigest
end

#encoded_basename_tree(url) ⇒ Object



772
773
774
# File 'lib/websitary/configuration.rb', line 772

# Basename for the 'tree' cache type: url encoded with '/' passed as the
# second argument to encode, then run through ensure_filename.
def encoded_basename_tree(url)
    tree_name = encode(url, '/')
    ensure_filename(tree_name)
end

#encoded_filename(dir, url, ensure_dir = false, type = nil) ⇒ Object



742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
# File 'lib/websitary/configuration.rb', line 742

# Build the cache filename for url below @cfgdir/dir and remember the
# filename => url mapping in @urlencmap.
#
# dir::        subdirectory of the configuration directory
# ensure_dir:: when true, ask @app to create the containing directory
# type::       cache type; defaults to the url's :cachetype option ('tree')
#
# Falls back to the md5 basename when the directory could not be ensured,
# the name exceeds the global :filename_size (default 255) or the name is
# an existing directory.
def encoded_filename(dir, url, ensure_dir=false, type=nil)
    type ||= url_get(url, :cachetype, 'tree')
    $logger.debug "encoded_filename: type=#{type} url=#{url}"
    filename = File.join(@cfgdir, dir, encoded_basename(url, type))
    parent   = File.dirname(filename)
    $logger.debug "encoded_filename: rv0=#{filename}"
    max_size  = optval_get(:global, :filename_size, 255)
    parent_ok = !ensure_dir || @app.ensure_dir(parent, false)
    unusable  = !parent_ok || filename.size > max_size || File.directory?(filename)
    if unusable
        $logger.info "Can't use filename, try 'md5' instead: #{url}"
        filename = File.join(@cfgdir, dir, encoded_basename(url, :md5))
    end
    @urlencmap[filename] = url
    filename
end

#eval_arg(format, args, default = nil, &process_string) ⇒ Object

Apply some arguments to a format.

format

String or Proc

args

Array of Arguments



510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
# File 'lib/websitary/configuration.rb', line 510

# Apply some arguments to a format.
#
# format::  nil, a Proc or a format String
# args::    Array of arguments
# default:: returned when format is nil
#
# A Proc is called with the arguments. Anything else is expanded with % and
# the result is passed to the optional block, if one was given.
def eval_arg(format, args, default=nil, &process_string)
    return default if format.nil?

    if Proc === format
        $logger.debug "eval proc: #{format}/#{args.size}"
        return format.call(*args)
    end

    expanded = format % args
    process_string ? process_string.call(expanded) : expanded
end

#exclude(*urls) ⇒ Object

Configuration command: Add URL-exclusion patterns (REGEXPs or STRINGs).



448
449
450
451
452
453
454
455
456
457
458
459
460
# File 'lib/websitary/configuration.rb', line 448

# Configuration command: append URL-exclusion patterns to @exclude.
# Regexps are used as they are, Strings are matched literally. Any other
# value is logged as fatal and terminates with exit code 5.
def exclude(*urls)
    patterns = urls.map do |url|
        if url.is_a?(Regexp)
            url
        elsif url.is_a?(String)
            Regexp.new(Regexp.escape(url))
        else
            $logger.fatal "Must be regexp or string: #{url.inspect}"
            exit 5
        end
    end
    @exclude += patterns
end

#format(url, difftext) ⇒ Object

Format a diff according to URL’s source options.



500
501
502
503
504
# File 'lib/websitary/configuration.rb', line 500

# Format a diff according to url's :format option. The text is first passed
# through format_text; the converted text also serves as the default when
# no format is set.
def format(url, difftext)
    formatter = url_get(url, :format)
    converted = format_text(url, difftext)
    eval_arg(formatter, [converted], converted)
end

#format_text(url, text) ⇒ Object



484
485
486
487
488
489
490
491
492
493
494
495
496
# File 'lib/websitary/configuration.rb', line 484

# Convert text from the encoding named by url's :iconv option to the global
# :encoding. Without an :iconv option the text is returned unchanged.
#
# Conversion failures are logged and the unconverted text is returned.
def format_text(url, text)
    enc = url_get(url, :iconv)
    if enc
        denc = optval_get(:global, :encoding)
        begin
            if text.respond_to?(:encode)
                # The iconv library is no longer part of Ruby's standard
                # library; String#encode(dst, src) performs the same
                # enc => denc conversion.
                text = text.encode(denc, enc)
            else
                # Rubies without String#encode: fall back to Iconv.
                require 'iconv'
                text = Iconv.conv(denc, enc, text)
            end
        rescue StandardError, LoadError => e
            # LoadError is listed explicitly because it is not a
            # StandardError and a missing iconv must not abort the run.
            $logger.error "IConv failed #{enc} => #{denc}: #{e}"
        end
    end
    return text
end

#get_output_html(difftext) ⇒ Object



653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
# File 'lib/websitary/configuration.rb', line 653

# Render the collected diffs as one HTML page.
#
# difftext:: collection of url/text pairs (iterated with map)
#
# Pairs whose text is empty (after optional tag stripping) are dropped, the
# remainder is sorted with sort_difftext!, and a table of contents plus one
# <div> per url is generated. The page is assembled by applying the :page
# template to [title, toc, content] via eval_arg.
def get_output_html(difftext)
    # Apply the per-url :strip_tags option and discard empty texts.
    difftext = difftext.map do |url, text|
        tags = url_get(url, :strip_tags)
        text = strip_tags(text, :tags => tags) if tags
        text.empty? ? nil : [url, text]
    end
    difftext.compact!
    sort_difftext!(difftext)

    # Table of contents: one <li> per url, linking to the body element's id.
    toc = difftext.map do |url, text|
        ti  = url_get(url, :title, File.basename(url))
        tid = html_toc_id(url)
        bid = html_body_id(url)
        %{<li id="#{tid}" class="toc"><a class="toc" href="\##{bid}">#{ti}</a></li>}
    end.join("\n")

    # idx is the running 1-based counter shown in each entry's "count" div.
    idx = 0
    cnt = difftext.map do |url, text|
        idx += 1
        ti   = url_get(url, :title, File.basename(url))
        bid  = html_body_id(url)
        # With a :rewrite_link option the heading links to the rewritten url
        # and the "old"/"latest" file links are omitted.
        if (rewrite = url_get(url, :rewrite_link))
            urlr = eval_arg(rewrite, [url])
            ext  = ''
        else
            old  = %{<a class="old" href="#{file_url(oldname(url))}">old</a>}
            lst  = %{<a class="latest" href="#{file_url(latestname(url))}">latest</a>}
            ext  = %{ (#{old}, #{lst})}
            urlr = url
        end
        note    = difftext_annotation(url)
        # The onclick handler is only emitted when the global :toggle_body
        # option is set.
        onclick = optval_get(:global, :toggle_body) ? 'onclick="ToggleBody(this)"' : ''
        <<HTML
<div id="#{bid}" class="webpage" #{onclick}>
<div class="count">
#{idx}
</div>
<h1 class="diff">
<a class="external" href="#{urlr}">#{format_text(url, ti)}</a>#{ext}
</h1>
<div id="#{bid}_body">
<div class="annotation">
#{note && CGI::escapeHTML(note)}
</div>
<div class="diff,difftext">
#{format(url, text)}
</div>
</div>
</div>
HTML
    end.join(('<hr class="separator"/>') + "\n")

    # Use the :format page template; fall back to the :simple one.
    success, template = opt_get(:page, :format)
    unless success
        success, template = opt_get(:page, :simple)
    end
    return eval_arg(template, [@output_title, toc, cnt])
end

#get_output_rss(difftext) ⇒ Object



601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
# File 'lib/websitary/configuration.rb', line 601

# Render the collected diffs as an RSS feed and return it as a String.
#
# difftext:: collection of url/text pairs
#
# Requires the :rss option :url; without it a fatal message is logged and
# the program terminates with exit code 5. Entries whose text is empty after
# tag stripping are skipped.
def get_output_rss(difftext)
    success, rss_url = opt_get(:rss, :url)
    if success
        success, rss_version = opt_get(:rss, :version)
        # require "rss/#{rss_version}"

        rss         = RSS::Rss.new(rss_version)
        chan        = RSS::Rss::Channel.new
        chan.title  = @output_title
        [:description, :copyright, :category, :language, :image, :webMaster, :pubDate].each do |field|
            ok, val = opt_get(:rss, field)
            # These are channel-level fields. No item exists at this point,
            # so the setter has to be sent to the channel.
            chan.send(format_symbol(field, '%s='), val) if ok
        end
        chan.link   = rss_url
        rss.channel = chan

        difftext.each do |url, text|
            rss_format = url_get(url, :rss_format, 'plain_text')
            text = strip_tags(text, :format => rss_format)
            next if text.empty?

            item = RSS::Rss::Channel::Item.new
            item.date  = Time.now
            item.title = url_get(url, :title, File.basename(url))
            item.link  = eval_arg(url_get(url, :rewrite_link, '%s'), [url])
            # Per-url overrides are read from options named rss_<field>.
            [:author, :date, :enclosure, :category, :pubDate].each do |field|
                val = url_get(url, format_symbol(field, 'rss_%s'))
                item.send(format_symbol(field, '%s='), val) if val
            end

            annotation = difftext_annotation(url)
            annotation = "<pre>#{annotation}</pre>" if annotation
            case rss_format
            when 'plain_text'
                item.description = %{#{annotation}<pre>#{text}</pre>}
            else
                item.description = %{#{annotation}\n#{text}}
            end
            chan.items << item
        end

        return rss.to_s

    else

        $logger.fatal "Global option :rss[:url] not defined."
        exit 5

    end
end

#get_output_text(difftext) ⇒ Object



586
587
588
589
590
591
592
593
594
595
596
597
598
# File 'lib/websitary/configuration.rb', line 586

# Render the collected diffs as plain text.
#
# difftext:: collection of url/text pairs
#
# Each non-empty entry becomes a section made of the (possibly rewritten)
# url, the annotation, a blank line and the text; HTML text is converted
# with html_to_text first. Sections are separated by a line of 68 dashes.
# Entries without text or with empty text are skipped.
def get_output_text(difftext)
    sections = difftext.map do |url, text|
        next unless text
        text = html_to_text(text) if is_html?(text)
        # Yield nil (not false) for empty texts so that compact removes the
        # entry instead of the string "false" ending up in the output.
        next if text.empty?
        [
            eval_arg(url_get(url, :rewrite_link, '%s'), [url]),
            difftext_annotation(url),
            nil,
            text
        ].join("\n")
    end
    sections.compact.join("\n\n#{('-' * 68)}\n\n")
end

#global(options) ⇒ Object

Set a global option.



388
389
390
391
392
# File 'lib/websitary/configuration.rb', line 388

# Set global options: every key/value pair of options is stored in
# @options[:global].
def global(options)
    options.each do |name, setting|
        global_options = @options[:global]
        global_options[name] = setting
    end
end

#guess_dir(path) ⇒ Object

Guess path’s dirname.

foo/bar     -> foo
foo/bar.txt -> foo
foo/bar/    -> foo/bar


799
800
801
# File 'lib/websitary/configuration.rb', line 799

# Guess path's dirname: a path ending in '/' is taken to be a directory and
# only loses that trailing slash; anything else goes through File.dirname.
def guess_dir(path)
    if path[-1, 1] == '/'
        path[0...-1]
    else
        File.dirname(path)
    end
end

#highlighter(rx, color = nil, group = nil, tag = 'span') ⇒ Object

Return a Proc that takes a text as argument and highlights occurrences of rx.

rx

Regular expression

color

A string, sets the class to highlight-color (default: “red”)

group

A number (default: 0)

tag

The HTML tag to use (default: “span”)



1009
1010
1011
# File 'lib/websitary/configuration.rb', line 1009

# Build a Proc that wraps every match of rx in text with an HTML tag.
#
# rx::    Regular expression
# color:: suffix of the CSS class "highlight-<color>" (falls back to 'red')
# group:: number of the match group to re-insert (falls back to 0)
# tag::   the HTML tag to use
def highlighter(rx, color=nil, group=nil, tag='span')
    css_class   = "highlight-#{color || 'red'}"
    # "\\" followed by the group number forms the gsub back-reference.
    replacement = %{<#{tag} class="#{css_class}">\\#{group || 0}</#{tag}>}
    lambda {|text| text.gsub(rx, replacement)}
end

#is_excluded?(url) ⇒ Boolean

Returns:

  • (Boolean)


349
350
351
352
353
# File 'lib/websitary/configuration.rb', line 349

# Tell whether url matches any of the patterns in @exclude; the result is
# also written to the debug log.
def is_excluded?(url)
    excluded = @exclude.any? do |pattern|
        url =~ pattern
    end
    $logger.debug "is_excluded: #{url}: #{excluded}"
    return excluded
end

#latestname(url, ensure_dir = false, type = nil) ⇒ Object

Get the filename for the freshly downloaded copy.



726
727
728
# File 'lib/websitary/configuration.rb', line 726

# Filename of the freshly downloaded copy of url: an entry in the 'latest'
# directory.
def latestname(url, ensure_dir=false, type=nil)
    cache_dir = 'latest'
    encoded_filename(cache_dir, url, ensure_dir, type)
end

#oldname(url, ensure_dir = false, type = nil) ⇒ Object

Get the backup filename.



720
721
722
# File 'lib/websitary/configuration.rb', line 720

# Filename of the backup copy of url: an entry in the 'old' directory.
def oldname(url, ensure_dir=false, type=nil)
    cache_dir = 'old'
    encoded_filename(cache_dir, url, ensure_dir, type)
end

#opt_get(opt, val) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/websitary/configuration.rb', line 263

# Look up val in the option group @options[opt].
#
# Returns [true, value] when the key exists and [false, val] otherwise.
# A Symbol value is treated as a reference to another key of the same
# group and is resolved recursively.
def opt_get(opt, val)
    group = @options[opt]
    $logger.debug "val=#{val} vals=#{group.inspect}"
    unless group and group.has_key?(val)
        $logger.debug "opt_get no: #{opt} => #{val.inspect}"
        return [false, val]
    end

    found = group[val]
    $logger.debug "opt_get ok: #{opt} => #{found.inspect}"
    if Symbol === found
        $logger.debug "opt_get re: #{found}"
        opt_get(opt, found)
    else
        $logger.debug "opt_get true, #{found}"
        [true, found]
    end
end

#option(type, options) ⇒ Object

Configuration command: Set global options.

type

Symbol

options

Hash



376
377
378
379
380
381
382
383
384
# File 'lib/websitary/configuration.rb', line 376

# Configuration command: merge options into the existing option group type.
# An unknown group is reported as an error and nothing is stored.
#
# type::    Symbol
# options:: Hash
def option(type, options)
    $logger.info "option #{type}: #{options.inspect}"
    known = @options[type]
    return known.merge!(options) if known
    $logger.error "Unknown option type: #{type} (#{options.inspect})"
end

#optval_get(opt, val, default = nil) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/websitary/configuration.rb', line 248

# Resolve an option value. A Symbol val is looked up in the option group
# opt via opt_get (default is returned when the lookup fails); any other
# val is returned unchanged.
def optval_get(opt, val, default=nil)
    return val unless val.is_a?(Symbol)
    found, value = opt_get(opt, val)
    found ? value : default
end

#output_file(filename, outformat = nil) ⇒ Object

Set the output file.



367
368
369
# File 'lib/websitary/configuration.rb', line 367

# Set the output file for outformat (nil when no format is given) by
# recording it in @outfile.
def output_file(filename, outformat=nil)
    @outfile[outformat] = filename
    filename
end

#output_format(*format) ⇒ Object

Set the output format.



357
358
359
360
361
362
363
# File 'lib/websitary/configuration.rb', line 357

# Set the output format(s). Every name must be one of 'text', 'html' or
# 'rss'; otherwise a fatal message is logged and the program terminates
# with exit code 5.
def output_format(*format)
    known = ['text', 'html', 'rss']
    if format.any? {|name| !known.include?(name)}
        $logger.fatal "Unknown output format: #{format}"
        exit 5
    end
    @output_format = format
end

#parse_command_line_args(args) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/websitary/configuration.rb', line 81

# Parse the command-line arguments and load the selected profiles.
#
# args:: Array of command-line arguments; options are removed from it by
#        OptionParser#parse!, the remaining words are the profile names.
#
# If no profile is named, @default_profiles is used. When @app responds to
# "cmdline_arg_<command>" for the command in @execute, each profile name is
# handed to that handler; otherwise it is loaded with #profile.
#
# Returns self.
def parse_command_line_args(args)
    $logger.debug "parse_command_line_args: #{args}"
    parser = OptionParser.new do |opts|
        opts.banner =  "Usage: #{Websitary::APPNAME} [OPTIONS] [PROFILES] > [OUT]"
        opts.separator ''
        opts.separator "#{Websitary::APPNAME} is a free software with ABSOLUTELY NO WARRANTY under"
        opts.separator 'the terms of the GNU General Public License version 2 or newer.'
        opts.separator ''

        opts.separator 'General Options:'

        opts.on('-c', '--cfg=DIR', String, 'Configuration directory') do |value|
            @cfgdir = value
        end

        opts.on('-e', '--execute=COMMAND', String, 'Define what to do (default: downdiff)') do |value|
            @execute = value
        end

        # opts.on('-E', '--edit=PROFILE', String, 'Edit a profile') do |value|
        #   edit_profile value
        #   exit 0
        # end

        opts.on('-f', '--output-format=FORMAT', 'Output format (html, text, rss)') do |value|
            output_format(*value.split(/,/))
        end

        opts.on('--[no-]ignore-age', 'Ignore age limits') do |bool|
            set :ignore_age => bool
        end

        opts.on('--log=DESTINATION', String, 'Log destination') do |value|
            @logger = Websitary::AppLog.new(value != '-' && value)
        end

        opts.on('-o', '--output=FILENAME', String, 'Output') do |value|
            output_file(value)
        end

        opts.on('-s', '--set=NAME=VAR', String, 'Set a default option') do |value|
            key, val = value.split(/=/, 2)
            # NOTE(review): the value is evaluated as Ruby code. That is
            # tolerable for a trusted command line, but must never be fed
            # from untrusted input.
            set key.intern => eval(val)
        end

        opts.on('-t', '--timer=N', Numeric, 'Repeat every N seconds (never exit)') do |value|
            global(:timer => value)
        end

        opts.on('-x', '--exclude=N', Regexp, 'Exclude URLs matching this pattern') do |value|
            exclude(Regexp.new(value))
        end

        opts.separator ''
        opts.separator "Available commands (default: #@execute):"
        # Commands are the execute_* methods of the application object.
        commands = @app.methods.map do |m|
            mt = m.match(/^execute_(.*)$/)
            mt && mt[1]
        end
        commands.compact!
        commands.sort!
        opts.separator commands.join(', ')

        opts.separator ''
        opts.separator 'Available profiles:'
        opts.separator Dir[File.join(@cfgdir, '*.rb')].map {|f| File.basename(f, '.*')}.join(', ')

        opts.separator ''
        opts.separator 'Other Options:'

        opts.on('--debug', 'Show debug messages') do |v|
            $VERBOSE = $DEBUG = true
            @logger.set_level(:debug)
        end

        opts.on('-q', '--quiet', 'Be mostly quiet') do |v|
            @logger.set_level(:quiet)
        end

        opts.on('-v', '--verbose', 'Run verbosely') do |v|
            $VERBOSE = true
            @logger.set_level(:verbose)
        end

        opts.on('--version', 'Show version') do |v|
            puts Websitary::VERSION
            exit 1
        end

        opts.on_tail('-h', '--help', 'Show this message') do
            puts opts
            exit 1
        end
    end

    @profiles = parser.parse!(args)
    @profiles = @default_profiles if @profiles.empty?
    cla_handler = "cmdline_arg_#{@execute}"
    cla_handler = nil unless @app.respond_to?(cla_handler)
    @profiles.each do |pn|
        if cla_handler
            @app.send(cla_handler, self, pn)
        else
            profile pn
        end
    end

    self
end

#profile(profile_name) ⇒ Object

Configuration command: Load a profile



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/websitary/configuration.rb', line 298

# Configuration command: Load a profile.
#
# '-'       reads urls line by line via readlines and registers each with
#           #source; returns false.
# '__END__' evaluates the contents of DATA.
# otherwise the profile file is located with profile_filename and evaluated;
#           an unknown profile is logged as an error and false is returned.
def profile(profile_name)
    if '-' == profile_name
        readlines.map! {|line| line.chomp}.each {|url| source url}
        return false
    end

    if '__END__' == profile_name
        $logger.debug "Profile: __END__"
        return eval_profile(DATA.read)
    end

    path = profile_filename(profile_name)
    unless path
        $logger.error "Unknown profile: #{profile_name}"
        return false
    end
    $logger.debug "Profile: #{path}"
    eval_profile(File.read(path), path)
end

#profile_filename(profile_name, check_file_exists = true) ⇒ Object



1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
# File 'lib/websitary/configuration.rb', line 1032

# Locate the file of a profile.
#
# profile_name::      profile name; '.rb' is appended unless already present
# check_file_exists:: when true (default) return nil if no file was found;
#                     when false return the candidate in the configuration
#                     directory instead
#
# The current directory is searched before @cfgdir; the first existing file
# wins.
def profile_filename(profile_name, check_file_exists=true)
    if File.extname(profile_name) != '.rb'
        profile_name = "#{profile_name}.rb"
    end
    filename = nil
    ['.', @cfgdir].each do |d|
        filename = File.join(d, profile_name)
        # File.exist? replaces the deprecated File.exists?, which no longer
        # exists in current Ruby versions.
        if File.exist?(filename)
            return filename
        end
    end
    # filename still holds the last candidate, i.e. the one in @cfgdir.
    return check_file_exists ? nil : filename
end

#push_hrefs(url, hpricot, &condition) ⇒ Object

Scan hpricot document for hrefs and push them onto @todo if not already included.



867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
# File 'lib/websitary/configuration.rb', line 867

# Scan a hpricot document for hrefs and push them onto @todo if they are not
# already known.
#
# url::       the url the document was loaded from
# hpricot::   the parsed document
# condition:: block called with (uri0, pn0, uri, pn) -- base URI, base
#             directory, link URI, link directory; only links for which it
#             returns true are followed
#
# Nothing is done when the document carries a 'nofollow' robots directive,
# url is excluded, or the url's :depth option is used up. Errors are logged
# and not raised.
def push_hrefs(url, hpricot, &condition)
    begin
        $logger.debug "push_refs: #{url}"
        return if robots?(hpricot, 'nofollow') or is_excluded?(url)
        depth = url_get(url, :depth)
        return if depth and depth <= 0
        uri0  = URI.parse(url)
        # pn0   = Pathname.new(guess_dir(File.expand_path(uri0.path)))
        pn0   = Pathname.new(guess_dir(uri0.path))
        (hpricot / 'a').each do |a|
            next if a['rel'] == 'nofollow'
            href = clean_url(a['href'])
            next if href.nil? or href == url or is_excluded?(href)
            href = rewrite_href(href, url, uri0, pn0, true)
            # rewrite_href returns nil for links it cannot resolve. Skip
            # those before touching href, otherwise the resulting error
            # would abort the scan of all remaining links.
            next unless href
            curl = canonic_url(href)
            next if @done.include?(curl) or @todo.include?(curl)
            # pn   = Pathname.new(guess_dir(File.expand_path(uri.path)))
            uri  = URI.parse(href)
            pn   = Pathname.new(guess_dir(uri.path))
            next unless condition.call(uri0, pn0, uri, pn)
            next unless robots_allowed?(curl, uri)
            # The new url inherits the options of the page it was found on.
            opts = @urls[url].dup
            # opts[:title] = File.basename(curl)
            opts[:title] = [opts[:title], File.basename(curl)].join(' - ')
            opts[:depth] = depth - 1 if depth and depth >= 0
            # opts[:sleep] = delay if delay
            url_set(curl, opts)
            to_do curl
        end
    rescue Exception => e
        # $logger.error e  #DBG#
        $logger.error e.message
        $logger.debug e.backtrace
    end
end

#quicklist(profile_name) ⇒ Object



291
292
293
# File 'lib/websitary/configuration.rb', line 291

# Remember profile_name as the name of the quicklist profile.
def quicklist(profile_name)
    @quicklist_profile = profile_name
    profile_name
end

#rewrite_href(href, url, uri = nil, urd = nil, local = false) ⇒ Object

Try to make href an absolute url.



941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
# File 'lib/websitary/configuration.rb', line 941

# Try to make href an absolute url.
#
# href::  the link to rewrite
# url::   the url of the document containing the link
# uri::   URI.parse(url), if the caller already has it
# urd::   the directory part of uri's path, if the caller already has it
# local:: when true, a relative link whose target is already queued,
#         downloaded or cached on disk is replaced by the local filename
#
# Returns the rewritten link as a String, or nil when href is missing,
# excluded, cannot be resolved, or an error occurred (errors are logged).
def rewrite_href(href, url, uri=nil, urd=nil, local=false)
    begin
        return nil if !href or is_excluded?(href)
        uri ||= URI.parse(url)
        # A link starting with '/' is resolved against the base URI directly.
        if href =~ /^\s*\//
            return uri.merge(href).to_s
        end
        urh   = URI.parse(href)
        urd ||= guess_dir(uri.path)
        rv    = nil

        # $logger.debug "DBG", uri, urh, #DBG#
        # Anything that looks like it contains a scheme ("word:") is kept
        # unchanged.
        if href =~ /\w+:/
            # $logger.debug "DBG href=#$0" #DBG#
            rv = href
        elsif urh.relative?
            # $logger.debug "DBG urh relative" #DBG#
            if uri.relative?
                # $logger.debug "DBG both relative" #DBG#
                # Both are relative: join as paths; rv stays nil unless uri
                # is a plain URI::Generic.
                if uri.instance_of?(URI::Generic)
                    rv = File.join(urd, href)
                    # $logger.debug "DBG rv=#{rv}" #DBG#
                end
            else
                rv = uri.merge(href).to_s
                # $logger.debug "DBG relativ rv=#{rv}" #DBG#
                if local
                    # Point to the cached copy when the target is known.
                    hf = latestname(rv)
                    if @todo.include?(rv) or @done.include?(rv) or File.exist?(hf)
                        rv = hf
                        # $logger.debug "DBG relativ, local rv=#{rv}" #DBG#
                    end
                end
            end
        elsif href[0..0] == '#'
            # $logger.debug "DBG anchor" #DBG#
            rv = url + href
        elsif uri.host == urh.host
            # $logger.debug "DBG merge" #DBG#
            rv = uri.merge(href).to_s
        else
            # $logger.debug "as is" #DBG#
            rv = href
        end

        # Only a String is a valid result; nil falls through silently,
        # anything else is reported as an internal error.
        case rv
        when String
            return rv
        when nil
        else
            $logger.error "Internal error: href=#{href}"
            $logger.debug caller.join("\n")
        end
        return
    rescue Exception => e
        # $logger.error e  #DBG#
        $logger.error e.message
        $logger.debug e.backtrace
    end
    return nil
end

#rewrite_urls(url, doc) ⇒ Object

Rewrite urls in doc

url

String

doc

Hpricot document



909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
# File 'lib/websitary/configuration.rb', line 909

# Rewrite the links in doc: the href of every <a> and the src of every <img>
# is passed through rewrite_href; elements pointing to excluded urls are
# commented out instead. Only <a> links are rewritten in "local" mode.
#
# url:: String
# doc:: Hpricot document
#
# Returns doc.
def rewrite_urls(url, doc)
    uri = URI.parse(url)
    urd = guess_dir(uri.path)
    # [selector, attribute holding the link, local flag for rewrite_href]
    [['a', 'href', true], ['img', 'src', false]].each do |selector, attr, local|
        (doc / selector).each do |elt|
            target = clean_url(elt[attr])
            if is_excluded?(target)
                comment_element(doc, elt)
            else
                rewritten = rewrite_href(target, url, uri, urd, local)
                elt[attr] = rewritten if rewritten
            end
        end
    end
    doc
end

#save_dir(url, dir, title = nil) ⇒ Object



804
805
806
807
808
809
810
811
812
813
814
# File 'lib/websitary/configuration.rb', line 804

# Determine the directory for url and have @app create it.
#
# dir:: true  -> @cfgdir/attachments/<encoded title> (title defaults to the
#                url's :title option)
#       Proc  -> the result of calling it with url
#       other -> used as given
#
# Returns the directory (ensure_dir is skipped when it is nil/false).
def save_dir(url, dir, title=nil)
    if true == dir
        title ||= url_get(url, :title)
        dir = File.join(@cfgdir, 'attachments', encode(title))
    elsif Proc === dir
        dir = dir.call(url)
    end
    @app.ensure_dir(dir) if dir
    dir
end

#set(options) ⇒ Object

Configuration command: Set the default value for source-options.



397
398
399
400
# File 'lib/websitary/configuration.rb', line 397

# Configuration command: merge options into the default source-options
# (@default_options).
def set(options)
    $logger.debug "set: #{options.inspect}"
    @default_options.update(options)
end

#shortcut(symbol, args) ⇒ Object

Define an options shortcut.



321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/websitary/configuration.rb', line 321

# Define an options shortcut: for every field in args, symbol becomes a key
# of the option group @options[field] with the given value.
#
# With a :delegate entry, every option group not mentioned in args maps
# symbol to the delegate. Without one, a warning is logged if any of the
# listed core groups is left undefined.
def shortcut(symbol, args)
    given     = args.keys
    missing   = @options.keys - given
    delegated = given.include?(:delegate)
    core      = [:download, :downloadformat, :diff, :format, :diffprocess]

    # :downloadprocess
    if delegated
        missing.each {|field| @options[field][symbol] = args[:delegate]}
    elsif missing.any? {|field| core.include?(field)}
        $logger.warn "Shortcut #{symbol}: Undefined fields: #{missing.inspect}"
    end

    args.each do |field, val|
        next if field == :delegate
        @options[field][symbol] = val
    end
end

#show_output(difftext) ⇒ Object

Generate & view the final output.

difftext

Hash



553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
# File 'lib/websitary/configuration.rb', line 553

# Generate & view the final output.
#
# difftext:: Hash; when it is empty nothing is generated.
#
# For every entry of @output_format the method get_output_<format> is
# called with +difftext+. A non-nil result is printed to stdout when the
# output file is '-'; otherwise it is written with write_file and then
# handed to view_output_<format>.
#
# Returns 0 when +difftext+ is empty (after logging a warning), 1
# otherwise. Logs a fatal message and exits with status 5 when no
# get_output_<format> method exists for a requested format.
def show_output(difftext)
    if difftext.empty?
        parts = ['No news is good news']
        if @app.tdiff_min
            parts.push("try again in #{@app.format_tdiff(@app.tdiff_min)}")
        end
        $logger.warn(parts.join('; '))
        return 0
    end

    @output_format.each do |outformat|
        builder = "get_output_#{outformat}"
        if !respond_to?(builder)
            $logger.fatal "Unknown output format: #{outformat}"
            exit 5
        end

        rendered = send(builder, difftext)
        # A builder may decline to produce anything for this run.
        next unless rendered

        target = get_outfile(outformat)
        if '-' == target
            puts rendered
        else
            write_file(target) {|io| io.puts rendered}
            self.send("view_output_#{outformat}", target)
        end
    end
    1
end

#source(urls, opts = {}) ⇒ Object

Configuration command: Define a source.

urls

String



415
416
417
418
419
420
# File 'lib/websitary/configuration.rb', line 415

# Configuration command: Define a source.
#
# urls:: String with one URL per line. An Array of such strings (possibly
#        nested or containing nil) is accepted as well.
# opts:: Hash of source-options that override the defaults for these URLs.
#
# Every URL is registered through url_set with a fresh copy of
# @default_options updated by +opts+, and then queued through to_do.
# Surrounding whitespace (including the "\r" left over from CRLF line
# endings) is removed and blank lines are skipped, so an empty string is
# never registered as a URL.
#
# Returns the list of URLs that were processed.
def source(urls, opts={})
    lines = [urls].flatten.compact.map {|entry| entry.to_s.split("\n")}.flatten
    lines = lines.map {|line| line.strip}.reject {|line| line.empty?}
    lines.each do |url|
        url_set(url, @default_options.dup.update(opts))
        to_do url
    end
end

#strip_tags(doc, args = {}) ⇒ Object



834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
# File 'lib/websitary/configuration.rb', line 834

# Remove all elements matching the given selectors from +doc+.
#
# doc::  A String (parsed with Hpricot first) or an already parsed
#        document responding to #search.
# args:: Hash of options:
#        :tags::   A selector or an Array of selectors to remove; when
#                  absent, strip_tags_default is used.
#        :format:: :hpricot returns the document object itself; any other
#                  value is used to build the conversion method name
#                  "to_<format>" (default: :html).
#
# When neither :tags nor a configured default is available nothing is
# removed (instead of failing on nil), and a single selector String is
# treated as a one-element list.
def strip_tags(doc, args={})
    tags = Array(args[:tags] || strip_tags_default)
    doc  = Hpricot(doc) if doc.is_a?(String)
    tags.each {|tag| doc.search(tag).remove}

    format = args[:format] || :html
    return doc if format == :hpricot
    doc.send("to_#{format}")
end

#strip_tags_defaultObject



828
829
830
831
# File 'lib/websitary/configuration.rb', line 828

# Look up the :default entry of the :strip_tags option via opt_get.
#
# Returns a copy of the configured value, or nil when opt_get reports that
# no such entry exists.
def strip_tags_default
    found, default_tags = opt_get(:strip_tags, :default)
    found ? default_tags.dup : nil
end

#to_do(url) ⇒ Object



344
345
346
# File 'lib/websitary/configuration.rb', line 344

# Queue +url+ in @todo unless is_excluded? rejects it.
#
# Returns @todo when the URL was appended, nil when it was excluded.
def to_do(url)
    return nil if is_excluded?(url)
    @todo.push(url)
end

#unset(*options) ⇒ Object

Configuration command: Unset a default source-option.



405
406
407
408
409
# File 'lib/websitary/configuration.rb', line 405

# Configuration command: Unset a default source-option.
#
# options:: One or more option keys to delete from @default_options.
#           Keys that are not present are ignored.
#
# Returns the list of keys that was passed in.
def unset(*options)
    options.each {|option| @default_options.delete(option)}
end

#url_from_filename(filename) ⇒ Object



731
732
733
734
735
736
737
738
739
# File 'lib/websitary/configuration.rb', line 731

# Look up the entry stored for +filename+ in @urlencmap.
#
# Logs the mapping at debug level when an entry exists and a warning when
# it does not.
#
# Returns the stored entry (nil when there is none).
def url_from_filename(filename)
    mapped = @urlencmap[filename]
    unless mapped
        $logger.warn "Unmapped filename: #{filename}"
        return mapped
    end
    $logger.debug "Map filename: #{filename} -> #{mapped}"
    mapped
end

#url_get(url, opt, default = nil) ⇒ Object

Retrieve an option for a URL.

url

String

opt

Symbol



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/websitary/configuration.rb', line 201

# Retrieve an option for a URL.
#
# url::     String; key into @urls.
# opt::     Symbol; the option to look up.
# default:: Value returned when nothing else applies.
#
# Resolution order:
# 1. URLs without an entry in @urls yield +default+ right away.
# 2. For :diffprocess and :format, the URL's :diff entry is consulted when
#    the option itself is not set for the URL.
# 3. If the consulted key is missing, the URL's :use entry is taken.
# 4. A Symbol value is resolved through opt_get(opt, value); any other
#    non-nil value is returned as is.
# 5. Otherwise +default+ is returned; when it is nil/false, the result of
#    opt_get(opt, :default) is used if that lookup succeeds.
def url_get(url, opt, default=nil)
    opts = @urls[url]
    if !opts
        $logger.debug "Non-registered URL: #{url}"
        return default
    end
    $logger.debug "get: opts=#{opts.inspect}"

    # :diffprocess and :format piggyback on :diff when not set themselves.
    lookup = opt
    if [:diffprocess, :format].include?(opt) && !opts.has_key?(opt)
        lookup = :diff
    end

    $logger.debug "get: opt=#{opt} opt_=#{lookup}"
    $logger.debug "get: #{opts[lookup]} #{opts[:use]}"

    val = if opts.has_key?(lookup)
        opts[lookup]
    elsif opts.has_key?(:use)
        opts[:use]
    end

    if val.is_a?(Symbol)
        $logger.debug "get: val=#{val}"
        found, resolved = opt_get(opt, val)
        $logger.debug "get: #{found}, #{resolved}"
        return resolved if found
    elsif !val.nil?
        $logger.debug "get: return val=#{val}"
        return val
    end

    unless default
        found, fallback = opt_get(opt, :default)
        default = fallback if found
    end

    $logger.debug "get: return default=#{default}"
    default
end

#url_set(url, items) ⇒ Object



192
193
194
195
# File 'lib/websitary/configuration.rb', line 192

# Merge +items+ into the options stored for +url+ in @urls, creating an
# empty entry first when the URL has none yet.
#
# url::   Key into @urls.
# items:: Hash of options; existing keys are overwritten.
#
# Returns the URL's (updated) options hash.
def url_set(url, items)
    (@urls[url] ||= {}).update(items)
end

#urlextname(url) ⇒ Object



787
788
789
790
791
792
# File 'lib/websitary/configuration.rb', line 787

# Return the file extension of the path component of +url+.
#
# url:: String; parsed with URI.parse.
#
# Returns the extension including the leading dot (e.g. ".html"), an empty
# string when the path has none, or nil when the URL cannot be parsed or
# has no usable path.
#
# Only StandardError is rescued, so this stays a best-effort helper while
# Interrupt, SystemExit and similar are no longer swallowed.
def urlextname(url)
    File.extname(URI.parse(url).path)
rescue StandardError
    nil
end

#view(view) ⇒ Object

Configuration command: Set the viewer.



465
466
467
# File 'lib/websitary/configuration.rb', line 465

# Configuration command: Set the viewer.
#
# view:: The new value for @view; replaces the previous setting.
#
# NOTE(review): the constructor initializes @view to 'w3m "%s"', so this is
# presumably a command template with %s standing for the output file --
# confirm against the view_output_* methods.
def view(view)
    @view = view
end

#view_output(outfile = nil) ⇒ Object



1014
1015
1016
# File 'lib/websitary/configuration.rb', line 1014

# Show an output file with the viewer method of the first configured
# output format (view_output_<format>).
#
# outfile:: File to show; when nil/false, the result of get_outfile is
#           used instead.
#
# Returns whatever the viewer method returns.
def view_output(outfile=nil)
    viewer = "view_output_#{@output_format[0]}"
    target = outfile || get_outfile
    send(viewer, target)
end

#write_file(filename, mode = 'w', &block) ⇒ Object



1047
1048
1049
1050
# File 'lib/websitary/configuration.rb', line 1047

# Open +filename+, hand the IO object to the block, and record the file
# with @mtimes afterwards.
#
# filename:: Path of the file to open.
# mode::     Mode passed to File.open (default: 'w').
# block::    Called with the opened IO object.
#
# The file is closed even when the block raises; in that case @mtimes is
# not updated. Returns the result of @mtimes.set(filename).
def write_file(filename, mode='w', &block)
    handle = File.open(filename, mode)
    begin
        block.call(handle)
    ensure
        handle.close unless handle.closed?
    end
    @mtimes.set(filename)
end