Class: Websitary::Configuration
- Inherits:
-
Object
- Object
- Websitary::Configuration
- Defined in:
- lib/websitary/configuration.rb
Overview
This class defines the scope in which profiles are evaluated. Most of its methods are suitable for use in profiles.
Instance Attribute Summary collapse
-
#cfgdir ⇒ Object
The user configuration directory.
-
#done ⇒ Object
Array of downloaded urls.
-
#execute ⇒ Object
What to do.
-
#mtimes ⇒ Object
Cached mtimes.
-
#options ⇒ Object
Global Options.
-
#quicklist_profile ⇒ Object
The name of the quicklist profile.
-
#todo ⇒ Object
readonly
Array of urls to be downloaded.
-
#urls ⇒ Object
Hash (key = URL, value = Hash of options).
Instance Method Summary collapse
-
#call_cmd(cmd, cmdargs, args = {}) ⇒ Object
Apply the argument to cmd (a format String or a Proc).
- #canonic_filename(filename) ⇒ Object
-
#canonic_url(url) ⇒ Object
Strip the url’s last part (after #).
- #clean_url(url) ⇒ Object
- #comment_element(doc, elt) ⇒ Object
-
#default(*profile_names) ⇒ Object
Configuration command: Set the default profiles.
-
#diff(diff) ⇒ Object
Configuration command: Set the default diff program.
-
#diffname(url, ensure_dir = false) ⇒ Object
Get the diff filename.
-
#diffprocess(&block) ⇒ Object
Configuration command: Set the default diff processor.
-
#download(download) ⇒ Object
Configuration command: Set the default downloader.
-
#downloadprocess(&block) ⇒ Object
Configuration command: Set the default download processor.
-
#edit(cmd) ⇒ Object
Configuration command: Set the editor.
- #edit_profile(profile = nil) ⇒ Object
-
#eligible_path?(url, path0, path) ⇒ Boolean
Check whether path is eligible on the basis of url or path0.
- #encoded_basename(url, type = 'tree') ⇒ Object
- #encoded_basename_flat(url) ⇒ Object
- #encoded_basename_md5(url) ⇒ Object
- #encoded_basename_tree(url) ⇒ Object
- #encoded_filename(dir, url, ensure_dir = false, type = nil) ⇒ Object
-
#eval_arg(format, args, default = nil, &process_string) ⇒ Object
Apply some arguments to a format.
-
#exclude(*urls) ⇒ Object
Configuration command: Add URL-exclusion patterns (REGEXPs or STRINGs).
-
#format(url, difftext) ⇒ Object
Format a diff according to URL’s source options.
- #format_text(url, text) ⇒ Object
- #get_output_html(difftext) ⇒ Object
- #get_output_rss(difftext) ⇒ Object
- #get_output_text(difftext) ⇒ Object
-
#global(options) ⇒ Object
Set a global option.
-
#guess_dir(path) ⇒ Object
Guess path’s dirname.
-
#highlighter(rx, color = nil, group = nil, tag = 'span') ⇒ Object
Return a Proc that takes a text as argument and highlights occurrences of rx.
-
#initialize(app, args = []) ⇒ Configuration
constructor
A new instance of Configuration.
- #is_excluded?(url) ⇒ Boolean
-
#latestname(url, ensure_dir = false, type = nil) ⇒ Object
Get the filename for the freshly downloaded copy.
-
#oldname(url, ensure_dir = false, type = nil) ⇒ Object
Get the backup filename.
- #opt_get(opt, val) ⇒ Object
-
#option(type, options) ⇒ Object
Configuration command: Set global options.
- #optval_get(opt, val, default = nil) ⇒ Object
-
#output_file(filename, outformat = nil) ⇒ Object
Set the output file.
-
#output_format(*format) ⇒ Object
Set the output format.
- #parse_command_line_args(args) ⇒ Object
-
#profile(profile_name) ⇒ Object
Configuration command: Load a profile.
- #profile_filename(profile_name, check_file_exists = true) ⇒ Object
-
#push_hrefs(url, hpricot, &condition) ⇒ Object
Scan hpricot document for hrefs and push them onto @todo if not already included.
- #quicklist(profile_name) ⇒ Object
-
#rewrite_href(href, url, uri = nil, urd = nil, local = false) ⇒ Object
Try to make href an absolute url.
-
#rewrite_urls(url, doc) ⇒ Object
Rewrite urls in doc (url: String, doc: Hpricot document).
- #save_dir(url, dir, title = nil) ⇒ Object
-
#set(options) ⇒ Object
Configuration command: Set the default value for source-options.
-
#shortcut(symbol, args) ⇒ Object
Define an options shortcut.
-
#show_output(difftext) ⇒ Object
Generate & view the final output.
-
#source(urls, opts = {}) ⇒ Object
Configuration command: Define a source.
- #strip_tags(doc, args = {}) ⇒ Object
- #strip_tags_default ⇒ Object
- #to_do(url) ⇒ Object
-
#unset(*options) ⇒ Object
Configuration command: Unset a default source-option.
- #url_from_filename(filename) ⇒ Object
-
#url_get(url, opt, default = nil) ⇒ Object
Retrieve an option for a url (url: String, opt: Symbol).
- #url_set(url, items) ⇒ Object
- #urlextname(url) ⇒ Object
-
#view(view) ⇒ Object
Configuration command: Set the viewer.
- #view_output(outfile = nil) ⇒ Object
- #write_file(filename, mode = 'w', &block) ⇒ Object
Constructor Details
#initialize(app, args = []) ⇒ Configuration
Returns a new instance of Configuration.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/websitary/configuration.rb', line 32
# Create a new Configuration.
#
# Sets up the logger, chooses the configuration directory, resets all
# state to its defaults, then calls +migrate+, loads the 'config.rb'
# profile and parses the command-line arguments.
#
# app::  The application object (kept in @app).
# args:: Array of command-line arguments (default: []).
def initialize(app, args=[])
  @logger = Websitary::AppLog.new
  $logger.debug "Configuration#initialize"
  @app    = app

  # Default: ~/.websitary when HOME is set, otherwise the current directory.
  @cfgdir = ENV['HOME'] ? File.join(ENV['HOME'], '.websitary') : '.'

  # The first of these directories that exists overrides the default.
  # Entries whose base directory is unknown are dropped so that nil is
  # never passed to File.exist? (which would raise a TypeError).
  sysconfdir = RbConfig::CONFIG['sysconfdir']
  candidates = [
    ENV['USERPROFILE'] && File.join(ENV['USERPROFILE'], 'websitary'),
    sysconfdir && File.join(sysconfdir, 'websitary')
  ].compact
  existing = candidates.find {|dir| File.exist?(dir)}
  @cfgdir  = existing if existing

  @cmd_edit          = 'vi "%s"'
  @execute           = 'downdiff'
  @quicklist_profile = 'quicklist'
  @view              = 'w3m "%s"'

  @allow            = {}
  @default_options  = {}
  @default_profiles = [@quicklist_profile]
  @done             = []
  @mtimes           = Websitary::FileMTimes.new(self)
  @options          = {}
  @outfile          = {}
  @profiles         = []
  @robots           = {}
  @todo             = []
  @exclude          = [/^\s*(javascript|mailto):/]
  @urlencmap        = {}
  @urls             = {}

  @suffix = {
    'text' => 'txt'
    # 'rss' => 'xml'
  }

  migrate
  profile 'config.rb'
  parse_command_line_args(args)

  # A profile or command-line option may already have chosen the format.
  @output_format ||= ['html']
  @output_title = %{#{Websitary::APPNAME}: #{@profiles.join(", ")}}
end
Instance Attribute Details
#cfgdir ⇒ Object
The user configuration directory
19 20 21 |
# File 'lib/websitary/configuration.rb', line 19
# Reader for the user configuration directory (@cfgdir).
def cfgdir
  @cfgdir
end
#done ⇒ Object
Array of downloaded urls.
17 18 19 |
# File 'lib/websitary/configuration.rb', line 17
# Reader for the array of downloaded urls (@done).
def done
  @done
end
#execute ⇒ Object
What to do
21 22 23 |
# File 'lib/websitary/configuration.rb', line 21
# Reader for the command to execute (@execute).
def execute
  @execute
end
#mtimes ⇒ Object
Cached mtimes
25 26 27 |
# File 'lib/websitary/configuration.rb', line 25
# Reader for the cached mtimes (@mtimes).
def mtimes
  @mtimes
end
#options ⇒ Object
Global Options
23 24 25 |
# File 'lib/websitary/configuration.rb', line 23
# Reader for the global options (@options).
def options
  @options
end
#quicklist_profile ⇒ Object
The name of the quicklist profile
27 28 29 |
# File 'lib/websitary/configuration.rb', line 27
# Reader for the name of the quicklist profile (@quicklist_profile).
def quicklist_profile
  @quicklist_profile
end
#todo ⇒ Object (readonly)
Array of urls to be downloaded.
15 16 17 |
# File 'lib/websitary/configuration.rb', line 15
# Reader for the array of urls still to be downloaded (@todo).
def todo
  @todo
end
#urls ⇒ Object
Hash (key = URL, value = Hash of options)
13 14 15 |
# File 'lib/websitary/configuration.rb', line 13
# Reader for the url table (@urls): key = URL, value = Hash of options.
def urls
  @urls
end
Instance Method Details
#call_cmd(cmd, cmdargs, args = {}) ⇒ Object
Apply the argument to cmd (a format String or a Proc). If a String, execute the command.
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 |
# File 'lib/websitary/configuration.rb', line 532
# Apply +cmdargs+ to +cmd+ via eval_arg. When +cmd+ is a format String,
# the formatted result is run as a shell command.
#
# cmd::     Format String or Proc
# cmdargs:: Array of arguments
# args::    Hash; :default is the fallback value, :url selects the
#           url whose :timeout option (if any) bounds the call
#
# Returns the result of eval_arg, or args[:default] on timeout.
def call_cmd(cmd, cmdargs, args={})
  default = args[:default]
  url     = args[:url]
  run     = lambda { eval_arg(cmd, cmdargs, default) {|shell_cmd| `#{shell_cmd}`} }
  timeout = url ? url_get(url, :timeout) : nil
  return run.call unless timeout

  begin
    Timeout::timeout(timeout) { run.call }
  rescue Timeout::Error
    $logger.error "Timeout #{timeout}: #{url}"
    default
  end
end
#canonic_filename(filename) ⇒ Object
1053 1054 1055 |
# File 'lib/websitary/configuration.rb', line 1053
# Pass +filename+ through the global :canonic_filename command.
# +filename+ itself is the default handed to call_cmd.
def canonic_filename(filename)
  canonicalizer = optval_get(:global, :canonic_filename)
  call_cmd(canonicalizer, [filename], :default => filename)
end
#canonic_url(url) ⇒ Object
Strip the url’s last part (after #).
823 824 825 |
# File 'lib/websitary/configuration.rb', line 823
# Strip the url's last part (everything from the first '#' to the end
# of the line).
def canonic_url(url)
  fragment = /#.*$/
  url.sub(fragment, '')
end
#clean_url(url) ⇒ Object
817 818 819 |
# File 'lib/websitary/configuration.rb', line 817
# Return +url+ without surrounding whitespace; a nil/false +url+ is
# returned unchanged.
def clean_url(url)
  return url unless url
  url.strip
end
#comment_element(doc, elt) ⇒ Object
934 935 936 937 |
# File 'lib/websitary/configuration.rb', line 934
# Wrap +elt+ in a WEBSITARY-marked HTML comment by inserting the opening
# marker before it and the closing marker after it in +doc+.
def comment_element(doc, elt)
  opening = '<!-- WEBSITARY: '
  closing = '-->'
  doc.insert_before(elt, opening)
  doc.insert_after(elt, closing)
end
#default(*profile_names) ⇒ Object
Configuration command: Set the default profiles
286 287 288 |
# File 'lib/websitary/configuration.rb', line 286
# Configuration command: Set the default profiles.
#
# profile_names:: The profile names stored as the new default list.
def default(*profile_names)
  @default_profiles = profile_names
end
#diff(diff) ⇒ Object
Configuration command: Set the default diff program.
472 473 474 |
# File 'lib/websitary/configuration.rb', line 472
# Configuration command: Set the default diff program
# (stored as @options[:diff][:default]).
def diff(diff)
  @options[:diff][:default] = diff
end
#diffname(url, ensure_dir = false) ⇒ Object
Get the diff filename.
714 715 716 |
# File 'lib/websitary/configuration.rb', line 714
# Get the diff filename for +url+. Diff files live in the 'diff'
# directory and always use the 'md5' cache type.
def diffname(url, ensure_dir=false)
  encoded_filename('diff', url, ensure_dir, 'md5')
end
#diffprocess(&block) ⇒ Object
Configuration command: Set the default diff processor. The block takes the diff text (STRING) as argument.
434 435 436 |
# File 'lib/websitary/configuration.rb', line 434
# Configuration command: Set the default diff processor. The block
# takes the diff text (STRING) as argument.
#
# NOTE(review): the block is stored under @options[:diff][:default],
# i.e. the same slot the #diff command writes -- confirm that this is
# intended rather than @options[:diffprocess][:default].
def diffprocess(&block)
  @options[:diff][:default] = block
end
#download(download) ⇒ Object
Configuration command: Set the default downloader.
479 480 481 |
# File 'lib/websitary/configuration.rb', line 479
# Configuration command: Set the default downloader
# (stored as @options[:download][:default]).
def download(download)
  @options[:download][:default] = download
end
#downloadprocess(&block) ⇒ Object
Configuration command: Set the default download processor. The block takes the downloaded text (STRING) as argument.
426 427 428 |
# File 'lib/websitary/configuration.rb', line 426
# Configuration command: Set the default download processor. The block
# takes the downloaded text (STRING) as argument and is stored as
# @options[:downloadprocess][:default].
def downloadprocess(&block)
  @options[:downloadprocess][:default] = block
end
#edit(cmd) ⇒ Object
Configuration command: Set the editor.
441 442 443 |
# File 'lib/websitary/configuration.rb', line 441
# Configuration command: Set the editor command (a format string that
# edit_profile fills with the profile filename).
def edit(cmd)
  @cmd_edit = cmd
end
#edit_profile(profile = nil) ⇒ Object
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 |
# File 'lib/websitary/configuration.rb', line 1019
# Open a profile in the configured editor.
#
# profile:: A profile name or an Array of names; defaults to @profiles.
#           Each element of an Array is edited in turn.
def edit_profile(profile=nil)
  profile ||= @profiles
  if profile.is_a?(Array)
    profile.each {|name| edit_profile name}
  else
    fn = profile_filename(profile)
    $logger.debug "edit: #{fn}"
    # Run the editor command with the filename substituted in.
    `#{@cmd_edit % fn}`
  end
end
#eligible_path?(url, path0, path) ⇒ Boolean
Check whether path is eligible on the basis of url or path0. This checks either for a :match option for url or the extensions of path0 and path.
855 856 857 858 859 860 861 862 |
# File 'lib/websitary/configuration.rb', line 855
# Check whether +path+ is eligible on the basis of +url+ or +path0+.
# If +url+ has a :match option, +path+ is matched against it;
# otherwise the extensions of +path0+ and +path+ must be equal.
def eligible_path?(url, path0, path)
  rx = url_get(url, :match)
  return path =~ rx if rx
  File.extname(path0) == File.extname(path)
end
#encoded_basename(url, type = 'tree') ⇒ Object
761 762 763 764 765 766 767 768 769 |
# File 'lib/websitary/configuration.rb', line 761
# Encode +url+ as a cache basename by dispatching to
# encoded_basename_<type>. Logs a fatal error and exits with status 5
# if no such method exists.
def encoded_basename(url, type='tree')
  encoder = "encoded_basename_#{type}"
  unless respond_to?(encoder)
    $logger.fatal "Unknown cache type: #{type}"
    exit 5
  end
  send(encoder, url)
end
#encoded_basename_flat(url) ⇒ Object
777 778 779 |
# File 'lib/websitary/configuration.rb', line 777
# 'flat' cache type: the basename is encode(url).
def encoded_basename_flat(url)
  encode(url)
end
#encoded_basename_md5(url) ⇒ Object
782 783 784 |
# File 'lib/websitary/configuration.rb', line 782
# 'md5' cache type: the basename is the hexadecimal MD5 digest of +url+.
def encoded_basename_md5(url)
  Digest::MD5.hexdigest(url)
end
#encoded_basename_tree(url) ⇒ Object
772 773 774 |
# File 'lib/websitary/configuration.rb', line 772
# 'tree' cache type: encode +url+ with '/' as second argument and pass
# the result through ensure_filename.
def encoded_basename_tree(url)
  encoded = encode(url, '/')
  ensure_filename(encoded)
end
#encoded_filename(dir, url, ensure_dir = false, type = nil) ⇒ Object
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 |
# File 'lib/websitary/configuration.rb', line 742
# Build the cache filename for +url+ below @cfgdir/+dir+ and remember
# the filename => url mapping in @urlencmap.
#
# dir::        Sub-directory of the configuration directory
# url::        The url to encode
# ensure_dir:: If true, ask the app to create the parent directory
# type::       Cache type; defaults to the url's :cachetype ('tree')
#
# Falls back to the md5 basename when the parent directory cannot be
# ensured, when the name exceeds the global :filename_size (default
# 255), or when the name is an existing directory.
def encoded_filename(dir, url, ensure_dir=false, type=nil)
  type ||= url_get(url, :cachetype, 'tree')
  $logger.debug "encoded_filename: type=#{type} url=#{url}"
  path   = File.join(@cfgdir, dir, encoded_basename(url, type))
  parent = File.dirname(path)
  $logger.debug "encoded_filename: rv0=#{path}"
  max_size  = optval_get(:global, :filename_size, 255)
  dir_ready = !ensure_dir || @app.ensure_dir(parent, false)
  usable    = dir_ready && path.size <= max_size && !File.directory?(path)
  unless usable
    $logger.info "Can't use filename, try 'md5' instead: #{url}"
    path = File.join(@cfgdir, dir, encoded_basename(url, :md5))
  end
  @urlencmap[path] = url
  path
end
#eval_arg(format, args, default = nil, &process_string) ⇒ Object
Apply some arguments to a format.
- format
-
String or Proc
- args
-
Array of Arguments
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 |
# File 'lib/websitary/configuration.rb', line 510
# Apply some arguments to a format.
#
# format::  String or Proc (nil yields +default+)
# args::    Array of arguments
# default:: Value returned when +format+ is nil
#
# A Proc is called with +args+. Anything else is formatted with
# <tt>format % args</tt>; if a block is given, the formatted string is
# passed to it and the block's result is returned.
def eval_arg(format, args, default=nil, &process_string)
  return default if format.nil?
  return format.call(*args) if Proc === format

  formatted = format % args
  process_string ? process_string.call(formatted) : formatted
end
#exclude(*urls) ⇒ Object
Configuration command: Add URL-exclusion patterns (REGEXPs or STRINGs).
448 449 450 451 452 453 454 455 456 457 458 459 460 |
# File 'lib/websitary/configuration.rb', line 448
# Configuration command: Add URL-exclusion patterns (REGEXPs or STRINGs).
# Strings are escaped and turned into regexps; any other kind of
# argument is a fatal error (exit status 5).
def exclude(*urls)
  patterns = urls.map do |url|
    if Regexp === url
      url
    elsif String === url
      Regexp.new(Regexp.escape(url))
    else
      $logger.fatal "Must be regexp or string: #{url.inspect}"
      exit 5
    end
  end
  @exclude += patterns
end
#format(url, difftext) ⇒ Object
Format a diff according to URL’s source options.
500 501 502 503 504 |
# File 'lib/websitary/configuration.rb', line 500
# Format a diff according to URL's source options: the text is first
# run through format_text, then through the url's :format option (the
# converted text is the fallback when no format is set).
def format(url, difftext)
  formatter = url_get(url, :format)
  converted = format_text(url, difftext)
  eval_arg(formatter, [converted], converted)
end
#format_text(url, text) ⇒ Object
484 485 486 487 488 489 490 491 492 493 494 495 496 |
# File 'lib/websitary/configuration.rb', line 484
# Convert +text+ from the url's :iconv encoding to the global :encoding.
#
# url::  The url whose :iconv option names the source encoding
# text:: The text to convert
#
# Returns the converted text, or +text+ unchanged when the url has no
# :iconv option or the conversion fails (the failure is logged).
def format_text(url, text)
  enc = url_get(url, :iconv)
  if enc
    denc = optval_get(:global, :encoding)
    begin
      require 'iconv'
      text = Iconv.conv(denc, enc, text)
    rescue StandardError, LoadError => e
      # LoadError is not a StandardError and must be listed explicitly so
      # that a missing iconv library stays a soft failure; signals and
      # SystemExit are deliberately no longer swallowed.
      $logger.error "IConv failed #{enc} => #{denc}: #{e}"
    end
  end
  return text
end
#get_output_html(difftext) ⇒ Object
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 |
# File 'lib/websitary/configuration.rb', line 653
# Render the collected diffs as an HTML page.
#
# difftext:: Enumerable of [url, text] pairs
#
# Each text is stripped of the tags named by the url's :strip_tags
# option; entries that end up empty are dropped. The remaining entries
# are sorted, turned into a table of contents and a body, and handed to
# the :page template (:format, falling back to :simple).
def get_output_html(difftext)
  difftext = difftext.map do |url, text|
    tags = url_get(url, :strip_tags)
    text = strip_tags(text, :tags => tags) if tags
    text.empty? ? nil : [url, text]
  end
  difftext.compact!
  sort_difftext!(difftext)

  toc = difftext.map do |url, text|
    ti  = url_get(url, :title, File.basename(url))
    tid = html_toc_id(url)
    bid = html_body_id(url)
    %{<li id="#{tid}" class="toc"><a class="toc" href="\##{bid}">#{ti}</a></li>}
  end.join("\n")

  idx = 0
  cnt = difftext.map do |url, text|
    idx += 1
    ti  = url_get(url, :title, File.basename(url))
    bid = html_body_id(url)
    if (rewrite = url_get(url, :rewrite_link))
      # A rewritten link replaces the url; no old/latest links are shown.
      urlr = eval_arg(rewrite, [url])
      ext  = ''
    else
      old  = %{<a class="old" href="#{file_url(oldname(url))}">old</a>}
      lst  = %{<a class="latest" href="#{file_url(latestname(url))}">latest</a>}
      ext  = %{ (#{old}, #{lst})}
      urlr = url
    end
    note    = difftext_annotation(url)
    onclick = optval_get(:global, :toggle_body) ? 'onclick="ToggleBody(this)"' : ''
    # NOTE(review): class="diff,difftext" yields one class name containing
    # a comma -- confirm against the stylesheet before changing it.
    <<HTML
<div id="#{bid}" class="webpage" #{onclick}>
<div class="count">
#{idx}
</div>
<h1 class="diff">
<a class="external" href="#{urlr}">#{format_text(url, ti)}</a>#{ext}
</h1>
<div id="#{bid}_body">
<div class="annotation">
#{note && CGI::escapeHTML(note)}
</div>
<div class="diff,difftext">
#{format(url, text)}
</div>
</div>
</div>
HTML
  end.join(('<hr class="separator"/>') + "\n")

  success, template = opt_get(:page, :format)
  unless success
    success, template = opt_get(:page, :simple)
  end
  return eval_arg(template, [@output_title, toc, cnt])
end
#get_output_rss(difftext) ⇒ Object
601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 |
# File 'lib/websitary/configuration.rb', line 601
# Render the collected diffs as an RSS feed and return it as a String.
#
# difftext:: Enumerable of [url, text] pairs
#
# Requires the global option :rss[:url]; without it a fatal error is
# logged and the program exits with status 5.
def get_output_rss(difftext)
  success, rss_url = opt_get(:rss, :url)
  unless success
    $logger.fatal "Global option :rss[:url] not defined."
    exit 5
  end

  success, rss_version = opt_get(:rss, :version)
  # require "rss/#{rss_version}"

  rss  = RSS::Rss.new(rss_version)
  chan = RSS::Rss::Channel.new
  chan.title = @output_title
  [:description, :copyright, :category, :language, :image, :webMaster, :pubDate].each do |field|
    ok, val = opt_get(:rss, field)
    # These are channel-level fields, so they are set on the channel
    # (no item exists yet at this point).
    chan.send(format_symbol(field, '%s='), val) if ok
  end
  chan.link = rss_url
  rss.channel = chan

  difftext.each do |url, text|
    rss_format = url_get(url, :rss_format, 'plain_text')
    text = strip_tags(text, :format => rss_format)
    next if text.empty?

    item = RSS::Rss::Channel::Item.new
    item.date  = Time.now
    item.title = url_get(url, :title, File.basename(url))
    item.link  = eval_arg(url_get(url, :rewrite_link, '%s'), [url])
    [:author, :date, :enclosure, :category, :pubDate].each do |field|
      val = url_get(url, format_symbol(field, 'rss_%s'))
      item.send(format_symbol(field, '%s='), val) if val
    end

    annotation = difftext_annotation(url)
    annotation = "<pre>#{annotation}</pre>" if annotation
    case rss_format
    when 'plain_text'
      item.description = %{#{annotation}<pre>#{text}</pre>}
    else
      item.description = %{#{annotation}\n#{text}}
    end
    chan.items << item
  end

  return rss.to_s
end
#get_output_text(difftext) ⇒ Object
586 587 588 589 590 591 592 593 594 595 596 597 598 |
# File 'lib/websitary/configuration.rb', line 586
# Render the collected diffs as plain text.
#
# difftext:: Enumerable of [url, text] pairs
#
# HTML texts are converted with html_to_text. Entries whose text is nil
# or empty are skipped. Each remaining entry consists of the (possibly
# rewritten) link, the annotation, an empty line and the text; entries
# are separated by a line of 68 dashes.
def get_output_text(difftext)
  separator = "\n\n#{('-' * 68)}\n\n"
  sections = difftext.map do |url, text|
    next unless text
    text = html_to_text(text) if is_html?(text)
    # Yield nil (not false) for empty texts: compact only removes nil, so
    # a false entry would be rendered as the word "false".
    next if text.empty?
    [
      eval_arg(url_get(url, :rewrite_link, '%s'), [url]),
      difftext_annotation(url),
      nil,
      text
    ].join("\n")
  end
  sections.compact.join(separator)
end
#global(options) ⇒ Object
Set a global option.
388 389 390 391 392 |
# File 'lib/websitary/configuration.rb', line 388
# Set global options.
#
# options:: Hash; each key/value pair is stored in @options[:global].
def global(options)
  options.each do |type, value|
    @options[:global][type] = value
  end
end
#guess_dir(path) ⇒ Object
Guess path’s dirname.
foo/bar -> foo
foo/bar.txt -> foo
foo/bar/ -> foo/bar
799 800 801 |
# File 'lib/websitary/configuration.rb', line 799
# Guess path's dirname.
#   foo/bar     -> foo
#   foo/bar.txt -> foo
#   foo/bar/    -> foo/bar
def guess_dir(path)
  if path.end_with?('/')
    path.chop
  else
    File.dirname(path)
  end
end
#highlighter(rx, color = nil, group = nil, tag = 'span') ⇒ Object
Return a Proc that takes a text as argument and highlights occurrences of rx.
- rx
-
Regular expression
- color
-
A string, sets the class to highlight-color (default: “red”)
- group
-
A number (default: 0)
- tag
-
The HTML tag to use (default: “span”)
1009 1010 1011 |
# File 'lib/websitary/configuration.rb', line 1009
# Return a Proc that takes a text as argument and wraps occurrences of
# +rx+ in an HTML tag.
#
# rx::    Regular expression
# color:: A string, sets the class to highlight-color (default: "red")
# group:: Number of the match group to keep inside the tag (default: 0)
# tag::   The HTML tag to use (default: "span")
def highlighter(rx, color=nil, group=nil, tag='span')
  lambda do |text|
    css_class = "highlight-#{color || 'red'}"
    backref   = "\\#{group || 0}"
    text.gsub(rx, %{<#{tag} class="#{css_class}">#{backref}</#{tag}>})
  end
end
#is_excluded?(url) ⇒ Boolean
349 350 351 352 353 |
# File 'lib/websitary/configuration.rb', line 349
# Return true if +url+ matches any of the exclusion patterns in
# @exclude. The result is also written to the debug log.
def is_excluded?(url)
  excluded = @exclude.any? {|pattern| url =~ pattern}
  $logger.debug "is_excluded: #{url}: #{excluded}"
  excluded
end
#latestname(url, ensure_dir = false, type = nil) ⇒ Object
Get the filename for the freshly downloaded copy.
726 727 728 |
# File 'lib/websitary/configuration.rb', line 726
# Get the filename for the freshly downloaded copy of +url+ (stored in
# the 'latest' directory).
def latestname(url, ensure_dir=false, type=nil)
  encoded_filename('latest', url, ensure_dir, type)
end
#oldname(url, ensure_dir = false, type = nil) ⇒ Object
Get the backup filename.
720 721 722 |
# File 'lib/websitary/configuration.rb', line 720
# Get the backup filename for +url+ (stored in the 'old' directory).
def oldname(url, ensure_dir=false, type=nil)
  encoded_filename('old', url, ensure_dir, type)
end
#opt_get(opt, val) ⇒ Object
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
# File 'lib/websitary/configuration.rb', line 263
# Look up +val+ in the option table @options[opt].
#
# Returns [true, value] when the key exists, [false, val] otherwise.
# A Symbol value is treated as a reference to another key of the same
# table and is resolved recursively.
def opt_get(opt, val)
  vals = @options[opt]
  $logger.debug "val=#{val} vals=#{vals.inspect}"
  unless vals and vals.has_key?(val)
    $logger.debug "opt_get no: #{opt} => #{val.inspect}"
    return [false, val]
  end

  rv = vals[val]
  $logger.debug "opt_get ok: #{opt} => #{rv.inspect}"
  if Symbol === rv
    $logger.debug "opt_get re: #{rv}"
    opt_get(opt, rv)
  else
    $logger.debug "opt_get true, #{rv}"
    [true, rv]
  end
end
#option(type, options) ⇒ Object
Configuration command: Set global options.
- type
-
Symbol
- options
-
Hash
376 377 378 379 380 381 382 383 384 |
# File 'lib/websitary/configuration.rb', line 376
# Configuration command: Set global options.
#
# type::    Symbol naming an existing option table in @options
# options:: Hash merged into that table
#
# An unknown +type+ is logged as an error and otherwise ignored.
def option(type, options)
  $logger.info "option #{type}: #{options.inspect}"
  o = @options[type]
  if o
    o.merge!(options)
  else
    $logger.error "Unknown option type: #{type} (#{options.inspect})"
  end
end
#optval_get(opt, val, default = nil) ⇒ Object
248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/websitary/configuration.rb', line 248
# Resolve an option value.
#
# A Symbol +val+ is looked up in the option table +opt+ via opt_get;
# +default+ is returned if the lookup fails. Any other +val+ is
# returned as is.
def optval_get(opt, val, default=nil)
  return val unless Symbol === val

  found, resolved = opt_get(opt, val)
  found ? resolved : default
end
#output_file(filename, outformat = nil) ⇒ Object
Set the output file.
367 368 369 |
# File 'lib/websitary/configuration.rb', line 367
# Set the output file for +outformat+ (the nil key is used when no
# format is given).
def output_file(filename, outformat=nil)
  @outfile[outformat] = filename
end
#output_format(*format) ⇒ Object
Set the output format.
357 358 359 360 361 362 363 |
# File 'lib/websitary/configuration.rb', line 357
# Set the output format(s). Every argument must be one of 'text',
# 'html' or 'rss'; otherwise a fatal error is logged and the program
# exits with status 5.
def output_format(*format)
  known = ['text', 'html', 'rss']
  if format.any? {|e| !known.include?(e)}
    $logger.fatal "Unknown output format: #{format}"
    exit 5
  end
  @output_format = format
end
#parse_command_line_args(args) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/websitary/configuration.rb', line 81
# Parse the command-line arguments and load the requested profiles.
#
# args:: Array of command-line arguments; recognised options are
#        removed from it, the rest are taken as profile names.
#
# If no profile is named, the default profiles are used. For each
# profile either the app's cmdline_arg_<execute> handler is called (if
# the app has one) or the profile is loaded. Returns self.
def parse_command_line_args(args)
  $logger.debug "parse_command_line_args: #{args}"
  parser = OptionParser.new do |opts|
    opts.banner =  "Usage: #{Websitary::APPNAME} [OPTIONS] [PROFILES] > [OUT]"
    opts.separator ''
    opts.separator "#{Websitary::APPNAME} is a free software with ABSOLUTELY NO WARRANTY under"
    opts.separator 'the terms of the GNU General Public License version 2 or newer.'
    opts.separator ''

    opts.separator 'General Options:'

    opts.on('-c', '--cfg=DIR', String, 'Configuration directory') do |value|
      @cfgdir = value
    end

    opts.on('-e', '--execute=COMMAND', String, 'Define what to do (default: downdiff)') do |value|
      @execute = value
    end

    # opts.on('-E', '--edit=PROFILE', String, 'Edit a profile') do |value|
    #     edit_profile value
    #     exit 0
    # end

    opts.on('-f', '--output-format=FORMAT', 'Output format (html, text, rss)') do |value|
      output_format(*value.split(/,/))
    end

    opts.on('--[no-]ignore-age', 'Ignore age limits') do |bool|
      set :ignore_age => bool
    end

    opts.on('--log=DESTINATION', String, 'Log destination') do |value|
      @logger = Websitary::AppLog.new(value != '-' && value)
    end

    opts.on('-o', '--output=FILENAME', String, 'Output') do |value|
      output_file(value)
    end

    opts.on('-s', '--set=NAME=VAR', String, 'Set a default option') do |value|
      key, val = value.split(/=/, 2)
      # NOTE(review): the value is evaluated as Ruby code. That is by design
      # for a local command line but must never be fed untrusted input.
      set key.intern => eval(val)
    end

    opts.on('-t', '--timer=N', Numeric, 'Repeat every N seconds (never exit)') do |value|
      global(:timer => value)
    end

    opts.on('-x', '--exclude=N', Regexp, 'Exclude URLs matching this pattern') do |value|
      exclude(Regexp.new(value))
    end

    opts.separator ''
    opts.separator "Available commands (default: #@execute):"
    # Every execute_* method of the app is an available command.
    commands = @app.methods.map do |m|
      mt = m.match(/^execute_(.*)$/)
      mt && mt[1]
    end
    commands.compact!
    commands.sort!
    opts.separator commands.join(', ')

    opts.separator ''
    opts.separator 'Available profiles:'
    opts.separator Dir[File.join(@cfgdir, '*.rb')].map {|f| File.basename(f, '.*')}.join(', ')

    opts.separator ''
    opts.separator 'Other Options:'

    opts.on('--debug', 'Show debug messages') do |v|
      $VERBOSE = $DEBUG = true
      @logger.set_level(:debug)
    end

    opts.on('-q', '--quiet', 'Be mostly quiet') do |v|
      @logger.set_level(:quiet)
    end

    opts.on('-v', '--verbose', 'Run verbosely') do |v|
      $VERBOSE = true
      @logger.set_level(:verbose)
    end

    opts.on('--version', 'Show the version number') do |v|
      puts Websitary::VERSION
      exit 1
    end

    opts.on_tail('-h', '--help', 'Show this message') do
      puts opts
      exit 1
    end
  end

  @profiles = parser.parse!(args)
  @profiles = @default_profiles if @profiles.empty?
  cla_handler = "cmdline_arg_#{@execute}"
  cla_handler = nil unless @app.respond_to?(cla_handler)
  @profiles.each do |pn|
    if cla_handler
      @app.send(cla_handler, self, pn)
    else
      profile pn
    end
  end

  self
end
#profile(profile_name) ⇒ Object
Configuration command: Load a profile
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 |
# File 'lib/websitary/configuration.rb', line 298
# Configuration command: Load a profile.
#
# profile_name:: '-' reads urls line by line via +readlines+ and defines
#                each as a source; '__END__' evaluates the contents of
#                DATA; anything else is resolved with profile_filename
#                and the file is evaluated.
#
# Returns the result of eval_profile, or false for '-' and for unknown
# profiles (the latter is logged as an error).
def profile(profile_name)
  if '-' == profile_name
    readlines.map! {|l| l.chomp}.each {|url| source url}
    return false
  end

  if '__END__' == profile_name
    $logger.debug "Profile: __END__"
    contents = DATA.read
    return eval_profile(contents)
  end

  fn = profile_filename(profile_name)
  unless fn
    $logger.error "Unknown profile: #{profile_name}"
    return false
  end
  $logger.debug "Profile: #{fn}"
  contents = File.read(fn)
  eval_profile(contents, fn)
end
#profile_filename(profile_name, check_file_exists = true) ⇒ Object
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 |
# File 'lib/websitary/configuration.rb', line 1032
# Resolve a profile name to a filename.
#
# profile_name::      Profile name; '.rb' is appended if missing
# check_file_exists:: If true (default), return nil when no such file
#                     exists; if false, return the candidate in the
#                     configuration directory instead
#
# The current directory is searched before the configuration directory.
def profile_filename(profile_name, check_file_exists=true)
  if File.extname(profile_name) != '.rb'
    profile_name = "#{profile_name}.rb"
  end
  filename = nil
  ['.', @cfgdir].each do |d|
    filename = File.join(d, profile_name)
    # File.exist? -- the File.exists? alias no longer exists in current Ruby.
    if File.exist?(filename)
      return filename
    end
  end
  return check_file_exists ? nil : filename
end
#push_hrefs(url, hpricot, &condition) ⇒ Object
Scan hpricot document for hrefs and push them onto @todo if not already included.
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 |
# File 'lib/websitary/configuration.rb', line 867
# Scan hpricot document for hrefs and push them onto @todo if not
# already included.
#
# url::       The url of the document
# hpricot::   The parsed document
# condition:: Block called with (uri0, pn0, uri, pn) -- the URI and
#             guessed directory of the document and of the link; the
#             link is only followed if the block returns true
#
# Nothing is done if the document is marked 'nofollow', if +url+ is
# excluded, or if the url's :depth option is exhausted. Followed links
# inherit the options of +url+, with the title extended by the link's
# basename and :depth decremented. Errors are logged, not raised.
def push_hrefs(url, hpricot, &condition)
  begin
    $logger.debug "push_refs: #{url}"
    return if robots?(hpricot, 'nofollow') or is_excluded?(url)
    depth = url_get(url, :depth)
    return if depth and depth <= 0
    uri0 = URI.parse(url)
    # pn0  = Pathname.new(guess_dir(File.expand_path(uri0.path)))
    pn0  = Pathname.new(guess_dir(uri0.path))
    (hpricot / 'a').each do |a|
      next if a['rel'] == 'nofollow'
      href = clean_url(a['href'])
      next if href.nil? or href == url or is_excluded?(href)
      href = rewrite_href(href, url, uri0, pn0, true)
      # rewrite_href returns nil for links it cannot resolve. Skip those
      # before canonic_url is called, so that a single unusable link does
      # not raise and end the scan of the remaining links.
      next unless href
      curl = canonic_url(href)
      next if @done.include?(curl) or @todo.include?(curl)
      # pn = Pathname.new(guess_dir(File.expand_path(uri.path)))
      uri = URI.parse(href)
      pn  = Pathname.new(guess_dir(uri.path))
      next unless condition.call(uri0, pn0, uri, pn)
      next unless robots_allowed?(curl, uri)
      opts = @urls[url].dup
      # opts[:title] = File.basename(curl)
      opts[:title] = [opts[:title], File.basename(curl)].join(' - ')
      opts[:depth] = depth - 1 if depth and depth >= 0
      # opts[:sleep] = delay if delay
      url_set(curl, opts)
      to_do curl
    end
  rescue StandardError => e
    # Best effort: a failure while scanning is logged and the scan ends.
    $logger.error e.message
    $logger.debug e.backtrace
  end
end
#quicklist(profile_name) ⇒ Object
291 292 293 |
# File 'lib/websitary/configuration.rb', line 291
# Configuration command: Set the name of the quicklist profile.
def quicklist(profile_name)
  @quicklist_profile = profile_name
end
#rewrite_href(href, url, uri = nil, urd = nil, local = false) ⇒ Object
Try to make href an absolute url.
941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 |
# File 'lib/websitary/configuration.rb', line 941
# Try to make href an absolute url.
#
# href::  The link to rewrite
# url::   The url of the document containing the link
# uri::   Parsed form of +url+ (parsed on demand if nil)
# urd::   Directory part of +url+ (guessed on demand if nil)
# local:: If true, a relative link whose target is queued, done or
#         already stored is replaced by the local 'latest' filename
#
# Returns the rewritten link as a String, or nil if +href+ is nil,
# excluded, or cannot be resolved. Errors are logged, not raised.
def rewrite_href(href, url, uri=nil, urd=nil, local=false)
  begin
    return nil if !href or is_excluded?(href)
    uri ||= URI.parse(url)
    # A path starting at the root only needs scheme and host of the url.
    if href =~ /^\s*\//
      return uri.merge(href).to_s
    end
    urh = URI.parse(href)
    urd ||= guess_dir(uri.path)
    rv  = nil

    if href =~ /\w+:/
      # Contains a scheme-like prefix: taken as already absolute.
      rv = href
    elsif urh.relative?
      if uri.relative?
        if uri.instance_of?(URI::Generic)
          rv = File.join(urd, href)
        end
      else
        rv = uri.merge(href).to_s
        if local
          hf = latestname(rv)
          if @todo.include?(rv) or @done.include?(rv) or File.exist?(hf)
            rv = hf
          end
        end
      end
    elsif href[0..0] == '#'
      rv = url + href
    elsif uri.host == urh.host
      rv = uri.merge(href).to_s
    else
      rv = href
    end

    case rv
    when String
      return rv
    when nil
    else
      $logger.error "Internal error: href=#{href}"
      $logger.debug caller.join("\n")
    end
    return
  rescue StandardError => e
    # Unparsable links are reported and yield nil.
    $logger.error e.message
    $logger.debug e.backtrace
  end
  return nil
end
#rewrite_urls(url, doc) ⇒ Object
Rewrite urls in doc
- url
-
String
- doc
-
Hpricot document
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 |
# File 'lib/websitary/configuration.rb', line 909
# Rewrite urls in doc.
#
# url:: String
# doc:: Hpricot document
#
# The href of every 'a' element and the src of every 'img' element is
# rewritten with rewrite_href (links may be replaced by local copies,
# images may not). Elements pointing to excluded urls are commented
# out. Returns +doc+.
def rewrite_urls(url, doc)
  uri = URI.parse(url)
  urd = guess_dir(uri.path)
  # [selector, attribute, allow local replacement]; links come first.
  [['a', 'href', true], ['img', 'src', false]].each do |selector, attr, local|
    (doc / selector).each do |elt|
      target = clean_url(elt[attr])
      if is_excluded?(target)
        comment_element(doc, elt)
      else
        target = rewrite_href(target, url, uri, urd, local)
        elt[attr] = target if target
      end
    end
  end
  doc
end
#save_dir(url, dir, title = nil) ⇒ Object
804 805 806 807 808 809 810 811 812 813 814 |
# File 'lib/websitary/configuration.rb', line 804
# Determine the directory in which to save files for +url+, and ask the
# app to ensure it exists.
#
# dir::   true selects <cfgdir>/attachments/<encoded title>; a Proc is
#         called with +url+; any other value is used as is
# title:: Title used when +dir+ is true (default: the url's :title)
#
# Returns the directory (possibly nil/false, in which case nothing is
# ensured).
def save_dir(url, dir, title=nil)
  if true === dir
    title ||= url_get(url, :title)
    dir = File.join(@cfgdir, 'attachments', encode(title))
  elsif Proc === dir
    dir = dir.call(url)
  end
  @app.ensure_dir(dir) if dir
  dir
end
#set(options) ⇒ Object
Configuration command: Set the default value for source-options.
397 398 399 400 |
# File 'lib/websitary/configuration.rb', line 397
# Configuration command: Set the default value for source-options.
#
# options:: Hash merged into the default source options.
def set(options)
  $logger.debug "set: #{options.inspect}"
  @default_options.merge!(options)
end
#shortcut(symbol, args) ⇒ Object
Define an options shortcut.
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 |
# File 'lib/websitary/configuration.rb', line 321
# Define an options shortcut.
#
# symbol:: The name of the shortcut
# args::   Hash mapping option tables (keys of @options) to the value
#          the shortcut stands for in that table. The special key
#          :delegate supplies the value for every table not listed.
#
# Without :delegate, a warning is logged if one of the tables
# :download, :downloadformat, :diff, :format or :diffprocess exists but
# is not covered by +args+.
def shortcut(symbol, args)
  given     = args.keys
  missing   = @options.keys - given
  delegated = given.include?(:delegate)

  # :downloadprocess
  essential = [:download, :downloadformat, :diff, :format, :diffprocess]
  if !delegated and missing.any? {|e| essential.include?(e)}
    $logger.warn "Shortcut #{symbol}: Undefined fields: #{missing.inspect}"
  end

  if delegated
    missing.each {|field| @options[field][symbol] = args[:delegate]}
  end

  args.each do |field, val|
    next if field == :delegate
    @options[field][symbol] = val
  end
end
#show_output(difftext) ⇒ Object
Generate & view the final output.
- difftext
-
Hash
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 |
# File 'lib/websitary/configuration.rb', line 553
# Generate & view the final output.
#
# difftext:: Hash
#
# Returns 0 if there is nothing to show (a warning is logged), 1
# otherwise. For every configured output format the matching
# get_output_<format> method produces the text, which is printed when
# the output file is '-' and otherwise written to the output file and
# shown with view_output_<format>. An unknown format is fatal (exit
# status 5).
def show_output(difftext)
  if difftext.empty?
    parts = ['No news is good news']
    parts << "try again in #{@app.format_tdiff(@app.tdiff_min)}" if @app.tdiff_min
    $logger.warn parts.join('; ')
    return 0
  end

  @output_format.each do |outformat|
    producer = "get_output_#{outformat}"
    unless respond_to?(producer)
      $logger.fatal "Unknown output format: #{outformat}"
      exit 5
    end
    out = send(producer, difftext)
    next unless out

    outfile = get_outfile(outformat)
    if '-' == outfile
      puts out
    else
      write_file(outfile) {|io| io.puts out}
      viewer = "view_output_#{outformat}"
      self.send(viewer, outfile)
    end
  end
  return 1
end
#source(urls, opts = {}) ⇒ Object
Configuration command: Define a source.
- urls
-
String
415 416 417 418 419 420 |
# File 'lib/websitary/configuration.rb', line 415
# Configuration command: Define a source.
#
# urls:: String with one url per line
# opts:: Hash of source options, applied on top of the default options
#
# Every url is registered with its options and queued for download.
def source(urls, opts={})
  lines = urls.split("\n").flatten.compact
  lines.each do |url|
    url_set(url, @default_options.merge(opts))
    to_do url
  end
end
#strip_tags(doc, args = {}) ⇒ Object
834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 |
# File 'lib/websitary/configuration.rb', line 834
# Remove elements from a document.
#
# doc::  String (parsed with Hpricot) or an already parsed document
# args:: Hash; :tags lists the selectors to remove (default:
#        strip_tags_default), :format selects the result: :hpricot
#        returns the document itself, anything else calls
#        to_<format> on it (default: to_html)
def strip_tags(doc, args={})
  # strip_tags_default yields nil when no default is configured; treat
  # that as "nothing to strip" instead of failing on nil.each.
  tags = args[:tags] || strip_tags_default || []
  case doc
  when String
    doc = Hpricot(doc)
  end
  tags.each do |tag|
    doc.search(tag).remove
  end
  case args[:format]
  when :hpricot
    doc
  else
    doc.send("to_#{args[:format] || :html}")
  end
end
#strip_tags_default ⇒ Object
828 829 830 831 |
# File 'lib/websitary/configuration.rb', line 828
# Return a copy of the default tag list (option :strip_tags, key
# :default), or nil if no default is configured.
def strip_tags_default
  success, tags = opt_get(:strip_tags, :default)
  tags.dup if success
end
#to_do(url) ⇒ Object
344 345 346 |
# File 'lib/websitary/configuration.rb', line 344
# Queue +url+ for download unless it is excluded.
def to_do(url)
  return if is_excluded?(url)
  @todo << url
end
#unset(*options) ⇒ Object
Configuration command: Unset a default source-option.
405 406 407 408 409 |
# File 'lib/websitary/configuration.rb', line 405
# Configuration command: Unset a default source-option.
#
# options:: The option names to remove from the default source options.
def unset(*options)
  options.each do |option|
    @default_options.delete(option)
  end
end
#url_from_filename(filename) ⇒ Object
731 732 733 734 735 736 737 738 739 |
# File 'lib/websitary/configuration.rb', line 731
# Return the url that was encoded as +filename+ (as recorded in
# @urlencmap), or nil if the filename is unknown. A successful lookup
# is logged at debug level, a failed one as a warning.
def url_from_filename(filename)
  url = @urlencmap[filename]
  if url
    $logger.debug "Map filename: #{filename} -> #{url}"
  else
    $logger.warn "Unmapped filename: #{filename}"
  end
  url
end
#url_get(url, opt, default = nil) ⇒ Object
Retrieve an option for an url
- url
-
String
- opt
-
Symbol
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
# File 'lib/websitary/configuration.rb', line 201
# Retrieve an option for a url.
#
# url::     String
# opt::     Symbol
# default:: Value returned if nothing else is found
#
# Lookup order: the url's own value for +opt+ (for :diffprocess and
# :format, the url's :diff value is used if +opt+ itself is not set),
# then the url's :use value. A Symbol value is resolved through the
# option table +opt+. If nothing is found, +default+ is returned, or --
# if +default+ is nil/false -- the :default entry of the option table.
# Unregistered urls always yield +default+.
def url_get(url, opt, default=nil)
  opts = @urls[url]
  unless opts
    $logger.debug "Non-registered URL: #{url}"
    return default
  end
  $logger.debug "get: opts=#{opts.inspect}"

  falls_back_to_diff = [:diffprocess, :format].include?(opt) && !opts.has_key?(opt)
  opt_ = falls_back_to_diff ? :diff : opt

  $logger.debug "get: opt=#{opt} opt_=#{opt_}"
  $logger.debug "get: #{opts[opt_]} #{opts[:use]}" if opts
  val = if opts.has_key?(opt_)
          opts[opt_]
        elsif opts.has_key?(:use)
          opts[:use]
        end

  if Symbol === val
    $logger.debug "get: val=#{val}"
    success, rv = opt_get(opt, val)
    $logger.debug "get: #{success}, #{rv}"
    return rv if success
  elsif !val.nil?
    $logger.debug "get: return val=#{val}"
    return val
  end

  unless default
    success, default1 = opt_get(opt, :default)
    default = default1 if success
  end

  $logger.debug "get: return default=#{default}"
  default
end
#url_set(url, items) ⇒ Object
192 193 194 195 |
# File 'lib/websitary/configuration.rb', line 192
# Merge +items+ into the options of +url+, registering the url first
# if necessary. Returns the url's option hash.
def url_set(url, items)
  @urls[url] ||= {}
  @urls[url].merge!(items)
end
#urlextname(url) ⇒ Object
787 788 789 790 791 792 |
# File 'lib/websitary/configuration.rb', line 787
# Return the extension of the path component of +url+ (including the
# dot, as File.extname does), or nil if the url cannot be parsed or has
# no path.
def urlextname(url)
  begin
    return File.extname(URI.parse(url).path)
  rescue StandardError
    # Best effort: an unusable url simply has no extension. Only ordinary
    # errors are swallowed; signals and SystemExit propagate.
    return nil
  end
end
#view(view) ⇒ Object
Configuration command: Set the viewer.
465 466 467 |
# File 'lib/websitary/configuration.rb', line 465
# Configuration command: Set the viewer (stored in @view).
def view(view)
  @view = view
end
#view_output(outfile = nil) ⇒ Object
1014 1015 1016 |
# File 'lib/websitary/configuration.rb', line 1014
# View the output with the viewer method of the first configured
# output format (view_output_<format>).
#
# outfile:: The file to show (default: the result of get_outfile)
def view_output(outfile=nil)
  viewer = "view_output_#{@output_format[0]}"
  send(viewer, outfile || get_outfile)
end
#write_file(filename, mode = 'w', &block) ⇒ Object
1047 1048 1049 1050 |
# File 'lib/websitary/configuration.rb', line 1047
# Open +filename+ in +mode+ (default: 'w'), pass the IO object to the
# block, and afterwards record the file in the mtimes cache.
def write_file(filename, mode='w', &block)
  File.open(filename, mode) do |handle|
    block.call(handle)
  end
  @mtimes.set(filename)
end