Class: Websitary::Htmldiff

Inherits:
Object
  • Object
show all
Defined in:
lib/websitary/htmldiff.rb

Overview

A simple class to generate diffs for HTML files using Hpricot. It is quite likely that it will miss certain details and yield wrong results (especially false negatives, i.e. real changes that go unreported) on certain occasions.

Constant Summary collapse

VERSION =
'0.1'
REVISION =
'180'

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ Htmldiff

args

A hash

Fields:

:oldtext

The old version

:newtext

The new version

:highlight

Don’t strip the old content; instead, highlight the new content with this color

:args

Command-line arguments



28
29
30
31
32
33
34
35
36
37
38
# File 'lib/websitary/htmldiff.rb', line 28

# Set up a differ from an options hash.
#
# Keys read from +args+:
#   :highlight, :highlightcolor:: if either is set, highlighting mode is on
#                                 (stored in @high).
#   :olddoc / :oldtext / :oldfile:: the old version; the first one present is
#                                   used (parsed document, HTML string, or a
#                                   path that is read from disk).
#   :newdoc / :newtext / :newfile:: the new version, resolved the same way.
#   :ignore:: optional Enumerable of ignore expressions.
#
# The old document is flattened with +explode+ right away; the new document
# is kept as a parsed tree.
#
# NOTE(review): +die+ is not defined in the visible part of this file; it is
# assumed to be provided elsewhere -- confirm.
def initialize(args)
    @args = args
    @high = args[:highlight] || args[:highlightcolor]
    @old  = explode(args[:olddoc] || Hpricot(args[:oldtext] || File.read(args[:oldfile])))
    @new  =         args[:newdoc] || Hpricot(args[:newtext] || File.read(args[:newfile]))
    @ignore  = args[:ignore]
    if @ignore && !@ignore.kind_of?(Enumerable)
        # Use the instance variable here: a bare `ignore` would invoke the
        # two-argument #ignore method without arguments and raise instead of
        # reporting the offending value.
        die "Ignore must be of kind Enumerable: #{@ignore.inspect}"
    end
    # Flipped to true by #highlight as soon as a changed node is found.
    @changed = false
end

Instance Method Details

#diff ⇒ Object

Do the diff. Return an empty string if nothing has changed.



42
43
44
45
# File 'lib/websitary/htmldiff.rb', line 42

# Run the comparison and return the resulting markup as a string, or an
# empty string when no change was recorded.
def diff
    # #process must run first: @changed is only meaningful afterwards.
    markup = process.to_s
    return '' unless @changed
    markup
end

#explode(node) ⇒ Object

Collect all nodes and subnodes in an Hpricot document.



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/websitary/htmldiff.rb', line 89

# Flatten a node tree into an array of stripped HTML strings: the node's own
# markup first, followed by the markup of every descendant in the order
# +each_child+ yields them. Nodes that do not respond to +each_child+
# contribute only their own markup.
def explode(node)
    return [node.to_html.strip] unless node.respond_to?(:each_child)

    collected = [node.to_html.strip]
    node.each_child { |sub| collected.concat(explode(sub)) }
    collected
end

#highlight(child) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/websitary/htmldiff.rb', line 102

# Record that a change was found and, in highlighting mode, wrap the changed
# content in a <span>.
#
# Always sets @changed. Without highlighting (@high unset) the node is
# returned untouched. With highlighting, a node that has children is
# processed child by child and its inner markup rebuilt from the results;
# any other node is wrapped in a <span> whose attribute depends on
# @args[:highlight]:
#   String::        class="<that string>"
#   true / Numeric:: class="highlight"
#   anything else:: inline background-color taken from @args[:highlightcolor]
def highlight(child)
    @changed = true
    return child unless @high

    unless child.respond_to?(:each_child)
        attrs =
            case @args[:highlight]
            when String
                %{class="#{@args[:highlight]}"}
            when true, Numeric
                %{class="highlight"}
            else
                %{style="background-color: #{@args[:highlightcolor]};"}
            end
        return replace_inner(child, %{<span #{attrs}>#{child.to_s}</span>})
    end

    parts = []
    child.each_child do |sub|
        parts << replace_inner(sub, highlight(sub).to_s)
    end
    replace_inner(child, parts.join("\n"))
end

#ignore(node, node_as_string) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/websitary/htmldiff.rb', line 74

# Decide whether a node should be excluded from the diff.
#
# node::           the node itself; handed to Proc expressions.
# node_as_string:: the node's markup; matched against Regexp expressions.
#
# Returns a truthy value if any expression in @ignore matches, and a falsy
# value if none matches or no ignore expressions were configured. Expressions
# of any other type are reported through +die+.
#
# NOTE(review): +die+ is not defined in the visible part of this file; it is
# assumed to be provided elsewhere -- confirm.
def ignore(node, node_as_string)
    return @ignore && @ignore.any? do |i|
        case i
        when Regexp
            node_as_string =~ i
        when Proc
            # Call the current expression (the original referenced an
            # undefined local here, so every Proc rule raised NameError).
            i.call(node)
        else
            die "Unknown type for ignore expression: #{i.inspect}"
        end
    end
end

#process(node = @new) ⇒ Object

It goes like this: if a node isn’t in the list of old nodes, then either the node itself or its content has changed. If the content is a single node, the whole node has changed. If only some sub-nodes have changed, collect those.



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/websitary/htmldiff.rb', line 52

# Walk the children of +node+ (the new document by default) and rebuild its
# inner markup from the parts that count as changed.
#
# For each child with non-empty markup:
# * if its markup is in the old node list, or it matches an ignore
#   expression, it is kept only in highlighting mode;
# * otherwise, if it has children of its own, it is processed recursively;
# * otherwise it is passed to #highlight, and outside highlighting mode a
#   '<br />' separator is appended after it.
#
# Returns whatever #replace_inner returns for +node+ and the joined parts.
def process(node=@new)
    pieces = []
    node.each_child do |child|
        markup = child.to_html.strip
        next if markup.nil? || markup.empty?

        if @old.include?(markup) || ignore(child, markup)
            pieces << child if @high
        elsif child.respond_to?(:each_child)
            pieces << process(child)
        else
            pieces << highlight(child).to_s
            pieces << '<br />' unless @high
        end
    end
    replace_inner(node, pieces.join("\n"))
end

#replace_inner(child, ihtml) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/websitary/htmldiff.rb', line 129

# Substitute +ihtml+ for the content of +child+, depending on the node type:
# * Hpricot::Comment -- returned unchanged;
# * Hpricot::Text    -- not modified; +ihtml+ is parsed with Hpricot() and
#   the parse result is returned instead;
# * anything else    -- its inner_html is set to +ihtml+ and the node itself
#   is returned.
def replace_inner(child, ihtml)
    return child if child.is_a?(Hpricot::Comment)
    return Hpricot(ihtml) if child.is_a?(Hpricot::Text)

    child.inner_html = ihtml
    child
end