Class: CML::Parser

Inherits:
Object
  • Object
show all
Includes:
ParserLogic
Defined in:
lib/cml/parser.rb

Constant Summary collapse

# XPath matching every element whose prefix is cml, anywhere in the document.
TAG_XPATH =
'//cml:*'
# Relative to the context node: every cml element that is not nested inside
# cml:checkboxes, cml:ratings, cml:select or cml:radios, and that is not
# itself a cml:gold or cml:instructions element.
BASE_TAGS_XPATH =
".#{TAG_XPATH}[not(ancestor::cml:checkboxes or ancestor::cml:ratings or ancestor::cml:select or ancestor::cml:radios or self::cml:gold or self::cml:instructions)]"
# XPath matching every cml:group element, anywhere in the document.
GROUP_XPATH =
"//cml:group"
# Relative to the context node: every cml element located below a cml:group.
GROUP_DESCENDANTS_XPATH =
".#{GROUP_XPATH}#{TAG_XPATH}"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ParserLogic

#has_grouped_logic?, #has_liquid_logic?, #has_nested_logic?, #logic_tree

Constructor Details

#initialize(content, opts = {}) ⇒ Parser

Returns a new instance of Parser.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/cml/parser.rb', line 13

# Builds a parser around a CML document.
#
# @param content [String, Object] raw CML markup; anything that is not a
#   String is stored as the document unchanged
# @param opts [Hash] options; a :parser => self entry is merged in before the
#   hash is handed to each tag, and a truthy :normalize runs #normalize
def initialize(content, opts = {})
  @opts = opts.merge(:parser => self)
  @cdata, @doc =
    if content.is_a?(String)
      # Script/style bodies are captured up front because Parser.parse
      # removes them before the markup reaches the XML parser.
      [content.scan(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m), Parser.parse(content)]
    else
      [[], content]
    end

  # Only the "base" CML tags are selected (see BASE_TAGS_XPATH).
  @cftags = @doc.xpath(BASE_TAGS_XPATH, "xmlns:cml"=>"http://crowdflower.com")
  normalize if opts[:normalize]

  @cml_tag_map = {}
  wrapped = @cftags.map do |node|
    tag = Parser.tag_class(node.name).new(node, @opts)
    # Indexed by the node's object_id so the tag can be found from its node later.
    @cml_tag_map[node.object_id] = tag
    tag
  end
  @tags = wrapped.flatten
end

Instance Attribute Details

#cftags ⇒ Object (readonly)

Returns the value of attribute cftags.



6
7
8
# File 'lib/cml/parser.rb', line 6

# Reader for @cftags, which #initialize fills with the nodes selected by
# BASE_TAGS_XPATH.
def cftags; @cftags; end

#cml_tag_map ⇒ Object (readonly)

Returns the value of attribute cml_tag_map.



6
7
8
# File 'lib/cml/parser.rb', line 6

# Reader for @cml_tag_map, the Hash that #initialize fills with
# node.object_id => tag object entries.
def cml_tag_map; @cml_tag_map; end

#doc ⇒ Object (readonly)

Returns the value of attribute doc.



6
7
8
# File 'lib/cml/parser.rb', line 6

# Reader for @doc: the result of Parser.parse when #initialize was given a
# String, otherwise the object that was passed in.
def doc; @doc; end

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



6
7
8
# File 'lib/cml/parser.rb', line 6

# Reader for @errors, the list of messages collected by the latest #valid?
# call. NOTE(review): nothing visible here assigns @errors before #valid?
# runs, so this presumably returns nil until then - confirm.
def errors; @errors; end

#tags ⇒ Object (readonly)

Returns the value of attribute tags.



6
7
8
# File 'lib/cml/parser.rb', line 6

# Reader for @tags, the flattened list of tag objects that #initialize builds
# from the selected nodes.
def tags; @tags; end

Class Method Details

.escape(string) ⇒ Object



188
189
190
191
192
193
# File 'lib/cml/parser.rb', line 188

# Replaces &, <, > and " with their entity references.
#
# @param string [#to_s] value to escape (nil is treated as "")
# @return [String] a new, escaped string
def self.escape( string )
  entities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;" }
  # Single pass over the text: the "&" that starts an inserted entity is
  # never visited again, so nothing gets escaped twice.
  string.to_s.gsub( /[&<>"]/, entities )
end

.parse(content) ⇒ Object



33
34
35
36
37
38
# File 'lib/cml/parser.rb', line 33

# Turns raw CML markup into an XML document wrapped in a <root> element that
# declares the cml namespace.
#
# @param content [String] raw CML/HTML markup
# @return [Object] whatever Nokogiri::XML returns for the prepared markup
def self.parse(content)
  # Empty out <script>/<style> elements, keeping only their open and close tags.
  without_bodies = content.gsub(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m, "\\1\\4")
  # Self-close input, link, img, br and hr tags (not handled: base, basefont, area, meta).
  xhtml = without_bodies.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  Nokogiri::XML("<root xmlns:cml=\"http://crowdflower.com\">#{xhtml}</root>")
end

.tag_class(name) ⇒ Object

This takes the name of the tag and converts it to the appropriate tag class



196
197
198
# File 'lib/cml/parser.rb', line 196

# Looks up the tag class registered for a tag name.
#
# @param name [String] tag name used as the key into CML::TagClasses
# @return [Object] the registered entry, or CML::Tags::Unknown when the
#   lookup yields nil or false
def self.tag_class(name)
  registered = CML::TagClasses[name]
  return registered if registered

  CML::Tags::Unknown
end

Instance Method Details

#convert(opts = nil) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/cml/parser.rb', line 40

# Produces a converted duplicate of the document.
#
# Works on @doc.dup: every base CML node that is not located under a
# cml:group has its namespace cleared and is replaced by the result of
# calling #convert on the tag at the same position in #tags.
#
# @param opts [Hash, nil] extra options; merged into @opts in place when
#   given, and passed on to each tag's #convert
# @return [Object] the duplicated, converted document
def convert(opts = nil)
  @opts.merge!(opts) if opts
  cloned = @doc.dup
  namespaces = { "xmlns:cml"=>"http://crowdflower.com" }

  base_nodes = cloned.xpath(BASE_TAGS_XPATH, namespaces)
  grouped_nodes = cloned.xpath(GROUP_DESCENDANTS_XPATH, namespaces)
  standalone_nodes = base_nodes - grouped_nodes

  standalone_nodes.each do |node|
    node.namespace = nil
    # Tags are positioned like the full base node list, so the index has to
    # come from base_nodes rather than from the filtered list.
    tag = tags[base_nodes.index( node )]
    node.replace(tag.convert(opts))
  end
  cloned
end

#each_cml_group_descendant ⇒ Object

Yields, for each <cml:group> element, every “base” CML tag nested inside it, together with the group node and the tag's index within that group.



56
57
58
59
60
61
62
63
64
65
66
# File 'lib/cml/parser.rb', line 56

# Walks every cml:group in the document and yields its base CML descendants.
#
# @yield [group_node, cml_tag, index] the group node, the tag object stored
#   in @cml_tag_map under the descendant node's object_id (nil when there is
#   no entry), and the descendant's position within that group
# @return [Object, nil] nil when no block is given, otherwise the result of
#   iterating the group nodes
def each_cml_group_descendant
  return unless block_given?

  namespaces = { "xmlns:cml"=>"http://crowdflower.com" }
  @doc.xpath(".#{GROUP_XPATH}", namespaces).each do |group_node|
    group_node.xpath(BASE_TAGS_XPATH, namespaces).each_with_index do |node, position|
      yield group_node, @cml_tag_map[node.object_id], position
    end
  end
end

#fields ⇒ Object



80
81
82
83
84
85
86
87
# File 'lib/cml/parser.rb', line 80

# Rebuilds (on every call) the map of tag name => aggregation for all tags
# that carry an "aggregation" attribute, and stores it in @fields.
#
# @return [Hash] tag name => aggregation value converted with #to_s
def fields
  @fields = {}
  @tags.each_with_object(@fields) do |tag, by_name|
    aggregation = tag.attrs["aggregation"]
    by_name[tag.name] = aggregation.to_s if aggregation
  end
end

#finite_fields ⇒ Object



89
90
91
92
93
94
95
96
97
# File 'lib/cml/parser.rb', line 89

# Map of tag name => tag for every tag whose #finite_value? is truthy.
# The result is computed on the first call and reused afterwards.
#
# @return [Hash] tag name => tag object
def finite_fields
  unless @finite_fields
    @finite_fields = {}
    @tags.each do |tag|
      @finite_fields[tag.name] = tag if tag.finite_value?
    end
  end
  @finite_fields
end

#golds(rich = false, opts = {}) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/cml/parser.rb', line 99

# Gold data for the document's tags.
#
# On the first call a Gold object is built for every tag whose #gold? is
# truthy (the value returned by #gold? is passed as the second argument);
# that list is reused by later calls.
#
# @param rich [Boolean] when truthy, return the list of Gold objects itself
# @param opts [Hash] :no_meta - when truthy (and rich is falsy), drop every
#   entry whose key matches /^_/ from the returned hash
# @return [Array, Hash] the Gold list, or the hash produced by
#   #rich_gold_to_hash
def golds(rich = false, opts={})
  @golds ||= begin
    built = @tags.map do |tag|
      gold = tag.gold?
      Gold.new(tag, gold) if gold
    end
    built.compact
  end
  return @golds if rich

  flattened = rich_gold_to_hash( @golds )
  flattened.reject! { |k,v| k =~ /^_/ } if opts[:no_meta]
  flattened
end

#normalize ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
# File 'lib/cml/parser.rb', line 68

# Rewrites node names in @cftags in place: radios and select become
# checkboxes (their cml:radio / cml:option children become checkbox), and
# meta becomes text. Other nodes are left alone.
#
# @return [Object] @cftags, as returned by iterating it
def normalize
  child_names = { "radios" => "radio", "select" => "option" }
  @cftags.each do |tag_node|
    if (child_name = child_names[tag_node.name])
      tag_node.name = "checkboxes"
      tag_node.xpath("cml:#{child_name}").each { |option| option.name = "checkbox" }
    elsif tag_node.name == "meta"
      tag_node.name = "text"
    end
  end
end

#rich_gold_to_hash(golds) ⇒ Object



119
120
121
122
123
# File 'lib/cml/parser.rb', line 119

# Merges the #to_hash of every gold into one Hash; for repeated keys the
# later gold wins.
#
# @param golds [Enumerable] objects responding to #to_hash
# @return [Hash] a new hash with the combined entries
def rich_gold_to_hash(golds)
  golds.each_with_object({}) do |gold, combined|
    combined.update(gold.to_hash)
  end
end

#to_cml ⇒ Object



164
165
166
167
168
169
170
171
172
173
# File 'lib/cml/parser.rb', line 164

# Serializes @doc back to CML markup.
#
# The root wrapper and empty tags are stripped, input/link/img/br/hr tags
# are self-closed, URL-encoded {{ and }} are decoded, and the script/style
# bodies captured in @cdata are put back into the emptied elements.
#
# @return [String] the CML markup
def to_cml
  cml = @doc.to_xhtml
  cml = cml.gsub(/<\/?root[^>]*?>|<\/?>/,'')
  cml = cml.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  cml = cml.gsub(/%7B%7B/,'{{').gsub(/%7D%7D/,'}}')

  @cdata.each do |_open, _name, body, _close|
    # An empty body is restored as a single space so the element stops
    # looking empty and the next substitution moves on to the next one.
    restored = body.empty? ? " " : body
    cml.sub!(/(<(script|style)[^>]*>)<\/\2>/m) { "#{$1}#{restored}</#{$2}>" }
  end
  cml
end

#to_html(opts = nil) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/cml/parser.rb', line 175

# Renders the document as HTML.
#
# The converted document is serialized, root/group wrappers and empty tags
# are stripped, input/link/img/br/hr tags are self-closed, URL-encoded {{
# and }} are decoded, the script/style bodies captured in @cdata are put
# back, and the result is passed through #wrap.
#
# @param opts [Hash, nil] options forwarded to #convert
# @return [Object] the result of #wrap for the generated markup
def to_html(opts = nil)
  html = convert(opts).to_xhtml
  html = html.gsub(/<\/?(root|group)[^>]*?>|<\/?>/,'')
  html = html.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  html = html.gsub(/%7B%7B/,'{{').gsub(/%7D%7D/,'}}')

  @cdata.each do |_open, _name, body, _close|
    # The block form of sub! inserts the body verbatim. An empty body
    # becomes " " so that the element no longer matches on the next pass.
    restored = body.empty? ? " " : body
    html.sub!(/(<(script|style)[^>]*>)<\/\2>/m) { "#{$1}#{restored}</#{$2}>" }
  end
  wrap(html)
end

#to_s ⇒ Object



160
161
162
# File 'lib/cml/parser.rb', line 160

# String form of the parser: delegates to #to_html without extra options.
def to_s; to_html; end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/cml/parser.rb', line 125

# Validates the document and collects human-readable problems in @errors.
#
# Checks, in this order:
# 1. the last fatal error reported by @doc.errors;
# 2. validating tags whose name is blank;
# 3. validating tags that reuse the name of an earlier validating tag;
# 4. for every tag with children (group and iterate tags excepted),
#    validating children that reuse the value of an earlier validating child;
# 5. the errors of #logic_tree when it is not valid.
#
# Duplicates are tracked per name (and per value within one parent): the
# first holder of each name/value is accepted and only the repeats are
# reported. Names and values are compared as Hash keys.
#
# @return [Boolean] true when no errors were collected
def valid?
  @errors = []

  fatal = @doc.errors.select { |err| err.fatal? }.last
  if fatal
    @errors << "Malformed CML (#{fatal.level}).  #{fatal.message.chomp} on line #{fatal.line} column #{fatal.column}."
  end

  @tags.select { |t| t.validate? && t.name =~ /^\s*$/ }.each do |t|
    @errors << "#{t.to_s.split("\n")[0]} does not have a label or name specified."
  end

  seen_names = {}
  @tags.each do |t|
    next unless t.validate?
    if seen_names.key?(t.name)
      @errors << "#{t.to_s.split("\n")[0]} has a duplicated name, please specify a unique name attribute."
    else
      seen_names[t.name] = true
    end
  end

  @tags.each do |t|
    next if !t.children || ["group","iterate"].include?(t.tag)
    # Values only have to be unique among the children of one parent.
    seen_values = {}
    t.children.each do |c|
      next unless c.validate?
      if seen_values.key?(c.value)
        @errors << "#{c} a child of #{t.to_s.split("\n")[0]} has a duplicated value, please specify a unique value attribute."
      else
        seen_values[c.value] = true
      end
    end
  end

  @errors += logic_tree.errors unless logic_tree.valid?
  @errors.empty?
end

#wrap(content) ⇒ Object



154
155
156
157
158
# File 'lib/cml/parser.rb', line 154

# Decodes URL-encoded {{ and }} in the content and, unless @opts[:no_wrap]
# is set, wraps it in a div.
#
# The div's class is "cml" plus @opts[:class] when present, and its id is
# @opts[:prefix].
#
# @param content [String] generated markup
# @return [String] the decoded, optionally wrapped markup
def wrap(content)
  # Variables placed inside href attributes come back URL-encoded.
  content = content.gsub(/%7B%7B/,'{{').gsub(/%7D%7D/,'}}')
  return content if @opts[:no_wrap]

  extra_class = @opts[:class] ? " " + @opts[:class] : ""
  "<div class=\"cml#{extra_class}\" id=\"#{@opts[:prefix]}\">#{content}</div>"
end