Class: NormalizeXml::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/normalizexml/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



26
27
28
29
30
31
32
33
# File 'lib/normalizexml/parser.rb', line 26

# Sets up a fresh parser: no input file, no output file, verbose output off.
# Logs the call at debug level through the global $LOG logger.
def initialize
  $LOG.debug("Parser::initialize")

  @infile = @outfile = nil
  @verbose = false
end

Instance Attribute Details

#infile ⇒ Object

Returns the value of attribute infile.



22
23
24
# File 'lib/normalizexml/parser.rb', line 22

# Reader for @infile, which #normalize uses as the path of the XML file to
# read. Starts out as nil (see #initialize).
def infile
  @infile
end

#outfile ⇒ Object

Returns the value of attribute outfile.



23
24
25
# File 'lib/normalizexml/parser.rb', line 23

# Reader for @outfile, the path #normalize writes the normalized XML to.
# When it is nil or empty, #normalize fills it in with a "<name>.nml.xml"
# path in the input file's directory.
def outfile
  @outfile
end

#verbose ⇒ Object

Returns the value of attribute verbose.



24
25
26
# File 'lib/normalizexml/parser.rb', line 24

# Reader for @verbose. When truthy, #normalize prints progress messages
# with puts. Starts out as false (see #initialize).
def verbose
  @verbose
end

Instance Method Details

#normalize ⇒ Object

Raises:

  • (ArgumentError)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/normalizexml/parser.rb', line 42

# Normalizes the XML file at @infile and writes the result to @outfile.
#
# When @outfile is nil or empty it defaults to "<basename>.nml.xml" (the
# ".xml" suffix of @infile is dropped first) in the absolute directory of
# @infile. The input is first copied to @outfile with '> <' collapsed to
# '><', then parsed, run through the attribute-removal and sorting passes,
# and written back to @outfile.
#
# Prints progress messages when @verbose is truthy.
#
# Raises ArgumentError when @infile is nil or empty.
def normalize
  $LOG.debug "Parser::normalize"

  raise ArgumentError, "infile not provided" if @infile.nil? || @infile.empty?

  if @outfile.nil? || @outfile.empty?
    newfilename = File.basename(@infile, ".xml") + ".nml.xml"
    @outfile = File.join(File.absolute_path(File.dirname(@infile)), newfilename)
  end

  puts "Normalizing file: #{@infile}\n\n" if @verbose

  # Find instances of '> <' and replace them with '><'.
  strip_spaces(@infile, @outfile)

  # Block form closes the handle even when parsing raises.
  doc = File.open(@outfile, 'r') { |f| Nokogiri::XML(f) }

  remove_id_attributes(doc)
  remove_order_attributes(doc)
  normalize_ppm_datatypes(doc)
  sort_derivedparameters(doc)
  sort_conditions(doc)
  update_condition_attributes(doc)

  # Block form closes (and flushes) the handle even when serialization raises.
  File.open(@outfile, 'w') { |f| doc.write_xml_to(f) }

  if @verbose
    puts "Normalization of #{@infile} complete."
    puts "Output file: #{@outfile}"
    puts
  end
end

#normalize_ppm_datatypes(doc) ⇒ Object

Normalize PPM DataType attributes by removing them

doc

Nokogiri::XML document



170
171
172
173
174
175
176
177
# File 'lib/normalizexml/parser.rb', line 170

# Strips the DataType attribute from every PPM element in the document.
#
# doc:: document object answering #xpath (a Nokogiri::XML document).
#
# Returns the collection produced by the '//PPM' query.
def normalize_ppm_datatypes(doc)
  doc.xpath('//PPM').each { |ppm| ppm.remove_attribute('DataType') }
end

#parseLine(ln) ⇒ Object

DEPRECATED - DO NOT USE



246
247
248
249
250
251
252
253
254
# File 'lib/normalizexml/parser.rb', line 246

# DEPRECATED - DO NOT USE
#
# Text-based normalization of a single line: rewrites numeric Order and Id
# attribute values to "0" and tidies the spacing around Order="0".
# The substitutions run in the listed order; the double-space pattern is
# intentionally applied twice, exactly as before.
#
# ln:: the line of text to rewrite (not modified).
#
# Returns the rewritten string.
def parseLine(ln)
  substitutions = [
    [/Order=\"(\d+)\"/, 'Order="0"'],
    [/Order= '(\d+)'/,  'Order="0"'],
    [/  Order=\"0\"/,   ' Order="0"'],
    [/  Order=\"0\"/,   ' Order="0"'],
    [/Order=\"0\">/,    'Order="0" >'],
    [/Id=\"(\d+)\"/,    'Id="0"']
  ]

  substitutions.reduce(ln) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end

#remove_id_attributes(doc) ⇒ Object

Remove Id attributes

doc

Nokogiri::XML document



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/normalizexml/parser.rb', line 90

# Removes the Id attribute from every Ruleset, Rule and DPM element,
# in that order.
#
# doc:: document object answering #xpath (a Nokogiri::XML document).
#
# Returns the collection produced by the final ('//DPM') query.
def remove_id_attributes(doc)
  strip_id = lambda do |query|
    doc.xpath(query).each { |element| element.remove_attribute('Id') }
  end

  strip_id.call('//Ruleset')
  strip_id.call('//Rule')
  strip_id.call('//DPM')
end

#remove_order_attributes(doc) ⇒ Object

Remove Order attributes

doc

Nokogiri::XML document



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/normalizexml/parser.rb', line 144

# Removes the Order attribute from every Compute, AssignTo and Message
# element, in that order.
#
# doc:: document object answering #xpath (a Nokogiri::XML document).
#
# Returns the collection produced by the final ('//Message') query.
def remove_order_attributes(doc)
  stripped = ['//Compute', '//AssignTo', '//Message'].map do |query|
    doc.xpath(query).each { |element| element.remove_attribute('Order') }
  end

  # Keep the historical return value: the last collection processed.
  stripped.last
end

#sort_conditions(doc) ⇒ Object

Sort all Conditions children by sorting them alphabetically by Name.

doc

Nokogiri::XML document

To get this to work, the following flow was used:

1. Create a sorted array of all child nodes.
2. Create a new node and add each node from the array (giving us sorted order).
3. Delete (unlink) each of the sorted child nodes from the original node.
4. Add the new node as a sibling to the original node.
5. Delete (unlink) the original node
6. Rename the new node to the same name as the deleted node.


223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/normalizexml/parser.rb', line 223

# Sorts the children of the Conditions element alphabetically by their
# Name attribute.
#
# doc:: Nokogiri::XML document
#
# The children are collected in sorted order, detached, appended to a
# freshly created element, and that element then replaces the original
# (it is inserted as the next sibling, the original is unlinked, and the
# new element is renamed to 'Conditions').
#
# Children without a Name attribute sort as if their name were the empty
# string, so they come first instead of making the sort raise on a
# nil-vs-String comparison.
#
# Returns nil when the document has no Conditions element.
def sort_conditions(doc)
  node = doc.xpath('//Conditions')
  # Return if this guideline does not contain the conditions element.
  return if node.empty?

  # to_s makes the key nil-safe for children that carry no Name attribute.
  sorted = node.children.sort_by { |child| child['Name'].to_s }
  node.children.each(&:unlink)

  newnode = doc.create_element 'SortedConditions'
  sorted.each { |child| newnode << child }

  doc.at('Conditions').add_next_sibling(newnode)
  node.unlink
  newnode.name = 'Conditions'
end

#sort_derivedparameters(doc) ⇒ Object

Sort all DERIVEDPARAMETERS children by sorting them alphabetically by Name.

doc

Nokogiri::XML document

To get this to work, the following flow was used:

1. Create a sorted array of all child nodes.
2. Create a new node and add each node from the array (giving us sorted order).
3. Delete (unlink) each of the sorted child nodes from the original node.
4. Add the new node as a sibling to the original node.
5. Delete (unlink) the original node
6. Rename the new node to the same name as the deleted node.


192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/normalizexml/parser.rb', line 192

# Sorts the children of the DERIVEDPARAMETERS element alphabetically by
# their Name attribute.
#
# doc:: Nokogiri::XML document
#
# The children are collected in sorted order, detached, appended to a
# freshly created element, and that element then replaces the original
# (it is inserted as the next sibling, the original is unlinked, and the
# new element is renamed to 'DERIVEDPARAMETERS').
#
# Children without a Name attribute sort as if their name were the empty
# string, so they come first instead of making the sort raise on a
# nil-vs-String comparison.
#
# Returns nil when the document has no DERIVEDPARAMETERS element; without
# this guard the later doc.at(...) lookup would return nil and the call on
# it would raise NoMethodError.
def sort_derivedparameters(doc)
  node = doc.xpath('//DERIVEDPARAMETERS')
  # Return if this guideline does not contain the DERIVEDPARAMETERS element.
  return if node.empty?

  # to_s makes the key nil-safe for children that carry no Name attribute.
  sorted = node.children.sort_by { |child| child['Name'].to_s }
  node.children.each(&:unlink)

  newnode = doc.create_element 'SortedDPMs'
  sorted.each { |child| newnode << child }

  doc.at('DERIVEDPARAMETERS').add_next_sibling(newnode)
  node.unlink
  newnode.name = 'DERIVEDPARAMETERS'
end

#strip_spaces(infile, outfile) ⇒ Object



79
80
81
82
83
# File 'lib/normalizexml/parser.rb', line 79

# Copies infile to outfile with every '> <' sequence collapsed to '><'.
#
# infile::  path of the file to read.
# outfile:: path of the file to (over)write.
#
# File.read is used rather than IO.read because IO.read interprets a path
# that begins with "|" as a command to execute; File.read always treats
# its argument as a file path.
#
# Returns the number of bytes written.
def strip_spaces(infile, outfile)
  File.write(outfile, File.read(infile).gsub('> <', '><'))
end

#testFalse ⇒ Object



263
264
265
266
# File 'lib/normalizexml/parser.rb', line 263

# Logs "Parser::testFalse" at debug level via $LOG and returns false.
def testFalse
  $LOG.debug("Parser::testFalse")
  false
end

#testTrue ⇒ Object



257
258
259
260
# File 'lib/normalizexml/parser.rb', line 257

# Logs "Parser::testTrue" at debug level via $LOG and returns true.
def testTrue
  $LOG.debug("Parser::testTrue")
  true
end

#update_condition_attributes(doc) ⇒ Object

Add Condition attributes

doc

Nokogiri::XML document



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/normalizexml/parser.rb', line 116

# Gives every condition reference a SystemName and drops condition Ids.
#
# doc:: document object answering #xpath (a Nokogiri::XML document).
#
# Works on all Message elements whose Type attribute is "Condition":
#
# 1. Build an Id => Name lookup from the elements that have a Name.
# 2. For each element without a Name, set SystemName to the Name recorded
#    for its Id (whatever the lookup yields, including nil when the Id
#    is unknown).
# 3. Remove the Id attribute from every one of those elements.
#
# The lookup is completed before step 2 so a reference may appear ahead of
# the element that defines its name.
#
# Returns the collection produced by the xpath query.
def update_condition_attributes(doc)
  conditions = doc.xpath('//Message[@Type="Condition"]')

  names_by_id = conditions.each_with_object({}) do |cond, lookup|
    lookup[cond['Id']] = cond['Name'] if cond.has_attribute?('Name')
  end

  conditions.each do |cond|
    cond['SystemName'] = names_by_id[cond['Id']] unless cond.has_attribute?('Name')
  end

  conditions.each { |cond| cond.remove_attribute('Id') }
end