Class: Rets::Parser::Compact

Inherits:
Object
  • Object
show all
Defined in:
lib/rets/parser/compact.rb

Defined Under Namespace

Classes: SaxParser

Constant Summary collapse

DEFAULT_DELIMITER =
"\t"
INCLUDE_NULL_FIELDS =
-1
InvalidDelimiter =
Class.new(ArgumentError)

Class Method Summary collapse

Class Method Details

.get_count(xml) ⇒ Object



106
107
108
109
110
111
112
113
# File 'lib/rets/parser/compact.rb', line 106

# Reads the record count from a RETS response.
#
# The input is converted with #to_s, parsed with Nokogiri, and the first
# node matching the XPath "//COUNT" is looked up.
#
# @param xml [#to_s] the response document
# @return [Integer] the node's "Records" attribute converted with #to_i,
#   or 0 when no COUNT node is found
def self.get_count(xml)
  count_node = Nokogiri.parse(xml.to_s).at("//COUNT")
  return 0 unless count_node

  count_node.attr('Records').to_i
end

.parse(columns, data, delimiter = nil) ⇒ Object

Parses a single row of RETS-COMPACT data.

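The delimiter is given as a string and is escaped and compiled into a Regexp before splitting, because String#split behaves differently when given a string pattern: a single-space string (" ") makes it ignore leading whitespace and split on runs of whitespace.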



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rets/parser/compact.rb', line 71

# Parses a single row of RETS-COMPACT data into a Hash.
#
# The delimiter string is escaped and compiled into a Regexp before being
# handed to String#split, so it is always matched literally.
#
# @param columns [String] delimiter-separated column names
# @param data [String] delimiter-separated values for one row
# @param delimiter [String, nil] field separator; DEFAULT_DELIMITER is used
#   when nil
# @return [Hash{String => String}] column name => decoded value; columns
#   without a matching value map to "", and pairs whose name and value are
#   both empty are dropped
# @raise [Rets::Parser::Compact::InvalidDelimiter] when the delimiter is
#   empty or a comma
def self.parse(columns, data, delimiter = nil)
  separator = Regexp.new(Regexp.escape(delimiter || DEFAULT_DELIMITER))

  if [//, /,/].include?(separator)
    raise Rets::Parser::Compact::InvalidDelimiter, "Empty or invalid delimiter found, unable to parse."
  end

  names = columns.split(separator)
  # INCLUDE_NULL_FIELDS is passed as the split limit so trailing empty
  # fields are kept rather than discarded.
  values = data.split(separator, INCLUDE_NULL_FIELDS).map do |raw|
    CGI.unescape_html(safely_decode_character_references!(raw))
  end

  row = names.zip(values).each_with_object({}) do |(name, value), acc|
    acc[name.freeze] = value.to_s
  end

  row.reject { |name, value| name.empty? && value.empty? }
end

.parse_document(xml) ⇒ Object



13
14
15
16
17
18
19
20
# File 'lib/rets/parser/compact.rb', line 13

# Parses a whole COMPACT document into an array of row hashes.
#
# The document is streamed through a Nokogiri SAX parser driven by a
# SaxParser handler; each collected result row is then passed to .parse
# along with the handler's columns and delimiter.
#
# @param xml [#to_s] the response document
# @return [Array] one .parse result per entry in the handler's results
def self.parse_document(xml)
  handler = SaxParser.new
  Nokogiri::XML::SAX::Parser.new(handler).parse(StringIO.new(xml.to_s))

  handler.results.map { |row| parse(handler.columns, row, handler.delimiter) }
end

.safely_decode_character_references!(string) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rets/parser/compact.rb', line 91

# Decodes numeric character references (decimal "&#65;" and hexadecimal
# "&#x41;") in +string+, modifying it in place.
#
# References whose code point cannot be represented in UTF-8 (Integer#chr
# raises RangeError) are replaced with an empty string. Text that is not a
# well-formed reference -- for example hex digits without the "x" marker,
# as in "&#ff;" -- is left untouched instead of raising ArgumentError.
#
# @param string [String] a mutable string; it is edited with gsub!
# @return [String] the same string object that was passed in
def self.safely_decode_character_references!(string)
  # Decimal and hexadecimal forms are matched separately so that the digits
  # handed to Integer() are always valid for the chosen base.
  string.gsub!(/&#(?:x(\h+)|(\d+));/) do
    match = Regexp.last_match
    codepoint = match[1] ? Integer(match[1], 16) : Integer(match[2], 10)
    begin
      codepoint.chr(Encoding::UTF_8)
    rescue RangeError
      ""
    end
  end
  # gsub! returns nil when nothing matched, so return the string explicitly.
  string
end