Class: LibXML::XML::Parser

Inherits:
Object
  • Object
show all
Defined in:
ext/libxml/ruby_xml_parser.c,
lib/libxml/parser.rb,
ext/libxml/ruby_xml_parser.c

Overview

The XML::Parser provides a tree based API for processing xml documents, in contrast to XML::Reader’s stream based API and XML::SaxParser’s callback based API.

As a result, parsing a document creates an in-memory document object that consists of any number of XML::Node instances. This is a simple and powerful model, but has the major limitation that the size of the document that can be processed is limited by the amount of memory available. In such cases, it is better to use the XML::Reader.

Using the parser is simple:

parser = XML::Parser.file('my_file')
doc = parser.parse

You can also parse documents (see XML::Parser.document), strings (see XML::Parser.string) and io objects (see XML::Parser.io).

Defined Under Namespace

Modules: Options Classes: Context

Constant Summary collapse

VERSION =

Bunch of deprecated methods that have moved to the XML module

XML::VERSION
VERNUM =
XML::VERNUM

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(context) ⇒ XML::Parser

Creates a new XML::Parser from the specified XML::Parser::Context.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'ext/libxml/ruby_xml_parser.c', line 39

/*
 * Initializes a parser from an optional XML::Parser::Context argument.
 *
 * When no context is passed, a deprecation warning is emitted and a new
 * XML::Parser::Context instance is created to stand in for it. The context
 * is then stored on the parser under CONTEXT_ATTR and self is returned.
 */
static VALUE rxml_parser_initialize(int argc, VALUE *argv, VALUE self)
{
  VALUE context = Qnil;

  /* "01": zero required arguments, one optional argument. */
  rb_scan_args(argc, argv, "01", &context);

  if (NIL_P(context))
  {
    /* Legacy call style: warn, then fall back to a default context. */
    rb_warn("Passing no parameters to XML::Parser.new is deprecated.  Pass an instance of XML::Parser::Context instead.");
    context = rb_class_new_instance(0, NULL, cXMLParserContext);
  }

  rb_ivar_set(self, CONTEXT_ATTR, context);
  return self;
}

Instance Attribute Details

#contextObject (readonly)

#inputObject (readonly)

Class Method Details

.catalog_dumpObject



221
222
223
224
# File 'lib/libxml/parser.rb', line 221

# Emits a deprecation warning, then forwards to XML.catalog_dump.
#
# @deprecated Use XML.catalog_dump instead.
def self.catalog_dump
  warn 'XML::Parser.catalog_dump has been deprecated.  Use XML.catalog_dump instead'
  XML.catalog_dump
end

.catalog_removeObject



226
227
228
229
# File 'lib/libxml/parser.rb', line 226

# Emits a deprecation warning, then forwards to XML.catalog_remove.
#
# @deprecated Use XML.catalog_remove instead.
def self.catalog_remove
  warn 'XML::Parser.catalog_remove has been deprecated.  Use XML.catalog_remove instead'
  XML.catalog_remove
end

.check_lib_versionsObject



231
232
233
234
# File 'lib/libxml/parser.rb', line 231

# Emits a deprecation warning, then forwards to XML.check_lib_versions.
#
# @deprecated Use XML.check_lib_versions instead.
def self.check_lib_versions
  warn 'XML::Parser.check_lib_versions has been deprecated.  Use XML.check_lib_versions instead'
  XML.check_lib_versions
end

.debug_entitiesObject



236
237
238
239
# File 'lib/libxml/parser.rb', line 236

# Emits a deprecation warning, then returns XML.debug_entities.
#
# @deprecated Use XML.debug_entities instead.
def self.debug_entities
  warn 'XML::Parser.debug_entities has been deprecated.  Use XML.debug_entities instead'
  XML.debug_entities
end

.debug_entities=(value) ⇒ Object



241
242
243
244
# File 'lib/libxml/parser.rb', line 241

# Emits a deprecation warning, then assigns +value+ through
# XML.debug_entities=.
#
# The warning names this method as "debug_entities= value", matching the
# wording used by the other deprecated setters in this class (it previously
# referred to a non-existent "debug_entities_set").
#
# @deprecated Use XML.debug_entities= instead.
# @param value the setting handed on to XML.debug_entities=
def self.debug_entities=(value)
  warn("XML::Parser.debug_entities= value has been deprecated.  Use XML.debug_entities= value instead")
  XML.debug_entities = value
end

.default_compressionObject



246
247
248
249
# File 'lib/libxml/parser.rb', line 246

# Emits a deprecation warning, then returns XML.default_compression.
#
# @deprecated Use XML.default_compression instead.
def self.default_compression
  warn 'XML::Parser.default_compression has been deprecated.  Use XML.default_compression instead'
  XML.default_compression
end

.default_compression=(value) ⇒ Object



251
252
253
254
# File 'lib/libxml/parser.rb', line 251

# Emits a deprecation warning, then assigns +value+ through
# XML.default_compression=.
#
# @deprecated Use XML.default_compression= instead.
def self.default_compression=(value)
  warn 'XML::Parser.default_compression= value has been deprecated.  Use XML.default_compression= value instead'
  XML.default_compression = value
end

.default_keep_blanksObject



256
257
258
259
# File 'lib/libxml/parser.rb', line 256

# Emits a deprecation warning, then returns XML.default_keep_blanks.
#
# @deprecated Use XML.default_keep_blanks instead.
def self.default_keep_blanks
  warn 'XML::Parser.default_keep_blanks has been deprecated.  Use XML.default_keep_blanks instead'
  XML.default_keep_blanks
end

.default_keep_blanks=(value) ⇒ Object



261
262
263
264
# File 'lib/libxml/parser.rb', line 261

# Emits a deprecation warning, then assigns +value+ through
# XML.default_keep_blanks=.
#
# @deprecated Use XML.default_keep_blanks= instead.
def self.default_keep_blanks=(value)
  warn 'XML::Parser.default_keep_blanks= value has been deprecated.  Use XML.default_keep_blanks= value instead'
  XML.default_keep_blanks = value
end

.default_line_numbersObject



276
277
278
279
# File 'lib/libxml/parser.rb', line 276

# Emits a deprecation warning, then returns XML.default_line_numbers.
#
# @deprecated Use XML.default_line_numbers instead.
def self.default_line_numbers
  warn 'XML::Parser.default_line_numbers has been deprecated.  Use XML.default_line_numbers instead'
  XML.default_line_numbers
end

.default_line_numbers=(value) ⇒ Object



281
282
283
284
# File 'lib/libxml/parser.rb', line 281

# Emits a deprecation warning, then assigns +value+ through
# XML.default_line_numbers=.
#
# @deprecated Use XML.default_line_numbers= instead.
def self.default_line_numbers=(value)
  warn 'XML::Parser.default_line_numbers= value has been deprecated.  Use XML.default_line_numbers= value instead'
  XML.default_line_numbers = value
end

.default_load_external_dtdObject



266
267
268
269
# File 'lib/libxml/parser.rb', line 266

# Emits a deprecation warning, then returns XML.default_load_external_dtd.
#
# @deprecated Use XML.default_load_external_dtd instead.
def self.default_load_external_dtd
  warn 'XML::Parser.default_load_external_dtd has been deprecated.  Use XML.default_load_external_dtd instead'
  XML.default_load_external_dtd
end

.default_load_external_dtd=(value) ⇒ Object



271
272
273
274
# File 'lib/libxml/parser.rb', line 271

# Emits a deprecation warning, then assigns +value+ through
# XML.default_load_external_dtd=.
#
# @deprecated Use XML.default_load_external_dtd= instead.
def self.default_load_external_dtd=(value)
  warn 'XML::Parser.default_load_external_dtd= value has been deprecated.  Use XML.default_load_external_dtd= value instead'
  XML.default_load_external_dtd = value
end

.default_pedantic_parserObject



286
287
288
289
# File 'lib/libxml/parser.rb', line 286

# Emits a deprecation warning, then returns XML.default_pedantic_parser.
#
# @deprecated Use XML.default_pedantic_parser instead.
def self.default_pedantic_parser
  warn 'XML::Parser.default_pedantic_parser has been deprecated.  Use XML.default_pedantic_parser instead'
  XML.default_pedantic_parser
end

.default_pedantic_parser=(value) ⇒ Object



291
292
293
294
# File 'lib/libxml/parser.rb', line 291

# Emits a deprecation warning, then assigns +value+ through
# XML.default_pedantic_parser=.
#
# @deprecated Use XML.default_pedantic_parser= instead.
def self.default_pedantic_parser=(value)
  warn 'XML::Parser.default_pedantic_parser= value has been deprecated.  Use XML.default_pedantic_parser= value instead'
  XML.default_pedantic_parser = value
end

.default_substitute_entitiesObject



296
297
298
299
# File 'lib/libxml/parser.rb', line 296

# Emits a deprecation warning, then returns XML.default_substitute_entities.
#
# @deprecated Use XML.default_substitute_entities instead.
def self.default_substitute_entities
  warn 'XML::Parser.default_substitute_entities has been deprecated.  Use XML.default_substitute_entities instead'
  XML.default_substitute_entities
end

.default_substitute_entities=(value) ⇒ Object



301
302
303
304
# File 'lib/libxml/parser.rb', line 301

# Emits a deprecation warning, then assigns +value+ through
# XML.default_substitute_entities=.
#
# @deprecated Use XML.default_substitute_entities= instead.
def self.default_substitute_entities=(value)
  warn 'XML::Parser.default_substitute_entities= value has been deprecated.  Use XML.default_substitute_entities= value instead'
  XML.default_substitute_entities = value
end

.default_tree_indent_stringObject



306
307
308
309
# File 'lib/libxml/parser.rb', line 306

# Emits a deprecation warning, then returns XML.default_tree_indent_string.
#
# @deprecated Use XML.default_tree_indent_string instead.
def self.default_tree_indent_string
  warn 'XML::Parser.default_tree_indent_string has been deprecated.  Use XML.default_tree_indent_string instead'
  XML.default_tree_indent_string
end

.default_tree_indent_string=(value) ⇒ Object



311
312
313
314
# File 'lib/libxml/parser.rb', line 311

# Emits a deprecation warning, then assigns +value+ through
# XML.default_tree_indent_string=.
#
# @deprecated Use XML.default_tree_indent_string= instead.
def self.default_tree_indent_string=(value)
  warn 'XML::Parser.default_tree_indent_string= value has been deprecated.  Use XML.default_tree_indent_string= value instead'
  XML.default_tree_indent_string = value
end

.default_validity_checkingObject



316
317
318
319
# File 'lib/libxml/parser.rb', line 316

# Emits a deprecation warning, then returns XML.default_validity_checking.
#
# @deprecated Use XML.default_validity_checking instead.
def self.default_validity_checking
  warn 'XML::Parser.default_validity_checking has been deprecated.  Use XML.default_validity_checking instead'
  XML.default_validity_checking
end

.default_validity_checking=(value) ⇒ Object



321
322
323
324
# File 'lib/libxml/parser.rb', line 321

# Emits a deprecation warning, then assigns +value+ through
# XML.default_validity_checking=.
#
# @deprecated Use XML.default_validity_checking= instead.
def self.default_validity_checking=(value)
  warn 'XML::Parser.default_validity_checking= value has been deprecated.  Use XML.default_validity_checking= value instead'
  XML.default_validity_checking = value
end

.default_warningsObject



326
327
328
329
# File 'lib/libxml/parser.rb', line 326

# Emits a deprecation warning, then returns XML.default_warnings.
#
# @deprecated Use XML.default_warnings instead.
def self.default_warnings
  warn 'XML::Parser.default_warnings has been deprecated.  Use XML.default_warnings instead'
  XML.default_warnings
end

.default_warnings=(value) ⇒ Object



331
332
333
334
# File 'lib/libxml/parser.rb', line 331

# Emits a deprecation warning, then assigns +value+ through
# XML.default_warnings=.
#
# @deprecated Use XML.default_warnings= instead.
def self.default_warnings=(value)
  warn 'XML::Parser.default_warnings= value has been deprecated.  Use XML.default_warnings= value instead'
  XML.default_warnings = value
end

.document(doc) ⇒ Object

call-seq:

XML::Parser.document(document) -> XML::Parser

Creates a new parser for the specified document.

Parameters:

document - A preparsed document.


14
15
16
17
# File 'lib/libxml/parser.rb', line 14

# Builds a parser for an already-parsed document.
#
# Wraps +doc+ in a context via XML::Parser::Context.document and passes
# that context to +new+.
def self.document(doc)
  new(XML::Parser::Context.document(doc))
end

.enabled_automata?Boolean

Returns:

  • (Boolean)


131
132
133
134
# File 'lib/libxml/parser.rb', line 131

# Emits a deprecation warning, then returns XML.enabled_automata?.
#
# @deprecated Use XML.enabled_automata? instead.
def self.enabled_automata?
  warn 'XML::Parser.enabled_automata? has been deprecated.  Use XML.enabled_automata? instead'
  XML.enabled_automata?
end

.enabled_c14n?Boolean

Returns:

  • (Boolean)


136
137
138
139
# File 'lib/libxml/parser.rb', line 136

# Emits a deprecation warning, then returns XML.enabled_c14n?.
#
# @deprecated Use XML.enabled_c14n? instead.
def self.enabled_c14n?
  warn 'XML::Parser.enabled_c14n? has been deprecated.  Use XML.enabled_c14n? instead'
  XML.enabled_c14n?
end

.enabled_catalog?Boolean

Returns:

  • (Boolean)


141
142
143
144
# File 'lib/libxml/parser.rb', line 141

# Emits a deprecation warning, then returns XML.enabled_catalog?.
#
# @deprecated Use XML.enabled_catalog? instead.
def self.enabled_catalog?
  warn 'XML::Parser.enabled_catalog? has been deprecated.  Use XML.enabled_catalog? instead'
  XML.enabled_catalog?
end

.enabled_debug?Boolean

Returns:

  • (Boolean)


146
147
148
149
# File 'lib/libxml/parser.rb', line 146

# Emits a deprecation warning, then returns XML.enabled_debug?.
#
# @deprecated Use XML.enabled_debug? instead.
def self.enabled_debug?
  warn 'XML::Parser.enabled_debug? has been deprecated.  Use XML.enabled_debug? instead'
  XML.enabled_debug?
end

.enabled_docbook?Boolean

Returns:

  • (Boolean)


151
152
153
154
# File 'lib/libxml/parser.rb', line 151

# Emits a deprecation warning, then returns XML.enabled_docbook?.
#
# @deprecated Use XML.enabled_docbook? instead.
def self.enabled_docbook?
  warn 'XML::Parser.enabled_docbook? has been deprecated.  Use XML.enabled_docbook? instead'
  XML.enabled_docbook?
end

.enabled_ftp?Boolean

Returns:

  • (Boolean)


156
157
158
159
# File 'lib/libxml/parser.rb', line 156

# Emits a deprecation warning, then returns XML.enabled_ftp?.
#
# @deprecated Use XML.enabled_ftp? instead.
def self.enabled_ftp?
  warn 'XML::Parser.enabled_ftp? has been deprecated.  Use XML.enabled_ftp? instead'
  XML.enabled_ftp?
end

.enabled_html?Boolean

Returns:

  • (Boolean)


166
167
168
169
# File 'lib/libxml/parser.rb', line 166

# Emits a deprecation warning, then returns XML.enabled_html?.
#
# @deprecated Use XML.enabled_html? instead.
def self.enabled_html?
  warn 'XML::Parser.enabled_html? has been deprecated.  Use XML.enabled_html? instead'
  XML.enabled_html?
end

.enabled_http?Boolean

Returns:

  • (Boolean)


161
162
163
164
# File 'lib/libxml/parser.rb', line 161

# Emits a deprecation warning, then returns XML.enabled_http?.
#
# @deprecated Use XML.enabled_http? instead.
def self.enabled_http?
  warn 'XML::Parser.enabled_http? has been deprecated.  Use XML.enabled_http? instead'
  XML.enabled_http?
end

.enabled_iconv?Boolean

Returns:

  • (Boolean)


171
172
173
174
# File 'lib/libxml/parser.rb', line 171

# Emits a deprecation warning, then returns XML.enabled_iconv?.
#
# @deprecated Use XML.enabled_iconv? instead.
def self.enabled_iconv?
  warn 'XML::Parser.enabled_iconv? has been deprecated.  Use XML.enabled_iconv? instead'
  XML.enabled_iconv?
end

.enabled_memory_debug?Boolean

Returns:

  • (Boolean)


176
177
178
179
# File 'lib/libxml/parser.rb', line 176

# Emits a deprecation warning, then returns
# XML.enabled_memory_debug_location?.
#
# The warning names the method that was actually called
# (XML::Parser.enabled_memory_debug?); it previously reported a
# "XML::Parser.enabled_memory_debug_location?" that callers never invoked.
# The delegate target is unchanged.
#
# @deprecated Use XML.enabled_memory_debug_location? instead.
def self.enabled_memory_debug?
  warn("XML::Parser.enabled_memory_debug? has been deprecated.  Use XML.enabled_memory_debug_location? instead")
  XML.enabled_memory_debug_location?
end

.enabled_regexp?Boolean

Returns:

  • (Boolean)


181
182
183
184
# File 'lib/libxml/parser.rb', line 181

# Emits a deprecation warning, then returns XML.enabled_regexp?.
#
# @deprecated Use XML.enabled_regexp? instead.
def self.enabled_regexp?
  warn 'XML::Parser.enabled_regexp? has been deprecated.  Use XML.enabled_regexp? instead'
  XML.enabled_regexp?
end

.enabled_schemas?Boolean

Returns:

  • (Boolean)


186
187
188
189
# File 'lib/libxml/parser.rb', line 186

# Emits a deprecation warning, then returns XML.enabled_schemas?.
#
# @deprecated Use XML.enabled_schemas? instead.
def self.enabled_schemas?
  warn 'XML::Parser.enabled_schemas? has been deprecated.  Use XML.enabled_schemas? instead'
  XML.enabled_schemas?
end

.enabled_thread?Boolean

Returns:

  • (Boolean)


191
192
193
194
# File 'lib/libxml/parser.rb', line 191

# Emits a deprecation warning, then returns XML.enabled_thread?.
#
# @deprecated Use XML.enabled_thread? instead.
def self.enabled_thread?
  warn 'XML::Parser.enabled_thread? has been deprecated.  Use XML.enabled_thread? instead'
  XML.enabled_thread?
end

.enabled_unicode?Boolean

Returns:

  • (Boolean)


196
197
198
199
# File 'lib/libxml/parser.rb', line 196

# Emits a deprecation warning, then returns XML.enabled_unicode?.
#
# @deprecated Use XML.enabled_unicode? instead.
def self.enabled_unicode?
  warn 'XML::Parser.enabled_unicode? has been deprecated.  Use XML.enabled_unicode? instead'
  XML.enabled_unicode?
end

.enabled_xinclude?Boolean

Returns:

  • (Boolean)


201
202
203
204
# File 'lib/libxml/parser.rb', line 201

# Emits a deprecation warning, then returns XML.enabled_xinclude?.
#
# @deprecated Use XML.enabled_xinclude? instead.
def self.enabled_xinclude?
  warn 'XML::Parser.enabled_xinclude? has been deprecated.  Use XML.enabled_xinclude? instead'
  XML.enabled_xinclude?
end

.enabled_xpath?Boolean

Returns:

  • (Boolean)


206
207
208
209
# File 'lib/libxml/parser.rb', line 206

# Emits a deprecation warning, then returns XML.enabled_xpath?.
#
# @deprecated Use XML.enabled_xpath? instead.
def self.enabled_xpath?
  warn 'XML::Parser.enabled_xpath? has been deprecated.  Use XML.enabled_xpath? instead'
  XML.enabled_xpath?
end

.enabled_xpointer?Boolean

Returns:

  • (Boolean)


211
212
213
214
# File 'lib/libxml/parser.rb', line 211

# Emits a deprecation warning, then returns XML.enabled_xpointer?.
#
# @deprecated Use XML.enabled_xpointer? instead.
def self.enabled_xpointer?
  warn 'XML::Parser.enabled_xpointer? has been deprecated.  Use XML.enabled_xpointer? instead'
  XML.enabled_xpointer?
end

.enabled_zlib?Boolean

Returns:

  • (Boolean)


216
217
218
219
# File 'lib/libxml/parser.rb', line 216

# Emits a deprecation warning, then returns XML.enabled_zlib?.
#
# @deprecated Use XML.enabled_zlib? instead.
def self.enabled_zlib?
  warn 'XML::Parser.enabled_zlib? has been deprecated.  Use XML.enabled_zlib? instead'
  XML.enabled_zlib?
end

.featuresObject



336
337
338
339
# File 'lib/libxml/parser.rb', line 336

# Emits a deprecation warning, then returns XML.features.
#
# @deprecated Use XML.features instead.
def self.features
  warn 'XML::Parser.features has been deprecated.  Use XML.features instead'
  XML.features
end

.file(path, options = {}) ⇒ Object

call-seq:

XML::Parser.file(path) -> XML::Parser
XML::Parser.file(path, :encoding => XML::Encoding::UTF_8,
                       :options => XML::Parser::Options::NOENT) -> XML::Parser

Creates a new parser for the specified file or uri.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


34
35
36
37
38
39
# File 'lib/libxml/parser.rb', line 34

# Builds a parser that reads from +path+.
#
# A context is created with XML::Parser::Context.file; the :encoding and
# :options entries of +options+ are copied onto it only when they are
# truthy, and the context is then passed to +new+.
def self.file(path, options = {})
  ctx = XML::Parser::Context.file(path)

  encoding = options[:encoding]
  ctx.encoding = encoding if encoding

  parser_options = options[:options]
  ctx.options = parser_options if parser_options

  new(ctx)
end

.filename(value) ⇒ Object



351
352
353
354
# File 'lib/libxml/parser.rb', line 351

# Emits a deprecation warning, then builds a parser via +file+.
#
# @deprecated Use Parser.file instead.
def self.filename(value)
  warn 'Parser.filename is deprecated.  Use Parser.file instead'
  file(value)
end

.indent_tree_outputObject



341
342
343
344
# File 'lib/libxml/parser.rb', line 341

# Emits a deprecation warning, then returns XML.indent_tree_output.
#
# @deprecated Use XML.indent_tree_output instead.
def self.indent_tree_output
  warn 'XML::Parser.indent_tree_output has been deprecated.  Use XML.indent_tree_output instead'
  XML.indent_tree_output
end

.indent_tree_output=(value) ⇒ Object



346
347
348
349
# File 'lib/libxml/parser.rb', line 346

# Emits a deprecation warning, then assigns +value+ through
# XML.indent_tree_output=.
#
# @deprecated Use XML.indent_tree_output= instead.
def self.indent_tree_output=(value)
  warn 'XML::Parser.indent_tree_output= value has been deprecated.  Use XML.indent_tree_output= value instead'
  XML.indent_tree_output = value
end

.io(io, options = {}) ⇒ Object

call-seq:

XML::Parser.io(io) -> XML::Parser
XML::Parser.io(io, :encoding => XML::Encoding::UTF_8,
                   :options => XML::Parser::Options::NOENT,
                   :base_uri => "http://libxml.org") -> XML::Parser

Creates a new parser for the specified io object.

Parameters:

io - io object that contains the XML to parse
base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


58
59
60
61
62
63
64
# File 'lib/libxml/parser.rb', line 58

# Builds a parser that reads from the given io object.
#
# A context is created with XML::Parser::Context.io; the :base_uri,
# :encoding and :options entries of +options+ are copied onto it only when
# they are truthy, and the context is then passed to +new+.
def self.io(io, options = {})
  ctx = XML::Parser::Context.io(io)

  base_uri = options[:base_uri]
  ctx.base_uri = base_uri if base_uri

  encoding = options[:encoding]
  ctx.encoding = encoding if encoding

  parser_options = options[:options]
  ctx.options = parser_options if parser_options

  new(ctx)
end

.memory_dumpObject



356
357
358
359
# File 'lib/libxml/parser.rb', line 356

# Emits a deprecation warning, then forwards to XML.memory_dump.
#
# @deprecated Use XML.memory_dump instead.
def self.memory_dump
  warn 'XML::Parser.memory_dump has been deprecated.  Use XML.memory_dump instead'
  XML.memory_dump
end

.memory_usedObject



361
362
363
364
# File 'lib/libxml/parser.rb', line 361

# Emits a deprecation warning, then returns XML.memory_used.
#
# @deprecated Use XML.memory_used instead.
def self.memory_used
  warn 'XML::Parser.memory_used has been deprecated.  Use XML.memory_used instead'
  XML.memory_used
end

.register_error_handler(proc) ⇒ Object



91
92
93
94
95
96
97
98
# File 'lib/libxml/parser.rb', line 91

# Emits a deprecation warning, then installs or clears the error handler.
#
# A nil +proc+ resets the handler through Error.reset_handler; any other
# value is passed to Error.set_handler as its block.
#
# @deprecated Use Error.set_handler instead.
def self.register_error_handler(proc)
  warn "Parser.register_error_handler is deprecated.  Use Error.set_handler instead"
  return Error.reset_handler if proc.nil?

  Error.set_handler(&proc)
end

.string(string, options = {}) ⇒ Object

call-seq:

XML::Parser.string(string)
XML::Parser.string(string, :encoding => XML::Encoding::UTF_8,
                           :options => XML::Parser::Options::NOENT,
                           :base_uri => "http://libxml.org") -> XML::Parser

Creates a new parser by parsing the specified string.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


83
84
85
86
87
88
89
# File 'lib/libxml/parser.rb', line 83

# Builds a parser that reads from the given string.
#
# A context is created with XML::Parser::Context.string; the :base_uri,
# :encoding and :options entries of +options+ are copied onto it only when
# they are truthy, and the context is then passed to +new+.
def self.string(string, options = {})
  ctx = XML::Parser::Context.string(string)

  base_uri = options[:base_uri]
  ctx.base_uri = base_uri if base_uri

  encoding = options[:encoding]
  ctx.encoding = encoding if encoding

  parser_options = options[:options]
  ctx.options = parser_options if parser_options

  new(ctx)
end

Instance Method Details

#document=(value) ⇒ Object



106
107
108
109
# File 'lib/libxml/parser.rb', line 106

# Emits a deprecation warning, then replaces this parser's context with
# one built from +value+ via XML::Parser::Context.document.
#
# The warning points callers at the class method XML::Parser.document;
# it previously recommended a non-existent "XML::Parser.document=".
#
# @deprecated Use XML::Parser.document instead.
# @param value the document handed to XML::Parser::Context.document
def document=(value)
  warn("XML::Parser#document= is deprecated.  Use XML::Parser.document instead")
  @context = XML::Parser::Context.document(value)
end

#file=(value) ⇒ Object



111
112
113
114
# File 'lib/libxml/parser.rb', line 111

# Emits a deprecation warning, then replaces this parser's context with
# one built from +value+ via XML::Parser::Context.file.
#
# @deprecated Use XML::Parser.file instead.
def file=(value)
  warn 'XML::Parser#file is deprecated.  Use XML::Parser.file instead'
  @context = XML::Parser::Context.file(value)
end

#filename=(value) ⇒ Object



116
117
118
119
# File 'lib/libxml/parser.rb', line 116

# Emits a deprecation warning, then assigns +value+ through +file=+.
#
# @deprecated Use XML::Parser.file instead.
def filename=(value)
  warn 'XML::Parser#filename is deprecated.  Use XML::Parser.file instead'
  self.file = value
end

#io=(value) ⇒ Object



121
122
123
124
# File 'lib/libxml/parser.rb', line 121

# Emits a deprecation warning, then replaces this parser's context with
# one built from +value+ via XML::Parser::Context.io.
#
# @deprecated Use XML::Parser.io instead.
def io=(value)
  warn 'XML::Parser#io is deprecated.  Use XML::Parser.io instead'
  @context = XML::Parser::Context.io(value)
end

#parseXML::Document

Parse the input XML and create an XML::Document with its content. If an error occurs, XML::Parser::ParseError is raised.

Returns:



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'ext/libxml/ruby_xml_parser.c', line 63

/*
 * Parses the input held by this parser's context and returns the resulting
 * document wrapped as a Ruby object.
 *
 * The parse is treated as failed when xmlParseDocument returns -1 or the
 * context reports the input as not well formed, unless the context is in
 * recovery mode. On failure any partially built document is freed and the
 * context's last error is passed to rxml_raise. On success the context is
 * closed and ctxt->myDoc is wrapped and returned.
 */
static VALUE rxml_parser_parse(VALUE self)
{
  xmlParserCtxtPtr ctxt;
  VALUE context = rb_ivar_get(self, CONTEXT_ATTR);

  Data_Get_Struct(context, xmlParserCtxt, ctxt);

  if ((xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) && !ctxt->recovery)
  {
    if (ctxt->myDoc)
    {
      xmlFreeDoc(ctxt->myDoc);
      /* The context object outlives this call, so do not leave it holding
         a pointer to the document that was just freed. */
      ctxt->myDoc = NULL;
    }
    /* NOTE(review): rxml_raise is expected to raise a Ruby exception and
       not return here - confirm against its definition. */
    rxml_raise(&ctxt->lastError);
  }

  rb_funcall(context, rb_intern("close"), 0);

  return rxml_document_wrap(ctxt->myDoc);
}

#string=(value) ⇒ Object



126
127
128
129
# File 'lib/libxml/parser.rb', line 126

# Emits a deprecation warning, then replaces this parser's context with
# one built from +value+ via XML::Parser::Context.string.
#
# @deprecated Use XML::Parser.string instead.
def string=(value)
  warn 'XML::Parser#string is deprecated.  Use XML::Parser.string instead'
  @context = XML::Parser::Context.string(value)
end