Class: LibXML::XML::Parser

Inherits:
Object
  • Object
show all
Defined in:
ext/libxml/ruby_xml_parser.c,
lib/libxml/parser.rb,
ext/libxml/ruby_xml_parser.c

Overview

The XML::Parser provides a tree-based API for processing XML documents, in contrast to XML::Reader’s stream-based API and XML::SaxParser’s callback-based API.

As a result, parsing a document creates an in-memory document object that consists of any number of XML::Node instances. This is a simple and powerful model, but it has the major limitation that the size of the document that can be processed is limited by the amount of memory available. In such cases, it is better to use the XML::Reader.

Using the parser is simple:

parser = XML::Parser.file('my_file')
doc = parser.parse

You can also parse documents (see XML::Parser.document), strings (see XML::Parser.string) and io objects (see XML::Parser.io).

Defined Under Namespace

Modules: Options Classes: Context

Constant Summary collapse

VERSION =

Bunch of deprecated methods that have moved to the XML module

XML::VERSION
VERNUM =
XML::VERNUM

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(context) ⇒ XML::Parser

Creates a new XML::Parser from the specified XML::Parser::Context.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'ext/libxml/ruby_xml_parser.c', line 41

/*
 * call-seq:
 *    XML::Parser.new(context) -> XML::Parser
 *
 * Stores the supplied context on the parser under CONTEXT_ATTR.  When the
 * argument is omitted (or nil), a deprecation warning is emitted and a
 * newly created cXMLParserContext instance is stored instead.
 */
static VALUE rxml_parser_initialize(int argc, VALUE *argv, VALUE self)
{
  VALUE context = Qnil;

  /* "01": no required arguments, one optional argument. */
  rb_scan_args(argc, argv, "01", &context);

  if (NIL_P(context))
  {
    rb_warn("Passing no parameters to XML::Parser.new is deprecated.  Pass an instance of XML::Parser::Context instead.");
    context = rb_class_new_instance(0, NULL, cXMLParserContext);
  }

  rb_ivar_set(self, CONTEXT_ATTR, context);
  return self;
}

Instance Attribute Details

#contextObject (readonly)

#inputObject (readonly)

Class Method Details

.catalog_dumpObject



219
220
221
222
# File 'lib/libxml/parser.rb', line 219

# Deprecated shim: warns, then delegates to XML.catalog_dump.
def self.catalog_dump
  warn 'XML::Parser.catalog_dump has been deprecated.  Use XML.catalog_dump instead'
  XML.catalog_dump
end

.catalog_removeObject



224
225
226
227
# File 'lib/libxml/parser.rb', line 224

# Deprecated shim: warns, then delegates to XML.catalog_remove.
def self.catalog_remove
  warn 'XML::Parser.catalog_remove has been deprecated.  Use XML.catalog_remove instead'
  XML.catalog_remove
end

.check_lib_versionsObject



229
230
231
232
# File 'lib/libxml/parser.rb', line 229

# Deprecated shim: warns, then delegates to XML.check_lib_versions.
def self.check_lib_versions
  warn 'XML::Parser.check_lib_versions has been deprecated.  Use XML.check_lib_versions instead'
  XML.check_lib_versions
end

.debug_entitiesObject



234
235
236
237
# File 'lib/libxml/parser.rb', line 234

# Deprecated shim: warns, then delegates to XML.debug_entities.
def self.debug_entities
  warn 'XML::Parser.debug_entities has been deprecated.  Use XML.debug_entities instead'
  XML.debug_entities
end

.debug_entities=(value) ⇒ Object



239
240
241
242
# File 'lib/libxml/parser.rb', line 239

# Deprecated shim: warns, then forwards +value+ to XML.debug_entities=.
def self.debug_entities=(value)
  # The notice names this setter as "debug_entities= value", the same wording
  # the other deprecated setters use; it previously referred to a
  # "debug_entities_set" method that callers never invoke.
  warn("XML::Parser.debug_entities= value has been deprecated.  Use XML.debug_entities= value instead")
  XML.debug_entities = value
end

.default_compressionObject



244
245
246
247
# File 'lib/libxml/parser.rb', line 244

# Deprecated shim: warns, then delegates to XML.default_compression.
def self.default_compression
  warn 'XML::Parser.default_compression has been deprecated.  Use XML.default_compression instead'
  XML.default_compression
end

.default_compression=(value) ⇒ Object



249
250
251
252
# File 'lib/libxml/parser.rb', line 249

# Deprecated shim: warns, then forwards +value+ to XML.default_compression=.
def self.default_compression=(value)
  warn 'XML::Parser.default_compression= value has been deprecated.  Use XML.default_compression= value instead'
  XML.default_compression = value
end

.default_keep_blanksObject



254
255
256
257
# File 'lib/libxml/parser.rb', line 254

# Deprecated shim: warns, then delegates to XML.default_keep_blanks.
def self.default_keep_blanks
  warn 'XML::Parser.default_keep_blanks has been deprecated.  Use XML.default_keep_blanks instead'
  XML.default_keep_blanks
end

.default_keep_blanks=(value) ⇒ Object



259
260
261
262
# File 'lib/libxml/parser.rb', line 259

# Deprecated shim: warns, then forwards +value+ to XML.default_keep_blanks=.
def self.default_keep_blanks=(value)
  warn 'XML::Parser.default_keep_blanks= value has been deprecated.  Use XML.default_keep_blanks= value instead'
  XML.default_keep_blanks = value
end

.default_line_numbersObject



274
275
276
277
# File 'lib/libxml/parser.rb', line 274

# Deprecated shim: warns, then delegates to XML.default_line_numbers.
def self.default_line_numbers
  warn 'XML::Parser.default_line_numbers has been deprecated.  Use XML.default_line_numbers instead'
  XML.default_line_numbers
end

.default_line_numbers=(value) ⇒ Object



279
280
281
282
# File 'lib/libxml/parser.rb', line 279

# Deprecated shim: warns, then forwards +value+ to XML.default_line_numbers=.
def self.default_line_numbers=(value)
  warn 'XML::Parser.default_line_numbers= value has been deprecated.  Use XML.default_line_numbers= value instead'
  XML.default_line_numbers = value
end

.default_load_external_dtdObject



264
265
266
267
# File 'lib/libxml/parser.rb', line 264

# Deprecated shim: warns, then delegates to XML.default_load_external_dtd.
def self.default_load_external_dtd
  warn 'XML::Parser.default_load_external_dtd has been deprecated.  Use XML.default_load_external_dtd instead'
  XML.default_load_external_dtd
end

.default_load_external_dtd=(value) ⇒ Object



269
270
271
272
# File 'lib/libxml/parser.rb', line 269

# Deprecated shim: warns, then forwards +value+ to XML.default_load_external_dtd=.
def self.default_load_external_dtd=(value)
  warn 'XML::Parser.default_load_external_dtd= value has been deprecated.  Use XML.default_load_external_dtd= value instead'
  XML.default_load_external_dtd = value
end

.default_pedantic_parserObject



284
285
286
287
# File 'lib/libxml/parser.rb', line 284

# Deprecated shim: warns, then delegates to XML.default_pedantic_parser.
def self.default_pedantic_parser
  warn 'XML::Parser.default_pedantic_parser has been deprecated.  Use XML.default_pedantic_parser instead'
  XML.default_pedantic_parser
end

.default_pedantic_parser=(value) ⇒ Object



289
290
291
292
# File 'lib/libxml/parser.rb', line 289

# Deprecated shim: warns, then forwards +value+ to XML.default_pedantic_parser=.
def self.default_pedantic_parser=(value)
  warn 'XML::Parser.default_pedantic_parser= value has been deprecated.  Use XML.default_pedantic_parser= value instead'
  XML.default_pedantic_parser = value
end

.default_substitute_entitiesObject



294
295
296
297
# File 'lib/libxml/parser.rb', line 294

# Deprecated shim: warns, then delegates to XML.default_substitute_entities.
def self.default_substitute_entities
  warn 'XML::Parser.default_substitute_entities has been deprecated.  Use XML.default_substitute_entities instead'
  XML.default_substitute_entities
end

.default_substitute_entities=(value) ⇒ Object



299
300
301
302
# File 'lib/libxml/parser.rb', line 299

# Deprecated shim: warns, then forwards +value+ to XML.default_substitute_entities=.
def self.default_substitute_entities=(value)
  warn 'XML::Parser.default_substitute_entities= value has been deprecated.  Use XML.default_substitute_entities= value instead'
  XML.default_substitute_entities = value
end

.default_tree_indent_stringObject



304
305
306
307
# File 'lib/libxml/parser.rb', line 304

# Deprecated shim: warns, then delegates to XML.default_tree_indent_string.
def self.default_tree_indent_string
  warn 'XML::Parser.default_tree_indent_string has been deprecated.  Use XML.default_tree_indent_string instead'
  XML.default_tree_indent_string
end

.default_tree_indent_string=(value) ⇒ Object



309
310
311
312
# File 'lib/libxml/parser.rb', line 309

# Deprecated shim: warns, then forwards +value+ to XML.default_tree_indent_string=.
def self.default_tree_indent_string=(value)
  warn 'XML::Parser.default_tree_indent_string= value has been deprecated.  Use XML.default_tree_indent_string= value instead'
  XML.default_tree_indent_string = value
end

.default_validity_checkingObject



314
315
316
317
# File 'lib/libxml/parser.rb', line 314

# Deprecated shim: warns, then delegates to XML.default_validity_checking.
def self.default_validity_checking
  warn 'XML::Parser.default_validity_checking has been deprecated.  Use XML.default_validity_checking instead'
  XML.default_validity_checking
end

.default_validity_checking=(value) ⇒ Object



319
320
321
322
# File 'lib/libxml/parser.rb', line 319

# Deprecated shim: warns, then forwards +value+ to XML.default_validity_checking=.
def self.default_validity_checking=(value)
  warn 'XML::Parser.default_validity_checking= value has been deprecated.  Use XML.default_validity_checking= value instead'
  XML.default_validity_checking = value
end

.default_warningsObject



324
325
326
327
# File 'lib/libxml/parser.rb', line 324

# Deprecated shim: warns, then delegates to XML.default_warnings.
def self.default_warnings
  warn 'XML::Parser.default_warnings has been deprecated.  Use XML.default_warnings instead'
  XML.default_warnings
end

.default_warnings=(value) ⇒ Object



329
330
331
332
# File 'lib/libxml/parser.rb', line 329

# Deprecated shim: warns, then forwards +value+ to XML.default_warnings=.
def self.default_warnings=(value)
  warn 'XML::Parser.default_warnings= value has been deprecated.  Use XML.default_warnings= value instead'
  XML.default_warnings = value
end

.document(doc) ⇒ Object

call-seq:

XML::Parser.document(document) -> XML::Parser

Creates a new parser for the specified document.

Parameters:

document - A preparsed document.


12
13
14
15
# File 'lib/libxml/parser.rb', line 12

# Builds a parser context from +doc+ via XML::Parser::Context.document and
# returns a new parser constructed with that context.
def self.document(doc)
  new(XML::Parser::Context.document(doc))
end

.enabled_automata?Boolean

Returns:

  • (Boolean)


129
130
131
132
# File 'lib/libxml/parser.rb', line 129

# Deprecated shim: warns, then delegates to XML.enabled_automata?.
def self.enabled_automata?
  warn 'XML::Parser.enabled_automata? has been deprecated.  Use XML.enabled_automata? instead'
  XML.enabled_automata?
end

.enabled_c14n?Boolean

Returns:

  • (Boolean)


134
135
136
137
# File 'lib/libxml/parser.rb', line 134

# Deprecated shim: warns, then delegates to XML.enabled_c14n?.
def self.enabled_c14n?
  warn 'XML::Parser.enabled_c14n? has been deprecated.  Use XML.enabled_c14n? instead'
  XML.enabled_c14n?
end

.enabled_catalog?Boolean

Returns:

  • (Boolean)


139
140
141
142
# File 'lib/libxml/parser.rb', line 139

# Deprecated shim: warns, then delegates to XML.enabled_catalog?.
def self.enabled_catalog?
  warn 'XML::Parser.enabled_catalog? has been deprecated.  Use XML.enabled_catalog? instead'
  XML.enabled_catalog?
end

.enabled_debug?Boolean

Returns:

  • (Boolean)


144
145
146
147
# File 'lib/libxml/parser.rb', line 144

# Deprecated shim: warns, then delegates to XML.enabled_debug?.
def self.enabled_debug?
  warn 'XML::Parser.enabled_debug? has been deprecated.  Use XML.enabled_debug? instead'
  XML.enabled_debug?
end

.enabled_docbook?Boolean

Returns:

  • (Boolean)


149
150
151
152
# File 'lib/libxml/parser.rb', line 149

# Deprecated shim: warns, then delegates to XML.enabled_docbook?.
def self.enabled_docbook?
  warn 'XML::Parser.enabled_docbook? has been deprecated.  Use XML.enabled_docbook? instead'
  XML.enabled_docbook?
end

.enabled_ftp?Boolean

Returns:

  • (Boolean)


154
155
156
157
# File 'lib/libxml/parser.rb', line 154

# Deprecated shim: warns, then delegates to XML.enabled_ftp?.
def self.enabled_ftp?
  warn 'XML::Parser.enabled_ftp? has been deprecated.  Use XML.enabled_ftp? instead'
  XML.enabled_ftp?
end

.enabled_html?Boolean

Returns:

  • (Boolean)


164
165
166
167
# File 'lib/libxml/parser.rb', line 164

# Deprecated shim: warns, then delegates to XML.enabled_html?.
def self.enabled_html?
  warn 'XML::Parser.enabled_html? has been deprecated.  Use XML.enabled_html? instead'
  XML.enabled_html?
end

.enabled_http?Boolean

Returns:

  • (Boolean)


159
160
161
162
# File 'lib/libxml/parser.rb', line 159

# Deprecated shim: warns, then delegates to XML.enabled_http?.
def self.enabled_http?
  warn 'XML::Parser.enabled_http? has been deprecated.  Use XML.enabled_http? instead'
  XML.enabled_http?
end

.enabled_iconv?Boolean

Returns:

  • (Boolean)


169
170
171
172
# File 'lib/libxml/parser.rb', line 169

# Deprecated shim: warns, then delegates to XML.enabled_iconv?.
def self.enabled_iconv?
  warn 'XML::Parser.enabled_iconv? has been deprecated.  Use XML.enabled_iconv? instead'
  XML.enabled_iconv?
end

.enabled_memory_debug?Boolean

Returns:

  • (Boolean)


174
175
176
177
# File 'lib/libxml/parser.rb', line 174

# Deprecated shim: warns, then delegates to XML.enabled_memory_debug_location?.
def self.enabled_memory_debug?
  # The notice names this method (enabled_memory_debug?) as the deprecated
  # call; it previously reported "enabled_memory_debug_location?", which is
  # not the method the caller invoked.
  # NOTE(review): the delegate is XML.enabled_memory_debug_location? — confirm
  # that this is the intended replacement on the XML module.
  warn("XML::Parser.enabled_memory_debug? has been deprecated.  Use XML.enabled_memory_debug_location? instead")
  XML.enabled_memory_debug_location?
end

.enabled_regexp?Boolean

Returns:

  • (Boolean)


179
180
181
182
# File 'lib/libxml/parser.rb', line 179

# Deprecated shim: warns, then delegates to XML.enabled_regexp?.
def self.enabled_regexp?
  warn 'XML::Parser.enabled_regexp? has been deprecated.  Use XML.enabled_regexp? instead'
  XML.enabled_regexp?
end

.enabled_schemas?Boolean

Returns:

  • (Boolean)


184
185
186
187
# File 'lib/libxml/parser.rb', line 184

# Deprecated shim: warns, then delegates to XML.enabled_schemas?.
def self.enabled_schemas?
  warn 'XML::Parser.enabled_schemas? has been deprecated.  Use XML.enabled_schemas? instead'
  XML.enabled_schemas?
end

.enabled_thread?Boolean

Returns:

  • (Boolean)


189
190
191
192
# File 'lib/libxml/parser.rb', line 189

# Deprecated shim: warns, then delegates to XML.enabled_thread?.
def self.enabled_thread?
  warn 'XML::Parser.enabled_thread? has been deprecated.  Use XML.enabled_thread? instead'
  XML.enabled_thread?
end

.enabled_unicode?Boolean

Returns:

  • (Boolean)


194
195
196
197
# File 'lib/libxml/parser.rb', line 194

# Deprecated shim: warns, then delegates to XML.enabled_unicode?.
def self.enabled_unicode?
  warn 'XML::Parser.enabled_unicode? has been deprecated.  Use XML.enabled_unicode? instead'
  XML.enabled_unicode?
end

.enabled_xinclude?Boolean

Returns:

  • (Boolean)


199
200
201
202
# File 'lib/libxml/parser.rb', line 199

# Deprecated shim: warns, then delegates to XML.enabled_xinclude?.
def self.enabled_xinclude?
  warn 'XML::Parser.enabled_xinclude? has been deprecated.  Use XML.enabled_xinclude? instead'
  XML.enabled_xinclude?
end

.enabled_xpath?Boolean

Returns:

  • (Boolean)


204
205
206
207
# File 'lib/libxml/parser.rb', line 204

# Deprecated shim: warns, then delegates to XML.enabled_xpath?.
def self.enabled_xpath?
  warn 'XML::Parser.enabled_xpath? has been deprecated.  Use XML.enabled_xpath? instead'
  XML.enabled_xpath?
end

.enabled_xpointer?Boolean

Returns:

  • (Boolean)


209
210
211
212
# File 'lib/libxml/parser.rb', line 209

# Deprecated shim: warns, then delegates to XML.enabled_xpointer?.
def self.enabled_xpointer?
  warn 'XML::Parser.enabled_xpointer? has been deprecated.  Use XML.enabled_xpointer? instead'
  XML.enabled_xpointer?
end

.enabled_zlib?Boolean

Returns:

  • (Boolean)


214
215
216
217
# File 'lib/libxml/parser.rb', line 214

# Deprecated shim: warns, then delegates to XML.enabled_zlib?.
def self.enabled_zlib?
  warn 'XML::Parser.enabled_zlib? has been deprecated.  Use XML.enabled_zlib? instead'
  XML.enabled_zlib?
end

.featuresObject



334
335
336
337
# File 'lib/libxml/parser.rb', line 334

# Deprecated shim: warns, then delegates to XML.features.
def self.features
  warn 'XML::Parser.features has been deprecated.  Use XML.features instead'
  XML.features
end

.file(path, options = {}) ⇒ Object

call-seq:

XML::Parser.file(path) -> XML::Parser
XML::Parser.file(path, :encoding => XML::Encoding::UTF_8,
                       :options => XML::Parser::Options::NOENT) -> XML::Parser

Creates a new parser for the specified file or uri.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


32
33
34
35
36
37
# File 'lib/libxml/parser.rb', line 32

# Creates a context for +path+ via XML::Parser::Context.file, copies the
# :encoding and :options entries of +options+ onto it when they are truthy,
# and returns a new parser built from that context.
def self.file(path, options = {})
  context = XML::Parser::Context.file(path)

  encoding = options[:encoding]
  context.encoding = encoding if encoding

  parser_options = options[:options]
  context.options = parser_options if parser_options

  new(context)
end

.filename(value) ⇒ Object



349
350
351
352
# File 'lib/libxml/parser.rb', line 349

# Deprecated alias: warns, then returns the result of self.file(value).
def self.filename(value)
  warn 'Parser.filename is deprecated.  Use Parser.file instead'
  self.file(value)
end

.indent_tree_outputObject



339
340
341
342
# File 'lib/libxml/parser.rb', line 339

# Deprecated shim: warns, then delegates to XML.indent_tree_output.
def self.indent_tree_output
  warn 'XML::Parser.indent_tree_output has been deprecated.  Use XML.indent_tree_output instead'
  XML.indent_tree_output
end

.indent_tree_output=(value) ⇒ Object



344
345
346
347
# File 'lib/libxml/parser.rb', line 344

# Deprecated shim: warns, then forwards +value+ to XML.indent_tree_output=.
def self.indent_tree_output=(value)
  warn 'XML::Parser.indent_tree_output= value has been deprecated.  Use XML.indent_tree_output= value instead'
  XML.indent_tree_output = value
end

.io(io, options = {}) ⇒ Object

call-seq:

XML::Parser.io(io) -> XML::Parser
XML::Parser.io(io, :encoding => XML::Encoding::UTF_8,
                   :options => XML::Parser::Options::NOENT,
                   :base_uri => "http://libxml.org") -> XML::Parser

Creates a new parser for the specified io object.

Parameters:

io - io object that contains the xml to parse
base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


56
57
58
59
60
61
62
# File 'lib/libxml/parser.rb', line 56

# Creates a context for +io+ via XML::Parser::Context.io, copies the
# :base_uri, :encoding and :options entries of +options+ onto it when they
# are truthy, and returns a new parser built from that context.
def self.io(io, options = {})
  context = XML::Parser::Context.io(io)

  base_uri = options[:base_uri]
  context.base_uri = base_uri if base_uri

  encoding = options[:encoding]
  context.encoding = encoding if encoding

  parser_options = options[:options]
  context.options = parser_options if parser_options

  new(context)
end

.memory_dumpObject



354
355
356
357
# File 'lib/libxml/parser.rb', line 354

# Deprecated shim: warns, then delegates to XML.memory_dump.
def self.memory_dump
  warn 'XML::Parser.memory_dump has been deprecated.  Use XML.memory_dump instead'
  XML.memory_dump
end

.memory_usedObject



359
360
361
362
# File 'lib/libxml/parser.rb', line 359

# Deprecated shim: warns, then delegates to XML.memory_used.
def self.memory_used
  warn 'XML::Parser.memory_used has been deprecated.  Use XML.memory_used instead'
  XML.memory_used
end

.register_error_handler(proc) ⇒ Object



89
90
91
92
93
94
95
96
# File 'lib/libxml/parser.rb', line 89

# Deprecated: warns, then resets the Error handler when +proc+ is nil,
# otherwise installs +proc+ as the handler block via Error.set_handler.
def self.register_error_handler(proc)
  warn "Parser.register_error_handler is deprecated.  Use Error.set_handler instead"
  return Error.reset_handler if proc.nil?

  Error.set_handler(&proc)
end

.string(string, options = {}) ⇒ Object

call-seq:

XML::Parser.string(string) -> XML::Parser
XML::Parser.string(string, :encoding => XML::Encoding::UTF_8,
                           :options => XML::Parser::Options::NOENT,
                           :base_uri => "http://libxml.org") -> XML::Parser

Creates a new parser by parsing the specified string.

You may provide an optional hash table to control how the parsing is performed. Valid options are:

base_uri - The base url for the parsed document.
encoding - The document encoding, defaults to nil. Valid values
           are the encoding constants defined on XML::Encoding.
options - Parser options.  Valid values are the constants defined on
          XML::Parser::Options.  Multiple options can be combined
          by using Bitwise OR (|).


81
82
83
84
85
86
87
# File 'lib/libxml/parser.rb', line 81

# Creates a context for +string+ via XML::Parser::Context.string, copies the
# :base_uri, :encoding and :options entries of +options+ onto it when they
# are truthy, and returns a new parser built from that context.
def self.string(string, options = {})
  context = XML::Parser::Context.string(string)

  base_uri = options[:base_uri]
  context.base_uri = base_uri if base_uri

  encoding = options[:encoding]
  context.encoding = encoding if encoding

  parser_options = options[:options]
  context.options = parser_options if parser_options

  new(context)
end

Instance Method Details

#document=(value) ⇒ Object



104
105
106
107
# File 'lib/libxml/parser.rb', line 104

# Deprecated: warns, then replaces @context with a context built from
# +value+ via XML::Parser::Context.document.
def document=(value)
  # The suggested replacement is the class-level factory XML::Parser.document;
  # the notice previously pointed at "XML::Parser.document=".
  warn("XML::Parser#document= is deprecated.  Use XML::Parser.document instead")
  @context = XML::Parser::Context.document(value)
end

#file=(value) ⇒ Object



109
110
111
112
# File 'lib/libxml/parser.rb', line 109

# Deprecated: warns, then replaces @context with a context built from
# +value+ via XML::Parser::Context.file.
def file=(value)
  warn 'XML::Parser#file is deprecated.  Use XML::Parser.file instead'
  @context = XML::Parser::Context.file(value)
end

#filename=(value) ⇒ Object



114
115
116
117
# File 'lib/libxml/parser.rb', line 114

# Deprecated: warns, then assigns +value+ through the file= setter.
def filename=(value)
  warn 'XML::Parser#filename is deprecated.  Use XML::Parser.file instead'
  self.file = value
end

#io=(value) ⇒ Object



119
120
121
122
# File 'lib/libxml/parser.rb', line 119

# Deprecated: warns, then replaces @context with a context built from
# +value+ via XML::Parser::Context.io.
def io=(value)
  warn 'XML::Parser#io is deprecated.  Use XML::Parser.io instead'
  @context = XML::Parser::Context.io(value)
end

#parseXML::Document

Parse the input XML and create an XML::Document with its content. If an error occurs, XML::Parser::ParseError is raised.

Returns:



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'ext/libxml/ruby_xml_parser.c', line 65

/*
 * call-seq:
 *    parser.parse -> XML::Document
 *
 * Runs xmlParseDocument on the parser context stored under CONTEXT_ATTR and
 * wraps the resulting document.  If parsing fails or the input is not well
 * formed, and the context is not in recovery mode, any partially built
 * document is freed and the context's last error is passed to rxml_raise.
 */
static VALUE rxml_parser_parse(VALUE self)
{
  xmlParserCtxtPtr ctxt;
  VALUE context = rb_ivar_get(self, CONTEXT_ATTR);

  Data_Get_Struct(context, xmlParserCtxt, ctxt);

  if ((xmlParseDocument(ctxt) == -1 || !ctxt->wellFormed) && !ctxt->recovery)
  {
    if (ctxt->myDoc)
    {
      xmlFreeDoc(ctxt->myDoc);
      /* Clear the pointer so the context does not keep referring to the
         freed document if it is used again after the error. */
      ctxt->myDoc = NULL;
    }
    /* NOTE(review): rxml_raise presumably raises a Ruby exception and does
       not return - confirm against its definition. */
    rxml_raise(&ctxt->lastError);
  }

  return rxml_document_wrap(ctxt->myDoc);
}

#string=(value) ⇒ Object



124
125
126
127
# File 'lib/libxml/parser.rb', line 124

# Deprecated: warns, then replaces @context with a context built from
# +value+ via XML::Parser::Context.string.
def string=(value)
  warn 'XML::Parser#string is deprecated.  Use XML::Parser.string instead'
  @context = XML::Parser::Context.string(value)
end