Module: Ox

Defined in:
lib/ox.rb,
lib/ox/bag.rb,
lib/ox/sax.rb,
lib/ox/node.rb,
lib/ox/cdata.rb,
lib/ox/error.rb,
lib/ox/element.rb,
lib/ox/comment.rb,
lib/ox/doctype.rb,
lib/ox/version.rb,
lib/ox/document.rb,
lib/ox/hasattrs.rb,
lib/ox/instruct.rb,
lib/ox/xmlrpc_adapter.rb,
ext/ox/ox.c

Overview

Description:

Ox handles XML documents in two ways. It is a generic XML parser and writer as well as a fast Object / XML marshaller. Ox was written for speed as a replacement for Nokogiri and for Marshal.

As an XML parser it is 2 or more times faster than Nokogiri and as a generic XML writer it is 14 times faster than Nokogiri. Of course different files may result in slightly different times.

As an Object serializer Ox is 4 times faster than the standard Ruby Marshal.dump(). Ox is 3 times faster than Marshal.load().

Object Dump Sample:

require 'ox'

# Plain Ruby class with three read/write attributes; instances of it are what
# the sample below serializes and restores.
class Sample
  attr_accessor :a, :b, :c

  # Stores the three arguments in the instance variables @a, @b and @c.
  def initialize(a, b, c)
    @a = a
    @b = b
    @c = c
  end
end

# Create Object
obj = Sample.new(1, "bee", ['x', :y, 7.0])
# Now dump the Object to an XML String.
xml = Ox.dump(obj)
# Convert the object back into a Sample Object.
obj2 = Ox.parse_obj(xml)

Generic XML Writing and Parsing:

require 'ox'

# Create the document, passing '1.0' as its :version option.
doc = Ox::Document.new(:version => '1.0')

# Build a three-level tree (top > middle > bottom). Each element gets a value
# stored under :name and is appended to its parent with <<.
top = Ox::Element.new('top')
top[:name] = 'sample'
doc << top

mid = Ox::Element.new('middle')
mid[:name] = 'second'
top << mid

bot = Ox::Element.new('bottom')
bot[:name] = 'third'
mid << bot

# Serialize the document to XML, print it, parse it back and compare the
# parsed copy with the original document.
xml = Ox.dump(doc)
puts xml
doc2 = Ox.parse(xml)
puts "Same? #{doc == doc2}"

Defined Under Namespace

Modules: HasAttrs Classes: ArgError, Bag, CData, Comment, DocType, Document, Element, Error, Instruct, InvalidPath, Node, ParseError, Sax, StreamParser

Constant Summary collapse

VERSION =

Current version of the module.

'2.1.6'

Class Method Summary collapse

Class Method Details

.cache8_testObject


872
873
874
875
876
# File 'ext/ox/ox.c', line 872

/* Ruby-callable wrapper that runs ox_cache8_test() and always returns nil.
 * The receiver (self) is not used.
 */
static VALUE
cache8_test(VALUE self) {
    ox_cache8_test();
    return Qnil;
}

.cache_testObject


864
865
866
867
868
# File 'ext/ox/ox.c', line 864

/* Ruby-callable wrapper that runs ox_cache_test() and always returns nil.
 * The receiver (self) is not used.
 */
static VALUE
cache_test(VALUE self) {
    ox_cache_test();
    return Qnil;
}

.ox_default_optionsHash

Returns the default load and dump options as a Hash. The options are

  • indent: [Fixnum] number of spaces to indent each element in an XML document

  • trace: [Fixnum] trace level where 0 is silent

  • encoding: [String] character encoding for the XML file

  • with_dtd: [true|false|nil] include DTD in the dump

  • with_instruct: [true|false|nil] include instructions in the dump

  • with_xml: [true|false|nil] include XML prolog in the dump

  • circular: [true|false|nil] support circular references while dumping

  • xsd_date: [true|false|nil] use XSD date format instead of decimal format

  • mode: [:object|:generic|:limited|nil] load method to use for XML

  • effort: [:strict|:tolerant|:auto_define] set the tolerance level for loading

  • symbolize_keys: [true|false|nil] symbolize element attribute keys or leave as Strings

  • skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text

Note that an indent of less than zero will result in a tight one-line output unless the text in the XML fields contains newline characters.


230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'ext/ox/ox.c', line 230

static VALUE
get_def_opts(VALUE self) {
    VALUE opts = rb_hash_new();
    int   elen = (int)strlen(ox_default_options.encoding);

    rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
    rb_hash_aset(opts, indent_sym, INT2FIX(ox_default_options.indent));
    rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
    rb_hash_aset(opts, with_dtd_sym, (Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
    rb_hash_aset(opts, with_xml_sym, (Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
    rb_hash_aset(opts, with_instruct_sym, (Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
    rb_hash_aset(opts, circular_sym, (Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
    rb_hash_aset(opts, xsd_date_sym, (Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
    rb_hash_aset(opts, symbolize_keys_sym, (Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
    switch (ox_default_options.mode) {
    case ObjMode: rb_hash_aset(opts, mode_sym, object_sym); break;
    case GenMode: rb_hash_aset(opts, mode_sym, generic_sym);  break;
    case LimMode: rb_hash_aset(opts, mode_sym, limited_sym);  break;
    case NoMode:
    default:    rb_hash_aset(opts, mode_sym, Qnil);   break;
    }
    switch (ox_default_options.effort) {
    case StrictEffort:    rb_hash_aset(opts, effort_sym, strict_sym);   break;
    case TolerantEffort:  rb_hash_aset(opts, effort_sym, tolerant_sym);    break;
    case AutoEffort:    rb_hash_aset(opts, effort_sym, auto_define_sym);  break;
    case NoEffort:
    default:      rb_hash_aset(opts, effort_sym, Qnil);      break;
    }
    switch (ox_default_options.skip) {
    case NoSkip:    rb_hash_aset(opts, skip_sym, skip_none_sym);    break;
    case CrSkip:    rb_hash_aset(opts, skip_sym, skip_return_sym);    break;
    case SpcSkip:   rb_hash_aset(opts, skip_sym, skip_white_sym);    break;
    default:      rb_hash_aset(opts, skip_sym, Qnil);      break;
    }
    return opts;
}

.ox_default_options=(opts) ⇒ nil

Sets the default options for load and dump.


285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'ext/ox/ox.c', line 285

/* Replaces the default load and dump options from a Ruby Hash.
 *
 * opts must be a Hash, otherwise Check_Type() raises. Handling per key:
 *  - encoding: nil clears the stored encoding; a String is copied into the
 *    fixed-size encoding buffer (truncated to fit) and the cached encoding
 *    object is refreshed when encoding support is compiled in.
 *  - indent, trace: only changed when the value is not nil; must be Fixnums.
 *  - mode, effort, skip: nil selects NoMode, NoEffort and NoSkip; any value
 *    other than the accepted Symbols raises ox_parse_error_class.
 *  - with_xml, with_dtd, with_instruct, xsd_date, circular, symbolize_keys:
 *    nil stores NotSet, true stores Yes, false stores No; anything else
 *    raises ox_parse_error_class.
 *
 * Options are applied in the order listed, so an exception raised part way
 * through leaves the earlier keys already applied.
 *
 * Returns nil.
 */
static VALUE
set_def_opts(VALUE self, VALUE opts) {
    struct _YesNoOpt    ynos[] = {
        { with_xml_sym, &ox_default_options.with_xml },
        { with_dtd_sym, &ox_default_options.with_dtd },
        { with_instruct_sym, &ox_default_options.with_instruct },
        { xsd_date_sym, &ox_default_options.xsd_date },
        { circular_sym, &ox_default_options.circular },
        { symbolize_keys_sym, &ox_default_options.sym_keys },
        { Qnil, 0 }
    };
    YesNoOpt    o;
    VALUE       v;

    Check_Type(opts, T_HASH);

    v = rb_hash_aref(opts, ox_encoding_sym);
    if (Qnil == v) {
        *ox_default_options.encoding = '\0';
    } else {
        Check_Type(v, T_STRING);
        strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
        /* strncpy() leaves the destination unterminated when the source
         * fills it, so terminate explicitly before the buffer is read. */
        ox_default_options.encoding[sizeof(ox_default_options.encoding) - 1] = '\0';
#if HAS_ENCODING_SUPPORT
        ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
#elif HAS_PRIVATE_ENCODING
        ox_default_options.rb_enc = rb_str_new2(ox_default_options.encoding);
        rb_gc_register_address(&ox_default_options.rb_enc);
#endif
    }

    v = rb_hash_aref(opts, indent_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.indent = FIX2INT(v);
    }

    v = rb_hash_aref(opts, trace_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.trace = FIX2INT(v);
    }

    v = rb_hash_aref(opts, mode_sym);
    if (Qnil == v) {
        ox_default_options.mode = NoMode;
    } else if (object_sym == v) {
        ox_default_options.mode = ObjMode;
    } else if (generic_sym == v) {
        ox_default_options.mode = GenMode;
    } else if (limited_sym == v) {
        ox_default_options.mode = LimMode;
    } else {
        rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, or nil.\n");
    }

    v = rb_hash_aref(opts, effort_sym);
    if (Qnil == v) {
        ox_default_options.effort = NoEffort;
    } else if (strict_sym == v) {
        ox_default_options.effort = StrictEffort;
    } else if (tolerant_sym == v) {
        ox_default_options.effort = TolerantEffort;
    } else if (auto_define_sym == v) {
        ox_default_options.effort = AutoEffort;
    } else {
        rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
    }

    v = rb_hash_aref(opts, skip_sym);
    if (Qnil == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_none_sym == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_return_sym == v) {
        ox_default_options.skip = CrSkip;
    } else if (skip_white_sym == v) {
        ox_default_options.skip = SpcSkip;
    } else {
        rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or nil.\n");
    }

    /* The table ends with an entry whose attr pointer is 0. */
    for (o = ynos; 0 != o->attr; o++) {
        v = rb_hash_lookup(opts, o->sym);
        if (Qnil == v) {
            *o->attr = NotSet;
        } else if (Qtrue == v) {
            *o->attr = Yes;
        } else if (Qfalse == v) {
            *o->attr = No;
        } else {
            rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
        }
    }
    return Qnil;
}

.dump(obj, options) ⇒ Object

Dumps an Object (obj) to a string. Note that an indent of less than zero will result in a tight one-line output unless the text in the XML fields contains newline characters.


806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# File 'ext/ox/ox.c', line 806

/* Serializes an object to an XML String.
 *
 * argv[0] is the object to write. When exactly two arguments are given,
 * argv[1] is passed to parse_dump_options() to adjust a private copy of the
 * default options. Raises NoMemError when ox_write_obj_to_str() returns 0.
 * When the options carry a non-empty encoding name and encoding support is
 * compiled in, that encoding is applied to the returned String.
 */
static VALUE
dump(int argc, VALUE *argv, VALUE self) {
    struct _Options opts = ox_default_options;
    char            *buf;
    VALUE           out;

    if (2 == argc) {
        parse_dump_options(argv[1], &opts);
    }
    buf = ox_write_obj_to_str(argv[0], &opts);
    if (0 == buf) {
        rb_raise(rb_eNoMemError, "Not enough memory.\n");
    }
    out = rb_str_new2(buf);
#if HAS_ENCODING_SUPPORT
    if ('\0' != opts.encoding[0]) {
        rb_enc_associate(out, rb_enc_find(opts.encoding));
    }
#elif HAS_PRIVATE_ENCODING
    if ('\0' != opts.encoding[0]) {
        rb_funcall(out, ox_force_encoding_id, 1, rb_str_new2(opts.encoding));
    }
#endif
    /* The Ruby String holds its own copy, so the C buffer can be released. */
    xfree(buf);

    return out;
}

.load(xml, options) ⇒ Ox::Document, ...

Parses an XML document String into an Ox::Document, or Ox::Element, or Object depending on the options. Raises an exception if the XML is malformed or the classes specified are not valid.


584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
# File 'ext/ox/ox.c', line 584

/* Parses an XML String according to the optional load options.
 *
 * argv[0] must be a String; the remaining arguments are forwarded to load().
 * The String's bytes (plus the trailing terminator) are copied first because
 * the parser modifies its input. Buffers larger than SMALL_XML are taken
 * from the heap and released after parsing; smaller ones use ALLOCA_N.
 * Any error recorded in err during loading is raised before returning.
 */
static VALUE
load_str(int argc, VALUE *argv, VALUE self) {
    struct _Err err;
    VALUE       enc;
    VALUE       result;
    size_t      size;
    char        *buf;

    err_init(&err);
    Check_Type(argv[0], T_STRING);
    /* the xml string gets modified so make a copy of it */
    size = RSTRING_LEN(argv[0]) + 1;
    if (SMALL_XML < size) {
        buf = ALLOC_N(char, size);
    } else {
        buf = ALLOCA_N(char, size);
    }
    /* Pick up the source String's encoding in whichever way this build supports. */
#if HAS_ENCODING_SUPPORT && !defined(MACRUBY_RUBY)
    enc = rb_obj_encoding(argv[0]);
#elif HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING
    enc = rb_funcall(argv[0], rb_intern("encoding"), 0);
#else
    enc = Qnil;
#endif
    memcpy(buf, StringValuePtr(argv[0]), size);
    result = load(buf, argc - 1, argv + 1, self, enc, &err);
    if (SMALL_XML < size) {
        xfree(buf);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return result;
}

.load_file(file_path, options) ⇒ Ox::Document, ...

Parses an XML document from a file into an Ox::Document, or Ox::Element, or Object depending on the options. Raises an exception if the XML is malformed or the classes specified are not valid.


641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
# File 'ext/ox/ox.c', line 641

/* Reads a whole file into memory and parses it according to the optional
 * load options.
 *
 * argv[0] must be a String holding the file path; the remaining arguments
 * are forwarded to load(). Raises IOError when the file cannot be opened or
 * its size cannot be determined. A short read is recorded as a LoadError in
 * err and raised after the file has been closed and the buffer released.
 * Buffers larger than SMALL_XML are taken from the heap; smaller ones use
 * ALLOCA_N.
 */
static VALUE
load_file(int argc, VALUE *argv, VALUE self) {
    char        *path;
    char        *xml;
    FILE        *f;
    long        flen = 0;
    size_t      len;
    VALUE       obj;
    struct _Err err;

    err_init(&err);
    Check_Type(*argv, T_STRING);
    path = StringValuePtr(*argv);
    /* NOTE(review): the file is opened in text mode; on platforms that
     * translate line endings the byte count from ftell() may not match what
     * fread() returns - confirm whether "rb" is wanted. */
    if (0 == (f = fopen(path, "r"))) {
        rb_raise(rb_eIOError, "%s\n", strerror(errno));
    }
    /* ftell() reports failure as -1; converting that to size_t unchecked
     * would request an enormous buffer, so verify every positioning call. */
    if (0 != fseek(f, 0, SEEK_END) || (flen = ftell(f)) < 0 || 0 != fseek(f, 0, SEEK_SET)) {
        int saved_errno = errno;

        fclose(f);
        rb_raise(rb_eIOError, "%s\n", strerror(saved_errno));
    }
    len = (size_t)flen;
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len + 1);
    } else {
        xml = ALLOCA_N(char, len + 1);
    }
    if (len != fread(xml, 1, len, f)) {
        ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
        obj = Qnil;
    } else {
        xml[len] = '\0';
        obj = load(xml, argc - 1, argv + 1, self, Qnil, &err);
    }
    fclose(f);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

.parse(xml) ⇒ Ox::Document, Ox::Element

Parses an XML document String into an Ox::Document or Ox::Element.

Raises:

  • (Exception)

    if the XML is malformed.


434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
# File 'ext/ox/ox.c', line 434

/* Parses an XML String with the generic callbacks (ox_gen_callbacks) and
 * returns whatever ox_parse() produces.
 *
 * ruby_xml must be a String. Its bytes are copied before parsing because the
 * parser modifies its input; buffers larger than SMALL_XML are taken from
 * the heap and released afterwards, smaller ones use ALLOCA_N. Any error
 * recorded in err by the parser is raised before returning.
 */
static VALUE
to_gen(VALUE self, VALUE ruby_xml) {
    char            *xml, *x, *src;
    size_t          len;
    size_t          skipped;
    VALUE           obj;
    struct _Options options = ox_default_options;
    struct _Err     err;

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    src = StringValuePtr(ruby_xml);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    x = defuse_bom(src, &options);
    /* len was measured from the start of the String. If defuse_bom() handed
     * back a pointer further into the buffer (presumably past a byte order
     * mark - its definition is not shown here), shrink len by the same
     * amount so the copy below does not read beyond the source. */
    if (src < x) {
        skipped = (size_t)(x - src);
        if (skipped < len) {
            len -= skipped;
        }
    }
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
    obj = ox_parse(xml, ox_gen_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

.parse_obj(xml) ⇒ Object

Parses an XML document String that is in the object format and returns an Object of the type represented by the XML. This function expects an optimized XML formatted String. For other formats use the more generic Ox.load() method. Raises an exception if the XML is malformed or the classes specified in the file are not valid.


391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'ext/ox/ox.c', line 391

/* Parses an XML String with the object callbacks (ox_obj_callbacks) and
 * returns whatever ox_parse() produces.
 *
 * ruby_xml must be a String. Its bytes are copied before parsing because the
 * parser modifies its input; buffers larger than SMALL_XML are taken from
 * the heap and released afterwards, smaller ones use ALLOCA_N. When
 * HAS_GC_GUARD is set the garbage collector is disabled for the duration of
 * the parse and re-enabled afterwards. Any error recorded in err by the
 * parser is raised before returning.
 */
static VALUE
to_obj(VALUE self, VALUE ruby_xml) {
    char            *xml, *x, *src;
    size_t          len;
    size_t          skipped;
    VALUE           obj;
    struct _Options options = ox_default_options;
    struct _Err     err;

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    src = StringValuePtr(ruby_xml);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    x = defuse_bom(src, &options);
    /* len was measured from the start of the String. If defuse_bom() handed
     * back a pointer further into the buffer (presumably past a byte order
     * mark - its definition is not shown here), shrink len by the same
     * amount so the copy below does not read beyond the source. */
    if (src < x) {
        skipped = (size_t)(x - src);
        if (skipped < len) {
            len -= skipped;
        }
    }
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
#if HAS_GC_GUARD
    rb_gc_disable();
#endif
    obj = ox_parse(xml, ox_obj_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
#if HAS_GC_GUARD
    RB_GC_GUARD(obj);
    rb_gc_enable();
#endif
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

.sax_parse(handler, io, options) ⇒ Object

Parses an IO stream or file containing an XML document. Raises an exception if the XML is malformed or the classes specified are not valid.


693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
# File 'ext/ox/ox.c', line 693

/* Runs the SAX parser over an input using the given handler.
 *
 * argv[0] and argv[1] are passed straight to ox_sax_parse() (handler and
 * input, per the documented signature). At least two arguments are
 * required, otherwise ox_parse_error_class is raised. When a third argument
 * is present and is exactly a Hash, it may override the defaults:
 *  - convert_special, smart, symbolize: a non-nil value enables the flag
 *    only when it is true.
 *  - skip: :skip_return selects CrSkip and :skip_white selects SpcSkip; any
 *    other value leaves the default NoSkip in place.
 *
 * Returns nil.
 */
static VALUE
sax_parse(int argc, VALUE *argv, VALUE self) {
    struct _SaxOptions  sax_opts;
    VALUE               hash;
    VALUE               val;

    /* Defaults used when no options Hash is supplied. */
    sax_opts.symbolize = 1;
    sax_opts.convert_special = 0;
    sax_opts.smart = 0;
    sax_opts.skip = NoSkip;

    if (argc < 2) {
        rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
    }
    if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
        hash = argv[2];

        val = rb_hash_lookup(hash, convert_special_sym);
        if (Qnil != val) {
            sax_opts.convert_special = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, smart_sym);
        if (Qnil != val) {
            sax_opts.smart = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, symbolize_sym);
        if (Qnil != val) {
            sax_opts.symbolize = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, skip_sym);
        if (Qnil != val) {
            if (skip_return_sym == val) {
                sax_opts.skip = CrSkip;
            } else if (skip_white_sym == val) {
                sax_opts.skip = SpcSkip;
            }
        }
    }
    ox_sax_parse(argv[0], argv[1], &sax_opts);

    return Qnil;
}

.to_file(file_path, obj, options) ⇒ Object

Dumps an Object to the specified file. Note that an indent of less than zero will result in a tight one-line output unless the text in the XML fields contains newline characters.


849
850
851
852
853
854
855
856
857
858
859
860
# File 'ext/ox/ox.c', line 849

/* Writes an object to a file as XML.
 *
 * argv[0] must be a String holding the file path and argv[1] is the object
 * to write. When exactly three arguments are given, argv[2] is passed to
 * parse_dump_options() to adjust a private copy of the default options
 * before the path is type-checked.
 *
 * Returns nil.
 */
static VALUE
to_file(int argc, VALUE *argv, VALUE self) {
    struct _Options opts = ox_default_options;

    if (3 == argc) {
        parse_dump_options(argv[2], &opts);
    }
    Check_Type(argv[0], T_STRING);
    ox_write_obj_to_file(argv[1], StringValuePtr(argv[0]), &opts);

    return Qnil;
}

.dump(obj, options) ⇒ Object

Dumps an Object (obj) to a string. Note that an indent of less than zero will result in a tight one-line output unless the text in the XML fields contains newline characters.


806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# File 'ext/ox/ox.c', line 806

/* Serializes an object to an XML String.
 *
 * argv[0] is the object to write. When exactly two arguments are given,
 * argv[1] is passed to parse_dump_options() to adjust a private copy of the
 * default options. Raises NoMemError when ox_write_obj_to_str() returns 0.
 * When the options carry a non-empty encoding name and encoding support is
 * compiled in, that encoding is applied to the returned String.
 */
static VALUE
dump(int argc, VALUE *argv, VALUE self) {
    struct _Options opts = ox_default_options;
    char            *buf;
    VALUE           out;

    if (2 == argc) {
        parse_dump_options(argv[1], &opts);
    }
    buf = ox_write_obj_to_str(argv[0], &opts);
    if (0 == buf) {
        rb_raise(rb_eNoMemError, "Not enough memory.\n");
    }
    out = rb_str_new2(buf);
#if HAS_ENCODING_SUPPORT
    if ('\0' != opts.encoding[0]) {
        rb_enc_associate(out, rb_enc_find(opts.encoding));
    }
#elif HAS_PRIVATE_ENCODING
    if ('\0' != opts.encoding[0]) {
        rb_funcall(out, ox_force_encoding_id, 1, rb_str_new2(opts.encoding));
    }
#endif
    /* The Ruby String holds its own copy, so the C buffer can be released. */
    xfree(buf);

    return out;
}