Method: String#undump

Defined in:
string.c

#undump ⇒ String

Returns an unescaped version of the string. This does the inverse of String#dump.

"\"hello \\n ''\"".undump #=> "hello \n ''"

Returns:

(String) — the unescaped version of the string

[View source]

6558
6559
6560
6561
6562
6563
6564
6565
6566
6567
6568
6569
6570
6571
6572
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583
6584
6585
6586
6587
6588
6589
6590
6591
6592
6593
6594
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
6605
6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
6624
6625
6626
6627
6628
6629
6630
6631
6632
6633
6634
6635
6636
6637
6638
6639
6640
6641
6642
6643
6644
6645
# File 'string.c', line 6558

/*
 * Implementation of String#undump: parses +str+ as the textual form
 * produced by String#dump and returns a new string holding the
 * unescaped content.
 *
 * Accepted layouts (anything else raises RuntimeError):
 *
 *   "..."
 *   "...".force_encoding("ENCNAME")
 *   "...".dup.force_encoding("ENCNAME")    (older dump output)
 *
 * The result starts out empty with the encoding of +str+; when a
 * force_encoding suffix is present, the named encoding is associated
 * with the result instead.
 *
 * Raises RuntimeError when +str+ contains a non-ASCII character, fails
 * str_null_check(), is not opened/closed by '"', ends with a lone
 * backslash, has a malformed suffix, names an unknown encoding, or
 * combines a Unicode escape with a force_encoding suffix.
 */
static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    /* Flags handed to undump_after_backslash(); `utf8` is checked below to
     * reject a force_encoding suffix after a Unicode escape.  `binary` is
     * only read/written by the helper (not visible in this function). */
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
	rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
	rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    /* The shortest valid input is the two-byte string `""`. */
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
	if (s >= s_end) {
	    rb_raise(rb_eRuntimeError, "unterminated dumped string");
	}

	if (*s == '"') {
	    /* epilogue */
	    s++;
	    if (s != s_end) {
		/* Something follows the closing quote: it must be the
		 * (optionally .dup-prefixed) force_encoding suffix.
		 * Otherwise this is a plain ascii compatible dumped string. */
		static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
		static const char dup_suffix[] = ".dup";
		const char *encname;
		int encidx;
		ptrdiff_t size;

		/* check separately for strings dumped by older versions */
		size = sizeof(dup_suffix) - 1;
		if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

		/* Require the suffix plus at least one more byte after it. */
		size = sizeof(force_encoding_suffix) - 1;
		if (s_end - s <= size) goto invalid_format;
		if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
		s += size;

		if (utf8) {
		    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
		}

		encname = s;
		s = memchr(s, '"', s_end-s);
		/* Test for a missing closing quote BEFORE computing the name
		 * length: subtracting from a NULL pointer is undefined. */
		if (!s) goto invalid_format;
		size = s - encname;
		/* The quote ending the name must be followed by exactly ')'
		 * and then the end of the input. */
		if (s_end - s != 2) goto invalid_format;
		if (s[0] != '"' || s[1] != ')') goto invalid_format;

		encidx = rb_enc_find_index2(encname, (long)size);
		if (encidx < 0) {
		    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
		}
		rb_enc_associate_index(undumped, encidx);
	    }
	    break;
	}

	if (*s == '\\') {
	    s++;
	    if (s >= s_end) {
		rb_raise(rb_eRuntimeError, "invalid escape");
	    }
	    /* The helper receives &s, so it is expected to advance the
	     * cursor past the escape it consumes and append to `undumped`. */
	    undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
	}
	else {
	    /* Ordinary byte: copy it through unchanged. */
	    rb_str_cat(undumped, s++, 1);
	}
    }

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}