Class: Iconv

Inherits:
Data
  • Object
show all
Defined in:
iconv.c

Overview

Summary

Ruby extension for charset conversion.

Abstract

Iconv is a wrapper class for the UNIX 95 iconv() function family, which translates strings between various encoding systems.

See Open Group's on-line documents for more details.

Which coding systems are available is platform-dependent.

Examples

  1. Simple conversion between two charsets.

    converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
    
  2. Instantiate a new Iconv and use method Iconv#iconv.

    cd = Iconv.new(to, from)
    begin
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)                   # Don't forget this!
    ensure
      cd.close
    end
    
  3. Invoke Iconv.open with a block.

    Iconv.open(to, from) do |cd|
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)
    end
    
  4. Shorthand for (3).

    Iconv.iconv(to, from, *input.to_a)
    

Attentions

Even if some implementation-dependent extensions are useful, DON'T USE those extensions in libraries and scripts that are meant to be widely distributed. If you want to use those features, use String#encode.

Defined Under Namespace

Classes: BrokenLibrary, Failure, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(to, from, [options]) ⇒ Object

Creates a new code converter from the coding system designated by from to the one designated by to.

Parameters

to

encoding name for destination

from

encoding name for source

options

options for converter

Exceptions

TypeError

if to or from isn't a String

InvalidEncoding

if the designated converter couldn't be found

SystemCallError

if iconv_open(3) fails



651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
# File 'iconv.c', line 651

/*
 * Iconv#initialize(to, from, [options])
 *
 * Takes two mandatory arguments (+to+, +from+) and one optional
 * argument (+options+), passes whatever check_iconv(self) returns to
 * iconv_free(), and then stores the descriptor built by iconv_create()
 * in the data slot of +self+.
 *
 * iconv_create() reports an index through +idx+; a non-negative value
 * is recorded on +self+ with ENCODING_SET.
 *
 * Returns +self+.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE previous, created;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    /* Hand the descriptor currently attached to self to iconv_free() ... */
    previous = check_iconv(self);
    iconv_free(previous);
    /*
     * ... and clear the slot before building the replacement, so the
     * slot does not keep the old value while iconv_create() runs.
     */
    DATA_PTR(self) = NULL;

    created = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
    DATA_PTR(self) = (void *)created;

    if (idx >= 0) {
        ENCODING_SET(self, idx);
    }
    return self;
}

Class Method Details

.charset_map ⇒ Object

Returns the map from canonical name to system dependent name.



139
140
141
142
143
# File 'iconv.c', line 139

/*
 * Iconv.charset_map
 *
 * Accessor that hands back the file-level charset_map value unchanged.
 */
static VALUE
charset_map_get(void)
{
    VALUE map = charset_map;

    return map;
}

.conv(to, from, str) ⇒ Object

Shorthand for

Iconv.iconv(to, from, str).join

See Iconv.iconv.



759
760
761
762
763
764
765
766
767
768
769
770
# File 'iconv.c', line 759

/*
 * Iconv.conv(to, from, str)
 *
 * Fills in an iconv_env_t for a single input string, with an empty
 * String as the accumulator and rb_str_append as the append callback,
 * then runs iconv_s_convert under rb_ensure so that iconv_free is
 * called on the descriptor afterwards.
 *
 * Returns whatever rb_ensure returns for iconv_s_convert.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;
    VALUE descriptor;

    /* exactly one input: the caller's str */
    env.argc = 1;
    env.argv = &str;

    /* results are appended onto one growing String */
    env.append = rb_str_append;
    env.ret = rb_str_new(0, 0);

    env.cd = iconv_create(to, from, NULL, &env.toidx);
    descriptor = ICONV2VALUE(env.cd);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, descriptor);
}

.ctlmethods ⇒ Array

Returns available iconvctl() method list.

Returns:

  • (Array)


1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
# File 'iconv.c', line 1072

/*
 * Iconv.ctlmethods
 *
 * Builds a fresh Array of Symbols, one per iconvctl() request macro
 * that is defined at compile time. The Array is empty when none of
 * the macros is defined.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

#if defined(ICONV_TRIVIALP)
    rb_ary_push(methods, ID2SYM(rb_intern("trivial?")));
#endif

    /* transliteration flag: reader, then writer */
#if defined(ICONV_GET_TRANSLITERATE)
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate?")));
#endif
#if defined(ICONV_SET_TRANSLITERATE)
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate=")));
#endif

    /* discard-illegal-sequence flag: reader, then writer */
#if defined(ICONV_GET_DISCARD_ILSEQ)
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq?")));
#endif
#if defined(ICONV_SET_DISCARD_ILSEQ)
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return methods;
}

.iconv(to, from, *strs) ⇒ Object

Shorthand for

Iconv.open(to, from) { |cd|
  (strs + [nil]).collect { |s| cd.iconv(s) }
}

Parameters

to, from

see Iconv.new

strs

strings to be converted

Exceptions

Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.



735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
# File 'iconv.c', line 735

/*
 * Iconv.iconv(to, from, *strs)
 *
 * argv[0] and argv[1] are the destination and source encodings; the
 * remaining arguments are the inputs. Results are collected into an
 * Array (rb_ary_push is the append callback) by iconv_s_convert, which
 * runs under rb_ensure so that iconv_free is called on the descriptor
 * afterwards.
 *
 * Raises ArgumentError when fewer than two arguments are given.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    int nstrs;

    /* `to' and `from' are mandatory; anything after them is input */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = &argv[2];
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(nstrs);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
}

.list {|*aliases| ... } ⇒ Object

Iterates over each alias set.

Yields:

  • (*aliases)


819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
# File 'iconv.c', line 819

/*
 * Iconv.list {|*aliases| ... }
 *
 * Enumerates the encodings known to the underlying iconv library.
 *
 * - With HAVE_ICONVLIST: iconvlist() is driven with the list_iconv
 *   callback. args[1] is 0 when a block is given and a new Array
 *   otherwise; a non-zero args[1] is returned at the end.
 * - With HAVE___ICONV_FREE_LIST: the names from __iconv_get_list() are
 *   copied into an Array, which is returned when no block is given and
 *   otherwise yielded element by element.
 * - In every other case (including "block given") the result is nil.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * The leading bytes of args[0] are read back below as an int jump
     * tag (presumably stored there by list_iconv -- confirm against its
     * definition). Zero the slot first so that the read is well defined
     * and means "no pending jump" even if iconvlist() never invokes the
     * callback.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    /* re-raise a non-local exit recorded during enumeration */
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    /* a non-zero status from __iconv_get_list() yields nil */
    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy the names into Ruby strings before releasing the C list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}

.open(to, from) {|iconv| ... } ⇒ Object

Equivalent to Iconv.new, except that when it is called with a block it yields the new instance, closes it afterwards, and returns the value returned by the block.

Yields:



675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'iconv.c', line 675

/*
 * Iconv.open(to, from, [options]) {|iconv| ... }
 *
 * Creates a descriptor with iconv_create(), wraps it in a new object
 * of the receiver class (ICONV_FREE is registered as the free
 * function) and, when iconv_create() reports a non-negative index,
 * records that index on the object with ENCODING_SET.
 *
 * Without a block the wrapped object is returned. With a block the
 * object is yielded under rb_ensure, so iconv_finish runs on it
 * afterwards, and the rb_ensure result is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE descriptor, obj;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);
    descriptor = ICONV2VALUE(iconv_create(to, from, &opt, &idx));

    /* self is the class here; obj is the freshly wrapped instance */
    obj = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)descriptor);
    if (idx >= 0) {
        ENCODING_SET(obj, idx);
    }

    if (!rb_block_given_p()) {
        return obj;
    }
    return rb_ensure(rb_yield, obj, (VALUE(*)())iconv_finish, obj);
}

Instance Method Details

#close ⇒ Object

Finishes conversion.

After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.

Returns a string containing the byte sequence to change the output buffer to its initial shift state.



875
876
877
878
879
880
881
882
# File 'iconv.c', line 875

/*
 * Iconv#close
 *
 * Returns nil when check_iconv(self) yields a zero descriptor.
 * Otherwise runs iconv_init_state on +self+ under rb_ensure, with
 * iconv_free called on the descriptor afterwards, and returns the
 * rb_ensure result.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE cd = check_iconv(self);

    if (cd) {
        return rb_ensure(iconv_init_state, self, iconv_free, cd);
    }
    return Qnil;
}

#conv(str...) ⇒ Object

Equivalent to

iconv(nil, str..., nil).join


951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
# File 'iconv.c', line 951

/*
 * Iconv#conv(str...)
 *
 * Starts with the result of an iconv_convert() call on nil, then
 * converts every argument in order and appends each non-empty piece to
 * that result. When at least one argument was given, a trailing
 * iconv_convert() call on nil is made and its non-empty output is
 * appended as well.
 *
 * Returns the accumulated string.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
        if (RSTRING_LEN(piece)) {
            rb_str_buf_append(result, piece);
        }
    }

    /* closing nil conversion, done only when something was converted */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece)) {
        rb_str_buf_append(result, piece);
    }

    return result;
}

#discard_ilseq=(flag) ⇒ Object

Sets discard_ilseq flag.



1055
1056
1057
1058
1059
1060
1061
# File 'iconv.c', line 1055

/*
 * Iconv#discard_ilseq=(flag)
 *
 * Reduces +discard_ilseq+ to its truthiness and issues the
 * ICONV_SET_DISCARD_ILSEQ request through iconv_ctl. Returns +self+.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled;

    enabled = RTEST(discard_ilseq);
    /*
     * iconv_ctl is defined elsewhere and is given the variable by name
     * (presumably a macro that takes its address -- confirm), so keep
     * a named local here.
     */
    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}

#discard_ilseq? ⇒ Boolean

Returns discard_ilseq flag.

Returns:

  • (Boolean)


1036
1037
1038
1039
1040
1041
1042
1043
# File 'iconv.c', line 1036

/*
 * Iconv#discard_ilseq?
 *
 * Issues the ICONV_GET_DISCARD_ILSEQ request through iconv_ctl and
 * returns true when the resulting flag is non-zero, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int enabled = 0;

    /*
     * iconv_ctl is given the variable by name and is expected to
     * update it (presumably a macro taking its address -- confirm).
     */
    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}

#iconv(str, start = 0, length = -1) ⇒ Object

Converts string and returns the result.

  • If str is a String, converts str[start, length] and returns the converted string.

  • If str is nil, places the converter itself into its initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state.

  • Otherwise, raises an exception.

Parameters

str

string to be converted, or nil

start

starting offset

length

conversion length; nil or -1 means the whole string from start

Exceptions

  • IconvIllegalSequence

  • IconvInvalidCharacter

  • IconvOutOfRange

Examples

See the Iconv documentation.



911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
# File 'iconv.c', line 911

/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts +str+ (or a slice of it) with the descriptor attached to
 * +self+ and returns the result of iconv_convert().
 *
 * Arguments:
 *   str    - a String, or nil
 *   start  - starting offset; with exactly two arguments it may also be
 *            a Range, which rb_range_beg_len() resolves against the
 *            string length
 *   length - conversion length; nil is treated as -1
 *
 * +start+ and +length+ are given in characters and are translated to
 * byte offsets with rb_enc_nth() before iconv_convert() is called.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
        /* slen stays 0 for a nil str */
        VALUE n = rb_str_length(StringValue(str));
        slen = NUM2LONG(n);
    }
    /* Try the (str, range) form first; otherwise fall back to start/length. */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
        /*
         * A negative start counts from the end of the string. length is
         * only taken from n2 when start is nil or lies inside the string.
         */
        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
        }
    }
    /*
     * Translate character offsets into byte offsets. This is skipped
     * for a nil str: start can still be positive there (for example
     * cd.iconv(nil, 1)), and nil has no string buffer or encoding to
     * hand to RSTRING_PTR()/rb_enc_get().
     * NOTE(review): this relies on iconv_convert() not using
     * start/length for a nil str -- confirm against its definition.
     */
    if (!NIL_P(str) && (start > 0 || length > 0)) {
        rb_encoding *enc = rb_enc_get(str);
        const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
        const char *ps = s;
        if (start > 0) {
            start = (ps = rb_enc_nth(s, e, start, enc)) - s;
        }
        if (length > 0) {
            length = rb_enc_nth(ps, e, length, enc) - ps;
        }
    }

    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}

#transliterate=(flag) ⇒ Object

Sets transliterate flag.



1018
1019
1020
1021
1022
1023
1024
# File 'iconv.c', line 1018

/*
 * Iconv#transliterate=(flag)
 *
 * Reduces +transliterate+ to its truthiness and issues the
 * ICONV_SET_TRANSLITERATE request through iconv_ctl. Returns +self+.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled;

    enabled = RTEST(transliterate);
    /*
     * iconv_ctl is defined elsewhere and is given the variable by name
     * (presumably a macro that takes its address -- confirm), so keep
     * a named local here.
     */
    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}

#transliterate? ⇒ Boolean

Returns transliterate flag.

Returns:

  • (Boolean)


999
1000
1001
1002
1003
1004
1005
1006
# File 'iconv.c', line 999

/*
 * Iconv#transliterate?
 *
 * Issues the ICONV_GET_TRANSLITERATE request through iconv_ctl and
 * returns true when the resulting flag is non-zero, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    /*
     * iconv_ctl is given the variable by name and is expected to
     * update it (presumably a macro taking its address -- confirm).
     */
    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}

#trivial? ⇒ Boolean

Returns trivial flag.

Returns:

  • (Boolean)


980
981
982
983
984
985
986
987
# File 'iconv.c', line 980

/*
 * Iconv#trivial?
 *
 * Issues the ICONV_TRIVIALP request through iconv_ctl and returns true
 * when the resulting flag is non-zero, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int is_trivial = 0;

    /*
     * iconv_ctl is given the variable by name and is expected to
     * update it (presumably a macro taking its address -- confirm).
     */
    iconv_ctl(self, ICONV_TRIVIALP, is_trivial);
    return is_trivial ? Qtrue : Qfalse;
}