Class: Iconv

Inherits:
Object
  • Object
show all
Defined in:
ext/iconv/iconv.c,
lib/iconv.rb,
lib/iconv/version.rb,
ext/iconv/iconv.c

Overview

Summary

Ruby extension for charset conversion.

Abstract

Iconv is a wrapper class for the UNIX 95 iconv() function family, which translates strings between various encoding systems.

See Open Group’s on-line documents for more details.

Which coding systems are available is platform-dependent.

Examples

  1. Simple conversion between two charsets.

    converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
    
  2. Instantiate a new Iconv and use method Iconv#iconv.

    cd = Iconv.new(to, from)
    begin
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)                   # Don't forget this!
    ensure
      cd.close
    end
    
  3. Invoke Iconv.open with a block.

    Iconv.open(to, from) do |cd|
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)
    end
    
  4. Shorthand for (3).

    Iconv.iconv(to, from, *input.to_a)
    

Attentions

Even if some implementation-dependent extensions are useful, DON’T USE those extensions in libraries and scripts that are meant to be widely distributed. If you want to use those features, use String#encode.

Defined Under Namespace

Modules: Failure Classes: BrokenLibrary, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange

Constant Summary collapse

VERSION =
"1.1.0"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(to, from, [options]) ⇒ Object

Creates a new code converter from the coding system designated by from to the one designated by to.

Parameters

to

encoding name for destination

from

encoding name for source

options

options for converter

Exceptions

TypeError

if to or from isn’t a String

InvalidEncoding

if the designated converter couldn’t be found

SystemCallError

if iconv_open(3) fails



736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
# File 'ext/iconv/iconv.c', line 736

/*
 * Iconv.new(to, from, [options])
 *
 * (Re)initializes +self+ as a converter from the coding system +from+ to
 * the coding system +to+.  Two arguments are required and a third
 * (+options+) is optional, as described by the "21" scan format.
 *
 * Any handle already held by +self+ is released first.  The data pointer is
 * cleared before the new handle is created; presumably this keeps +self+
 * from referring to the released handle should iconv_create() fail
 * (iconv_create is not visible in this excerpt).
 *
 * Returns +self+.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    struct rb_iconv_opt_t conv_opt;
    VALUE to, from, options;
    VALUE handle;
    int encidx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&conv_opt, options);

    /* drop whatever converter this object held before */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;

    handle = ICONV2VALUE(iconv_create(to, from, &conv_opt, &encidx));
    DATA_PTR(self) = (void *)handle;
#ifdef HAVE_RUBY_ENCODING_H
    ICONV_ENCODING_SET(self, encidx);
#endif
    return self;
}

Class Method Details

.charset_map ⇒ Object

Returns the map from canonical name to system dependent name.



206
207
208
209
210
# File 'ext/iconv/iconv.c', line 206

/*
 * Iconv.charset_map
 *
 * Returns the map from canonical name to system dependent name.
 * +klass+ is not consulted; the charset_map object defined outside this
 * function is handed back as is.
 */
static VALUE
charset_map_get(VALUE klass)
{
    VALUE map = charset_map;

    return map;
}

.conv(to, from, str) ⇒ Object

Shorthand for

Iconv.iconv(to, from, str).join

See Iconv.iconv.



849
850
851
852
853
854
855
856
857
858
859
860
# File 'ext/iconv/iconv.c', line 849

/*
 * Iconv.conv(to, from, str)
 *
 * One-shot conversion of a single string.  Builds a conversion environment
 * whose only input is +str+ and whose results are appended to a fresh empty
 * String, then runs iconv_s_convert under rb_ensure so that iconv_free is
 * invoked on the converter handle however the conversion ends.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;
    VALUE handle;

    /* allocate the result before opening the converter */
    env.ret = rb_str_new(0, 0);
    env.append = rb_str_append;
    env.argv = &str;
    env.argc = 1;
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    handle = ICONV2VALUE(env.cd);
    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, handle);
}

.ctlmethods ⇒ Array

Returns available iconvctl() method list.

Returns:

  • (Array)


1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
# File 'ext/iconv/iconv.c', line 1175

/*
 * Iconv.ctlmethods
 *
 * Returns an Array of Symbols naming the iconvctl()-backed methods that are
 * available.  Each symbol is included only when the corresponding ICONV_*
 * request macro is defined at compile time.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

/* append the Symbol for one method name to the result */
#define PUSH_CTLMETHOD(name) rb_ary_push(methods, ID2SYM(rb_intern(name)))
#ifdef ICONV_TRIVIALP
    PUSH_CTLMETHOD("trivial?");
#endif
#ifdef ICONV_GET_TRANSLITERATE
    PUSH_CTLMETHOD("transliterate?");
#endif
#ifdef ICONV_SET_TRANSLITERATE
    PUSH_CTLMETHOD("transliterate=");
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    PUSH_CTLMETHOD("discard_ilseq?");
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    PUSH_CTLMETHOD("discard_ilseq=");
#endif
#undef PUSH_CTLMETHOD
    return methods;
}

.iconv(to, from, *strs) ⇒ Object

Shorthand for

Iconv.open(to, from) { |cd|
  (strs + [nil]).collect { |s| cd.iconv(s) }
}

Parameters

to, from

see Iconv.new

strs

strings to be converted

Exceptions

Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.



825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
# File 'ext/iconv/iconv.c', line 825

/*
 * Iconv.iconv(to, from, *strs)
 *
 * Converts every string in +strs+ and collects the results in an Array.
 * argv[0] and argv[1] are the +to+ and +from+ encoding names; the remaining
 * arguments are the inputs.  Raises ArgumentError when fewer than two
 * arguments are given.  The conversion runs under rb_ensure so iconv_free is
 * invoked on the converter handle however iconv_s_convert ends.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    VALUE handle;

    /* needs `to' and `from' arguments at least */
    if (argc < 2) {
	rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* from here on argc counts only the strings to convert */
    argc -= 2;
    env.argc = argc;
    env.argv = argv + 2;
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(argc);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    handle = ICONV2VALUE(env.cd);
    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, handle);
}

.list {|*aliases| ... } ⇒ Object

Iterates over each alias set.

Yields:

  • (*aliases)


909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
# File 'ext/iconv/iconv.c', line 909

/*
 * Iconv.list {|*aliases| ... }
 *
 * Enumerates the encodings known to the underlying iconv library.
 *
 * With HAVE_ICONVLIST: iconvlist() is driven with the list_iconv callback
 * and a two-slot argument vector.  args[1] is 0 when a block is given and a
 * new Array otherwise; that Array is returned when no block is given.
 * args[0] is read back afterwards as a jump state and, when non-zero, is
 * re-raised with rb_jump_tag().  It is presumably stored by list_iconv
 * (not visible in this excerpt).
 *
 * With HAVE___ICONV_FREE_LIST: the names from __iconv_get_list() are copied
 * into an Array, the native list is released, and the Array is either
 * returned (no block) or yielded element by element.
 *
 * Returns nil when a block is given, when __iconv_get_list() reports
 * failure, or when neither listing facility is available.
 */
static VALUE
iconv_s_list(VALUE klass)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Start from a defined "no jump pending" state: args[0] is inspected
     * below even if the callback never stored anything into it (e.g. the
     * library reports no encodings), which would otherwise read an
     * indeterminate value.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = (int)args[0];
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy the names into Ruby strings before releasing the native list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
	rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
	return ary;
    /* the length is re-read on every pass, so a block that resizes ary is tolerated */
    for (i = 0; i < RARRAY_LEN(ary); i++) {
	rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}

.open(to, from) {|iconv| ... } ⇒ Object

Equivalent to Iconv.new except that when it is called with a block, it yields the new instance, closes it afterwards, and returns the result returned by the block.

Yields:



762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
# File 'ext/iconv/iconv.c', line 762

/*
 * Iconv.open(to, from, [options]) {|iconv| ... }
 *
 * Creates a converter and wraps it in a new object of the receiving class.
 * Two arguments are required and a third (+options+) is optional, as
 * described by the "21" scan format.
 *
 * Without a block the new object is returned.  With a block the object is
 * yielded and iconv_finish is run on it afterwards via rb_ensure; the value
 * of the rb_ensure call is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    struct rb_iconv_opt_t conv_opt;
    VALUE to, from, options, handle;
    int encidx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&conv_opt, options);
    handle = ICONV2VALUE(iconv_create(to, from, &conv_opt, &encidx));

    /* from here on `self' is the new instance, not the class */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
#ifdef HAVE_RUBY_ENCODING_H
    /* a negative index is not recorded */
    if (encidx >= 0) {
	ICONV_ENCODING_SET(self, encidx);
    }
#endif

    if (!rb_block_given_p()) {
	return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}

Instance Method Details

#close ⇒ Object

Finishes conversion.

After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.

Returns a string containing the byte sequence to change the output buffer to its initial shift state.



965
966
967
968
969
970
971
972
# File 'ext/iconv/iconv.c', line 965

/*
 * Iconv#close
 *
 * Finishes conversion.  When check_iconv() yields no handle for +self+,
 * nil is returned and nothing else happens, which is what makes repeated
 * calls harmless.  Otherwise iconv_init_state is run on +self+ and
 * iconv_free is applied to the handle through rb_ensure, so the handle is
 * released however iconv_init_state ends.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
	return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}

#conv(str...) ⇒ Object

Equivalent to

iconv(nil, str..., nil).join


1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
# File 'ext/iconv/iconv.c', line 1054

/*
 * Iconv#conv(str...)
 *
 * Equivalent to iconv(nil, str..., nil).join: one conversion call with nil
 * up front, then each argument in order, then, only when at least one
 * argument was given, a closing nil call.  Non-empty pieces are appended to
 * the string produced by the first call, which is the return value.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ICONV_ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
	return result;
    }

    for (i = 0; i < argc; i++) {
	piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
	if (RSTRING_LEN(piece) != 0) {
	    rb_str_buf_append(result, piece);
	}
    }

    /* closing nil call after the last input */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece) != 0) {
	rb_str_buf_append(result, piece);
    }

    return result;
}

#discard_ilseq=(flag) ⇒ Object

Sets discard_ilseq flag.



1158
1159
1160
1161
1162
1163
1164
# File 'ext/iconv/iconv.c', line 1158

/*
 * Iconv#discard_ilseq=(flag)
 *
 * Sets the discard_ilseq flag from the truthiness of +discard_ilseq+ by
 * issuing an ICONV_SET_DISCARD_ILSEQ request through iconv_ctl.
 * Returns +self+.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}

#discard_ilseq? ⇒ Boolean

Returns discard_ilseq flag.

Returns:

  • (Boolean)


1139
1140
1141
1142
1143
1144
1145
1146
# File 'ext/iconv/iconv.c', line 1139

/*
 * Iconv#discard_ilseq?
 *
 * Returns the discard_ilseq flag as true or false.  The local starts at 0
 * and is handed to iconv_ctl with an ICONV_GET_DISCARD_ILSEQ request;
 * iconv_ctl is expected to store the answer into it (presumably it is a
 * macro taking the variable by name -- not visible in this excerpt).
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}

#iconv(str, start = 0, length = -1) ⇒ Object

Converts string and returns the result.

  • If str is a String, converts str[start, length] and returns the converted string.

  • If str is nil, places converter itself into initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state.

  • Otherwise, raises an exception.

Parameters

str

string to be converted, or nil

start

starting offset

length

conversion length; nil or -1 means the whole string from start

Exceptions

  • IconvIllegalSequence

  • IconvInvalidCharacter

  • IconvOutOfRange

Examples

See the Iconv documentation.



1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
# File 'ext/iconv/iconv.c', line 1001

/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts str[start, length] and returns the result of iconv_convert().
 *
 * str    - String to convert, or nil.  With nil, a positive start or
 *          length is clamped to 0 because there is no string to slice.
 * start  - starting offset; a negative value counts from the end.  When
 *          exactly two arguments are given the second may also be a Range,
 *          which rb_range_beg_len() resolves into start and length.
 * length - conversion length; nil or -1 means the whole string from start.
 *
 * With HAVE_RUBY_ENCODING_H the string length comes from rb_str_length()
 * and positive start/length values are translated into byte distances with
 * rb_enc_nth() before being handed to iconv_convert().  Without it, the
 * values are clamped to the byte length of the string.
 *
 * Non-String, non-nil +str+ is rejected by StringValue(); NUM2LONG() may
 * raise for non-numeric start/length.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
#ifdef HAVE_RUBY_ENCODING_H
	VALUE n = rb_str_length(StringValue(str));
	slen = NUM2LONG(n);
#else
	slen = RSTRING_LEN(StringValue(str));
#endif
    }
    /* not a usable Range in the two-argument form: treat n1/n2 as start/length */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
	/* length is only taken from n2 when start is absent or lands inside the string */
	if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
	    length = NIL_P(n2) ? -1 : NUM2LONG(n2);
	}
    }
    if (NIL_P(str)) {
	/*
	 * There is no string to index into.  Clamp against the zero length,
	 * exactly as the non-encoding branch below would, instead of
	 * dereferencing nil through RSTRING_PTR()/rb_enc_get().
	 */
	if (start > 0) {
	    start = 0;
	}
	if (length > 0) {
	    length = 0;
	}
    }
    else if (start > 0 || length > 0) {
#ifdef HAVE_RUBY_ENCODING_H
	const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
	const char *ps = s;
	rb_encoding *enc = rb_enc_get(str);
	if (start > 0) {
	    /* ps remembers where the slice begins so length is measured from there */
	    start = (ps = rb_enc_nth(s, e, start, enc)) - s;
	}
	if (length > 0) {
	    length = rb_enc_nth(ps, e, length, enc) - ps;
	}
#else
	if (start > slen) {
	    start = slen;
	}
	if (length > slen - start) {
	    length = slen - start;
	}
#endif
    }

    return iconv_convert(VALUE2ICONV(cd), str, start, length, ICONV_ENCODING_GET(self), NULL);
}

#transliterate=(flag) ⇒ Object

Sets transliterate flag.



1121
1122
1123
1124
1125
1126
1127
# File 'ext/iconv/iconv.c', line 1121

/*
 * Iconv#transliterate=(flag)
 *
 * Sets the transliterate flag from the truthiness of +transliterate+ by
 * issuing an ICONV_SET_TRANSLITERATE request through iconv_ctl.
 * Returns +self+.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}

#transliterate? ⇒ Boolean

Returns transliterate flag.

Returns:

  • (Boolean)


1102
1103
1104
1105
1106
1107
1108
1109
# File 'ext/iconv/iconv.c', line 1102

/*
 * Iconv#transliterate?
 *
 * Returns the transliterate flag as true or false.  The local starts at 0
 * and is handed to iconv_ctl with an ICONV_GET_TRANSLITERATE request;
 * iconv_ctl is expected to store the answer into it (presumably it is a
 * macro taking the variable by name -- not visible in this excerpt).
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}

#trivial? ⇒ Boolean

Returns trivial flag.

Returns:

  • (Boolean)


1083
1084
1085
1086
1087
1088
1089
1090
# File 'ext/iconv/iconv.c', line 1083

/*
 * Iconv#trivial?
 *
 * Returns the trivial flag as true or false.  The local starts at 0 and is
 * handed to iconv_ctl with an ICONV_TRIVIALP request; iconv_ctl is expected
 * to store the answer into it (presumably it is a macro taking the variable
 * by name -- not visible in this excerpt).
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int is_trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, is_trivial);
    return is_trivial ? Qtrue : Qfalse;
}