Class: Iconv
- Inherits:
-
Data
- Object
- Data
- Iconv
- Defined in:
- iconv.c
Overview
Summary
Ruby extension for charset conversion.
Abstract
Iconv is a wrapper class for the UNIX 95 iconv()
function family, which translates strings between various encoding systems.
See Open Group's on-line documents for more details.
-
iconv.h
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html -
iconv_open()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html -
iconv()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.html -
iconv_close()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
Which coding systems are available is platform-dependent.
Examples
-
Simple conversion between two charsets.
converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
-
Instantiate a new Iconv and use method Iconv#iconv.
cd = Iconv.new(to, from)
begin
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil) # Don't forget this!
ensure
  cd.close
end
-
Invoke Iconv.open with a block.
Iconv.open(to, from) do |cd|
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil)
end
-
Shorthand for (3).
Iconv.iconv(to, from, *input.to_a)
Attentions
Even if some implementation-dependent extensions are useful, DON'T USE those extensions in libraries and scripts intended for wide distribution. If you need those features, use String#encode.
Defined Under Namespace
Classes: BrokenLibrary, Failure, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange
Class Method Summary collapse
-
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
-
.conv(to, from, str) ⇒ Object
Shorthand for Iconv.iconv(to, from, str).join. See Iconv.iconv.
-
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
-
.iconv(to, from, *strs) ⇒ Object
Shorthand for Iconv.open(to, from) { |cd| (strs + [nil]).collect { |s| cd.iconv(s) } }.
-
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
-
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance, closes it afterwards, and returns the value returned by the block.
Instance Method Summary collapse
-
#close ⇒ Object
Finishes conversion.
-
#conv(str...) ⇒ Object
Equivalent to iconv(nil, str..., nil).join.
-
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
-
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
-
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
#new(to, from, [options]) ⇒ Object
constructor
Creates new code converter from a coding-system designated with
from
to another one designated with to
. -
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
-
#transliterate? ⇒ Boolean
Returns transliterate flag.
-
#trivial? ⇒ Boolean
Returns trivial flag.
Constructor Details
#new(to, from, [options]) ⇒ Object
Creates new code converter from a coding-system designated with from
to another one designated with to
.
Parameters
to
-
encoding name for destination
from
-
encoding name for source
options
-
options for converter
Exceptions
- TypeError
-
if
to
or from
aren't String - InvalidEncoding
-
if the designated converter could not be found
- SystemCallError
-
if
iconv_open(3)
fails
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 |
# File 'iconv.c', line 651
/*
 * Iconv#initialize(to, from, [options])
 *
 * Frees whatever check_iconv() reports for self, then creates a converter
 * for the requested encoding pair via iconv_create() and stores it in
 * self's data pointer.  When iconv_create() reports a non-negative index,
 * that index is set as the encoding of self.
 *
 * argc/argv: two required arguments (to, from) and one optional (options).
 * Returns self.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to_name, from_name, opt_arg;
    struct rb_iconv_opt_t conv_opt;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to_name, &from_name, &opt_arg);
    get_iconv_opt(&conv_opt, opt_arg);

    /*
     * Release the previous descriptor and clear the slot *before* creating
     * the new one; presumably this keeps self from pointing at a freed
     * descriptor if iconv_create() raises -- confirm against iconv_create.
     */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;

    DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to_name, from_name, &conv_opt, &enc_idx));
    if (enc_idx >= 0) {
        ENCODING_SET(self, enc_idx);
    }
    return self;
}
|
Class Method Details
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
139 140 141 142 143 |
# File 'iconv.c', line 139
/*
 * Iconv.charset_map
 *
 * Returns the file-level charset_map value unchanged (no copy is made
 * here).  Per the class documentation this is the map from canonical
 * encoding names to system-dependent names.
 */
static VALUE
charset_map_get(void)
{
return charset_map;
}
|
.conv(to, from, str) ⇒ Object
759 760 761 762 763 764 765 766 767 768 769 770 |
# File 'iconv.c', line 759
/*
 * Iconv.conv(to, from, str)
 *
 * One-shot conversion of a single string.  Fills an iconv_env_t describing
 * a one-element input (str) whose converted pieces are appended to a fresh
 * empty String, then runs iconv_s_convert under rb_ensure so that
 * iconv_free is always called on the descriptor afterwards.
 *
 * Returns whatever iconv_s_convert returns.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;
    VALUE cd_value;

    env.argc = 1;
    env.argv = &str;
    env.append = rb_str_append;
    /* allocate the result before opening the descriptor, as the original does */
    env.ret = rb_str_new(0, 0);
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    cd_value = ICONV2VALUE(env.cd);
    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, cd_value);
}
|
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 |
# File 'iconv.c', line 1072
/*
 * Iconv.ctlmethods
 *
 * Builds an Array of Symbols naming the iconvctl()-backed methods that are
 * available in this build.  Each entry is included only when the matching
 * ICONV_* request macro is defined at compile time.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

/* Local shorthand: append the Symbol for one method name to the result. */
#define ADD_CTL_METHOD(name) rb_ary_push(methods, ID2SYM(rb_intern(name)))

#ifdef ICONV_TRIVIALP
    ADD_CTL_METHOD("trivial?");
#endif
#ifdef ICONV_GET_TRANSLITERATE
    ADD_CTL_METHOD("transliterate?");
#endif
#ifdef ICONV_SET_TRANSLITERATE
    ADD_CTL_METHOD("transliterate=");
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    ADD_CTL_METHOD("discard_ilseq?");
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    ADD_CTL_METHOD("discard_ilseq=");
#endif

#undef ADD_CTL_METHOD
    return methods;
}
|
.iconv(to, from, *strs) ⇒ Object
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 |
# File 'iconv.c', line 735
/*
 * Iconv.iconv(to, from, *strs)
 *
 * Converts every string after the first two arguments and collects the
 * pieces into an Array.  argv[0] and argv[1] are the destination and source
 * encoding names; the remaining argc - 2 values are the inputs.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * iconv_free is always invoked on the descriptor via rb_ensure.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    VALUE cd_value;
    int nstrs;

    /* `to' and `from' are mandatory */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = &argv[2];
    env.append = rb_ary_push;
    /* result array is allocated before the descriptor is opened */
    env.ret = rb_ary_new2(nstrs);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    cd_value = ICONV2VALUE(env.cd);
    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, cd_value);
}
|
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 |
# File 'iconv.c', line 819
/*
 * Iconv.list {|*aliases| ... }
 *
 * Enumerates the encodings known to the underlying iconv implementation.
 *
 * HAVE_ICONVLIST build:
 *   args[0] is reinterpreted as an int "jump state" and args[1] is either 0
 *   (a block was given) or a fresh Array (no block).  Both are handed to
 *   the list_iconv callback through iconvlist().  A non-zero state after
 *   the call is re-raised with rb_jump_tag(); otherwise the Array, if any,
 *   is returned.
 *
 * HAVE___ICONV_FREE_LIST build:
 *   The names from __iconv_get_list() are copied into an Array, the native
 *   list is freed, and the Array is either returned (no block) or yielded
 *   element by element.
 *
 * Returns nil when a block was given, when neither facility is available,
 * or when __iconv_get_list() reports failure.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Start with a defined "no pending jump" state.  This function never
     * wrote args[0] itself, so if iconvlist() did not invoke the callback
     * at all the value read below would be indeterminate.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy the names into Ruby strings before releasing the native list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
|
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance, closes it afterwards, and returns the value returned by the block.
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 |
# File 'iconv.c', line 675
/*
 * Iconv.open(to, from, [options]) {|iconv| ... }
 *
 * Creates a converter with iconv_create(), wraps it in a new Data object of
 * the receiving class (with ICONV_FREE as the free function) and, when
 * iconv_create() reports a non-negative index, sets that as the object's
 * encoding.
 *
 * Without a block the wrapped object is returned.  With a block the object
 * is yielded and iconv_finish is always run on it afterwards via rb_ensure;
 * the block's value is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to_name, from_name, opt_arg, cd_value;
    struct rb_iconv_opt_t conv_opt;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to_name, &from_name, &opt_arg);
    get_iconv_opt(&conv_opt, opt_arg);
    cd_value = ICONV2VALUE(iconv_create(to_name, from_name, &conv_opt, &enc_idx));

    /* from here on `self' names the new instance, not the class */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd_value);
    if (enc_idx >= 0) {
        ENCODING_SET(self, enc_idx);
    }

    if (!rb_block_given_p()) {
        return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}
|
Instance Method Details
#close ⇒ Object
Finishes conversion.
After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.
Returns a string containing the byte sequence to change the output buffer to its initial shift state.
875 876 877 878 879 880 881 882 |
# File 'iconv.c', line 875
/*
 * Iconv#close
 *
 * When check_iconv() yields a zero value for self, returns nil without
 * doing anything.  Otherwise runs iconv_init_state on self and guarantees,
 * through rb_ensure, that iconv_free is called on the descriptor
 * afterwards; the result of iconv_init_state is returned.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE cd_value = check_iconv(self);

    if (cd_value) {
        return rb_ensure(iconv_init_state, self, iconv_free, cd_value);
    }
    return Qnil;
}
|
#conv(str...) ⇒ Object
Equivalent to
iconv(nil, str..., nil).join
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 |
# File 'iconv.c', line 951
/*
 * Iconv#conv(str...)
 *
 * Concatenating conversion.  A conversion of nil is performed first and its
 * result becomes the accumulator.  If any arguments were given, each one is
 * converted in full (start 0, length -1) and every non-empty piece is
 * appended; a final conversion of nil is then performed and appended when
 * non-empty.
 *
 * Returns the accumulated String.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int enc_idx = ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, enc_idx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, enc_idx, NULL);
        if (RSTRING_LEN(piece) != 0) {
            rb_str_buf_append(result, piece);
        }
    }

    /* trailing nil conversion, only done when there was at least one input */
    piece = iconv_convert(cd, Qnil, 0, 0, enc_idx, NULL);
    if (RSTRING_LEN(piece) != 0) {
        rb_str_buf_append(result, piece);
    }
    return result;
}
|
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
1055 1056 1057 1058 1059 1060 1061 |
# File 'iconv.c', line 1055
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Reduces the Ruby argument to a C int (1 when truthy, 0 otherwise) and
 * passes it to iconv_ctl with the ICONV_SET_DISCARD_ILSEQ request.
 * Returns self.
 *
 * NOTE(review): iconv_ctl is defined outside this excerpt; it is given a
 * named int variable here, as in the getters -- confirm whether it needs
 * an lvalue.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled = RTEST(discard_ilseq) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}
|
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
1036 1037 1038 1039 1040 1041 1042 1043 |
# File 'iconv.c', line 1036
/*
 * Iconv#discard_ilseq?
 *
 * Issues the ICONV_GET_DISCARD_ILSEQ request through iconv_ctl and maps the
 * resulting int to true (non-zero) or false (zero).
 *
 * NOTE(review): iconv_ctl is defined outside this excerpt; it presumably
 * writes its result into the variable passed as third argument (otherwise
 * this could only ever return false) -- confirm.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}
|
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
If
str
is a String, converts str[start, length]
and returns the converted string. -
If
str
is nil
, places converter itself into initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state. -
Otherwise, raises an exception.
Parameters
- str
-
string to be converted, or nil
- start
-
starting offset
- length
-
conversion length; nil or -1 means the whole string from start
Exceptions
-
IconvIllegalSequence
-
IconvInvalidCharacter
-
IconvOutOfRange
Examples
See the Iconv documentation.
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 |
# File 'iconv.c', line 911
/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts str (or the part of it selected by start/length) and returns the
 * result of iconv_convert().
 *
 * Argument handling:
 *  - str is required; n1 and n2 are optional.
 *  - When str is not nil it is coerced with StringValue and its length (as
 *    reported by rb_str_length) is used as the bound for the checks below.
 *  - With exactly two arguments, n1 is first tried as a Range through
 *    rb_range_beg_len(); on success start/length come from the range.
 *  - Otherwise n1 is the start (nil leaves it at 0; a negative value has the
 *    string length added to it).  length is taken from n2 (nil means -1)
 *    only when start ends up inside the string; if not, length stays 0.
 *  - Positive start/length values are then turned into pointer differences
 *    inside the string buffer with rb_enc_nth(), using str's encoding.
 *
 * Raises whatever StringValue, NUM2LONG or iconv_convert raise.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
        VALUE n = rb_str_length(StringValue(str));
        slen = NUM2LONG(n);
    }
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
        }
    }
    /*
     * str may still be nil here (the documented "reset to initial shift
     * state" request), e.g. iconv(nil, nil, 5) leaves length > 0.  There is
     * no string buffer to index in that case, so the offset translation
     * must be skipped rather than applying RSTRING_PTR/RSTRING_LEN to nil.
     */
    if (!NIL_P(str) && (start > 0 || length > 0)) {
        rb_encoding *enc = rb_enc_get(str);
        const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
        const char *ps = s;
        if (start > 0) {
            start = (ps = rb_enc_nth(s, e, start, enc)) - s;
        }
        if (length > 0) {
            /* measured from the (possibly advanced) start position */
            length = rb_enc_nth(ps, e, length, enc) - ps;
        }
    }
    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}
|
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
1018 1019 1020 1021 1022 1023 1024 |
# File 'iconv.c', line 1018
/*
 * Iconv#transliterate=(flag)
 *
 * Reduces the Ruby argument to a C int (1 when truthy, 0 otherwise) and
 * passes it to iconv_ctl with the ICONV_SET_TRANSLITERATE request.
 * Returns self.
 *
 * NOTE(review): iconv_ctl is defined outside this excerpt; it is given a
 * named int variable here, as in the getters -- confirm whether it needs
 * an lvalue.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled = RTEST(transliterate) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}
|
#transliterate? ⇒ Boolean
Returns transliterate flag.
999 1000 1001 1002 1003 1004 1005 1006 |
# File 'iconv.c', line 999
/*
 * Iconv#transliterate?
 *
 * Issues the ICONV_GET_TRANSLITERATE request through iconv_ctl and maps the
 * resulting int to true (non-zero) or false (zero).
 *
 * NOTE(review): iconv_ctl is defined outside this excerpt; it presumably
 * writes its result into the variable passed as third argument -- confirm.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}
|
#trivial? ⇒ Boolean
Returns trivial flag.
980 981 982 983 984 985 986 987 |
# File 'iconv.c', line 980
/*
 * Iconv#trivial?
 *
 * Issues the ICONV_TRIVIALP request through iconv_ctl and maps the
 * resulting int to true (non-zero) or false (zero).
 *
 * NOTE(review): iconv_ctl is defined outside this excerpt; it presumably
 * writes its result into the variable passed as third argument -- confirm.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int is_trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, is_trivial);
    return is_trivial ? Qtrue : Qfalse;
}
|