Class: Iconv
- Inherits:
-
Object
- Object
- Iconv
- Defined in:
- ext/iconv/iconv.c,
lib/iconv.rb,
lib/iconv/version.rb,
ext/iconv/iconv.c
Overview
Summary
Ruby extension for charset conversion.
Abstract
Iconv is a wrapper class for the UNIX 95 iconv()
function family, which translates strings between various encoding systems.
See Open Group’s on-line documents for more details.
-
iconv.h
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html -
iconv_open()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html -
iconv()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.html -
iconv_close()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
Which coding systems are available is platform-dependent.
Examples
-
Simple conversion between two charsets.
converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
-
Instantiate a new Iconv and use method Iconv#iconv.
cd = Iconv.new(to, from)
begin
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil) # Don't forget this!
ensure
  cd.close
end
-
Invoke Iconv.open with a block.
Iconv.open(to, from) do |cd|
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil)
end
-
Shorthand for (3).
Iconv.iconv(to, from, *input.to_a)
Attentions
Even if some implementation-dependent extensions are useful, DON’T USE those extensions in libraries and scripts that will be widely distributed. If you want to use those features, use String#encode.
Defined Under Namespace
Modules: Failure Classes: BrokenLibrary, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange
Constant Summary collapse
- VERSION =
"1.1.0"
Class Method Summary collapse
-
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
-
.conv(to, from, str) ⇒ Object
Shorthand for Iconv.iconv(to, from, str).join. See Iconv.iconv.
-
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
-
.iconv(to, from, *strs) ⇒ Object
Shorthand for Iconv.open(to, from) { |cd| (strs + [nil]).collect { |s| cd.iconv(s) } }.
-
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
-
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new, except that when it is called with a block it yields the new instance, closes it afterwards, and returns the result returned from the block.
Instance Method Summary collapse
-
#close ⇒ Object
Finishes conversion.
-
#conv(str...) ⇒ Object
Equivalent to iconv(nil, str..., nil).join.
-
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
-
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
-
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
#new(to, from, [options]) ⇒ Object
constructor
Creates a new code converter from a coding-system designated with
from
to another one designated with
to
. -
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
-
#transliterate? ⇒ Boolean
Returns transliterate flag.
-
#trivial? ⇒ Boolean
Returns trivial flag.
Constructor Details
#new(to, from, [options]) ⇒ Object
Creates new code converter from a coding-system designated with from
to another one designated with to
.
Parameters
to
-
encoding name for destination
from
-
encoding name for source
options
-
options for converter
Exceptions
- TypeError
-
if
to
or
from
aren’t String
- InvalidEncoding
-
if the designated converter couldn’t be found
- SystemCallError
-
if
iconv_open(3)
fails
736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 |
# File 'ext/iconv/iconv.c', line 736
/*
 * Iconv#initialize(to, from, [options])
 *
 * Sets up the receiver as a converter from encoding `from` to encoding `to`.
 *
 * argc/argv: two required arguments (to, from) and one optional (options),
 *            as parsed by rb_scan_args with the "21" format.
 * self:      the Iconv object being initialized.
 *
 * Returns self.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    /* Release whatever descriptor the object currently holds and clear the
     * slot before creating the replacement; presumably this keeps the
     * object from pointing at a freed descriptor if iconv_create() raises. */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;
    DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
#ifdef HAVE_RUBY_ENCODING_H
    /* Record the destination encoding index only when it is non-negative,
     * exactly as iconv_s_open() does for objects it creates.
     * NOTE(review): a negative idx presumably means "no matching Ruby
     * encoding" -- confirm against iconv_create(). */
    if (idx >= 0) ICONV_ENCODING_SET(self, idx);
#endif
    return self;
}
|
Class Method Details
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
206 207 208 209 210 |
# File 'ext/iconv/iconv.c', line 206
/*
 * Iconv.charset_map
 *
 * Hands back the file-level charset_map object unchanged.
 * The receiver (klass) is not consulted.
 */
static VALUE
charset_map_get(VALUE klass)
{
    VALUE map = charset_map;

    (void)klass;
    return map;
}
|
.conv(to, from, str) ⇒ Object
849 850 851 852 853 854 855 856 857 858 859 860 |
# File 'ext/iconv/iconv.c', line 849
/*
 * Iconv.conv(to, from, str)
 *
 * Converts the single string `str` from encoding `from` to encoding `to`.
 * The conversion environment is filled in with one input string, a fresh
 * empty String as the accumulator and rb_str_append as the append function;
 * iconv_s_convert does the work and iconv_free is run on the descriptor
 * afterwards via rb_ensure, whether or not the conversion raised.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;

    env.argv = &str;
    env.argc = 1;
    env.append = rb_str_append;
    env.ret = rb_str_new(0, 0);
    /* Create the descriptor last so nothing between here and rb_ensure
     * can raise while it is still unprotected. */
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
|
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 |
# File 'ext/iconv/iconv.c', line 1175
/*
 * Iconv.ctlmethods
 *
 * Builds and returns an Array of Symbols naming the iconvctl()-backed
 * methods. Each symbol is included only when the corresponding ICONV_*
 * request macro is defined at compile time.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

#ifdef ICONV_TRIVIALP
    rb_ary_push(methods, ID2SYM(rb_intern("trivial?")));
#endif

#ifdef ICONV_GET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate?")));
#endif

#ifdef ICONV_SET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate=")));
#endif

#ifdef ICONV_GET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq?")));
#endif

#ifdef ICONV_SET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return methods;
}
|
.iconv(to, from, *strs) ⇒ Object
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 |
# File 'ext/iconv/iconv.c', line 825
/*
 * Iconv.iconv(to, from, *strs)
 *
 * Converts every string in `strs` from encoding `from` to encoding `to`
 * and collects the results in an Array (rb_ary_push is the append
 * function). argv[0] and argv[1] are `to` and `from`; the remaining
 * arguments are the inputs. Raises ArgumentError when fewer than two
 * arguments are supplied. iconv_free is run on the descriptor afterwards
 * via rb_ensure.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    int nstrs;

    /* `to' and `from' are mandatory */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = argv + 2;
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(nstrs);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
|
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 |
# File 'ext/iconv/iconv.c', line 909
/*
 * Iconv.list {|*aliases| ... }
 *
 * Enumerates the encoding names known to the underlying iconv library.
 *
 * With HAVE_ICONVLIST: iconvlist() drives the list_iconv callback with
 * `args` as its user data. args[1] is 0 when a block was given, otherwise
 * a new Array, which is returned at the end. args[0] is read back as a
 * jump-tag state after iconvlist() returns; a non-zero state is re-raised
 * with rb_jump_tag().
 *
 * With HAVE___ICONV_FREE_LIST: the names from __iconv_get_list() are copied
 * into a Ruby Array, the native list is freed, and the Array is either
 * returned (no block) or yielded element by element.
 *
 * Returns the Array when one was built and no block was given; otherwise nil.
 */
static VALUE
iconv_s_list(VALUE klass)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /* Start with a clean state slot: if the callback never writes it,
     * reading args[0] below would otherwise use an indeterminate value. */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = (int)args[0];
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* Copy the names into Ruby strings before freeing the native list. */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
|
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new, except that when it is called with a block it yields the new instance, closes it afterwards, and returns the result returned from the block.
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 |
# File 'ext/iconv/iconv.c', line 762
/*
 * Iconv.open(to, from, [options]) {|iconv| ... }
 *
 * Creates a conversion descriptor for from -> to, wraps it in a new object
 * of the receiving class, and records the encoding index when it is
 * non-negative. Without a block the new object is returned. With a block
 * the object is yielded and iconv_finish is run on it afterwards via
 * rb_ensure; the block's value is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle, obj;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    handle = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
    obj = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
#ifdef HAVE_RUBY_ENCODING_H
    if (idx >= 0) {
        ICONV_ENCODING_SET(obj, idx);
    }
#endif

    if (!rb_block_given_p()) {
        return obj;
    }
    return rb_ensure(rb_yield, obj, (VALUE(*)())iconv_finish, obj);
}
|
Instance Method Details
#close ⇒ Object
Finishes conversion.
After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.
Returns a string containing the byte sequence to change the output buffer to its initial shift state.
965 966 967 968 969 970 971 972 |
# File 'ext/iconv/iconv.c', line 965
/*
 * Iconv#close
 *
 * Finishes the conversion. When the object no longer holds a descriptor
 * (check_iconv returns 0) nil is returned. Otherwise the result of
 * iconv_init_state(self) is returned, with iconv_free run on the
 * descriptor afterwards via rb_ensure.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
        return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}
|
#conv(str...) ⇒ Object
Equivalent to
iconv(nil, str..., nil).join
1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 |
# File 'ext/iconv/iconv.c', line 1054
/*
 * Iconv#conv(str...)
 *
 * Starts with the result of a nil conversion, converts each argument in
 * order and appends every non-empty piece, then appends the result of one
 * more nil conversion. With no arguments, only the first nil conversion is
 * performed and its result returned.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ICONV_ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
        if (RSTRING_LEN(piece) != 0) {
            rb_str_buf_append(result, piece);
        }
    }

    /* trailing nil conversion after all inputs have been consumed */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece) != 0) {
        rb_str_buf_append(result, piece);
    }
    return result;
}
|
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
1158 1159 1160 1161 1162 1163 1164 |
# File 'ext/iconv/iconv.c', line 1158
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Reduces `discard_ilseq` to a C truth value with RTEST and passes it to
 * iconv_ctl with the ICONV_SET_DISCARD_ILSEQ request. Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled;

    enabled = RTEST(discard_ilseq);
    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);

    return self;
}
|
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
1139 1140 1141 1142 1143 1144 1145 1146 |
# File 'ext/iconv/iconv.c', line 1139
/*
 * Iconv#discard_ilseq?
 *
 * Issues the ICONV_GET_DISCARD_ILSEQ request through iconv_ctl and returns
 * Qtrue when the resulting flag is non-zero, Qfalse otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);

    return flag ? Qtrue : Qfalse;
}
|
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
If
str
is a String, converts
str[start, length]
and returns the converted string. -
If
str
is
nil
, places the converter itself into the initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state. -
Otherwise, raises an exception.
Parameters
- str
-
string to be converted, or nil
- start
-
starting offset
- length
-
conversion length; nil or -1 means the whole string from start
Exceptions
-
IconvIllegalSequence
-
IconvInvalidCharacter
-
IconvOutOfRange
Examples
See the Iconv documentation.
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 |
# File 'ext/iconv/iconv.c', line 1001
/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Parses one required argument (str) and up to two optional ones (n1, n2),
 * works out a start offset and a length from them, and hands the result to
 * iconv_convert together with the receiver's descriptor and encoding index.
 *
 * Returns whatever iconv_convert returns.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
VALUE str, n1, n2;
VALUE cd = check_iconv(self);
long start = 0, length = 0, slen = 0;
rb_scan_args(argc, argv, "12", &str, &n1, &n2);
/* slen stays 0 for a nil str; otherwise it is the string's length, taken
 * from rb_str_length when encodings are available and from RSTRING_LEN
 * when they are not. */
if (!NIL_P(str)) {
#ifdef HAVE_RUBY_ENCODING_H
VALUE n = rb_str_length(StringValue(str));
slen = NUM2LONG(n);
#else
slen = RSTRING_LEN(StringValue(str));
#endif
}
/* With exactly two arguments, n1 is first tried as a range via
 * rb_range_beg_len, which fills in start and length on success.
 * In every other case n1 is the start and n2 the length. */
if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
/* A negative start is offset by slen. length is only assigned when n1 is
 * nil or the resulting start is in range (>= 0 after the offset, or
 * < slen for a non-negative start); otherwise it keeps its initial 0.
 * A nil n2 selects -1. */
if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
length = NIL_P(n2) ? -1 : NUM2LONG(n2);
}
}
/* NOTE(review): this block dereferences str (RSTRING_PTR / rb_enc_get)
 * without re-checking for nil. A nil str with a nil n1 and a positive n2
 * appears to reach it with length > 0 -- confirm whether that call shape
 * is possible and guarded elsewhere. */
if (start > 0 || length > 0) {
#ifdef HAVE_RUBY_ENCODING_H
/* Re-express start and length as pointer differences within the string's
 * buffer, using rb_enc_nth to locate the start-th position from s and the
 * length-th position from ps under the string's encoding. */
const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
const char *ps = s;
rb_encoding *enc = rb_enc_get(str);
if (start > 0) {
start = (ps = rb_enc_nth(s, e, start, enc)) - s;
}
if (length > 0) {
length = rb_enc_nth(ps, e, length, enc) - ps;
}
#else
/* Without encoding support, clamp start and length to the string. */
if (start > slen) {
start = slen;
}
if (length > slen - start) {
length = slen - start;
}
#endif
}
return iconv_convert(VALUE2ICONV(cd), str, start, length, ICONV_ENCODING_GET(self), NULL);
}
|
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
1121 1122 1123 1124 1125 1126 1127 |
# File 'ext/iconv/iconv.c', line 1121
/*
 * Iconv#transliterate=(flag)
 *
 * Reduces `transliterate` to a C truth value with RTEST and passes it to
 * iconv_ctl with the ICONV_SET_TRANSLITERATE request. Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled;

    enabled = RTEST(transliterate);
    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);

    return self;
}
|
#transliterate? ⇒ Boolean
Returns transliterate flag.
1102 1103 1104 1105 1106 1107 1108 1109 |
# File 'ext/iconv/iconv.c', line 1102
/*
 * Iconv#transliterate?
 *
 * Issues the ICONV_GET_TRANSLITERATE request through iconv_ctl and returns
 * Qtrue when the resulting flag is non-zero, Qfalse otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, flag);

    return flag ? Qtrue : Qfalse;
}
|
#trivial? ⇒ Boolean
Returns trivial flag.
1083 1084 1085 1086 1087 1088 1089 1090 |
# File 'ext/iconv/iconv.c', line 1083
/*
 * Iconv#trivial?
 *
 * Issues the ICONV_TRIVIALP request through iconv_ctl and returns Qtrue
 * when the resulting flag is non-zero, Qfalse otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_TRIVIALP, flag);

    return flag ? Qtrue : Qfalse;
}
|