Method: IO#each_codepoint

Defined in:
io.c

#each_codepoint {|c| ... } ⇒ self
#each_codepoint ⇒ Enumerator

Calls the given block with each codepoint in the stream; returns self:

f = File.new('t.rus')
a = []
f.each_codepoint {|c| a << c }
a # => [1090, 1077, 1089, 1090]
f.close

Returns an Enumerator if no block is given.

Related: IO#each_byte, IO#each_char.

Overloads:

  • #each_codepoint {|c| ... } ⇒ self

    Yields:

    • (c)

    Returns:

    • (self)
[View source]

4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
# File 'io.c', line 4892

/*
 * Implementation of IO#each_codepoint.
 *
 * Yields each codepoint read from +io+ to the given block (converted with
 * UINT2NUM) and returns +io+.  When no block is given, RETURN_ENUMERATOR
 * returns early (an Enumerator, per the method documentation).
 *
 * There are two reading paths:
 *   - if NEED_READCONV(fptr) is true, characters are taken from the
 *     conversion buffer fptr->cbuf, which is refilled by more_char();
 *   - otherwise characters are taken directly from the read buffer
 *     fptr->rbuf, which is refilled by io_fillbuf().
 *
 * Raises:
 *   - ArgumentError ("invalid byte sequence in <encoding>") when the bytes
 *     do not form a valid character, including an incomplete character at
 *     the end of input;
 *   - IOError ("too long character") when cbuf is full and the character
 *     at its head is still incomplete.
 */
static VALUE
rb_io_each_codepoint(VALUE io)
{
    rb_io_t *fptr;
    rb_encoding *enc;   /* only used by the `invalid` label for the error message */
    unsigned int c;     /* decoded codepoint */
    int r, n;           /* r: mbclen result; n: byte length of the current character */

    RETURN_ENUMERATOR(io, 0, 0);
    GetOpenFile(io, fptr);
    rb_io_check_char_readable(fptr);

    READ_CHECK(fptr);
    if (NEED_READCONV(fptr)) {
        /* Path 1: read through the conversion buffer (fptr->cbuf). */
        SET_BINARY_MODE(fptr);
        r = 1;		/* no invalid char yet */
        for (;;) {
            /* NOTE(review): make_readconv is defined elsewhere; presumably it
             * (re)establishes the read converter and cbuf -- confirm. */
            make_readconv(fptr, 0);
            /* Inner loop: spin until cbuf starts with something that is not
             * "need more bytes", or until the input is finished. */
            for (;;) {
                if (fptr->cbuf.len) {
                    if (fptr->encs.enc)
                        r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off,
                                                  fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
                                                  fptr->encs.enc);
                    else
                        /* No encs.enc: treat every byte as a one-byte character. */
                        r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
                    if (!MBCLEN_NEEDMORE_P(r))
                        break;
                    /* Still incomplete although the buffer is at capacity:
                     * there is no room to read the rest of the character. */
                    if (fptr->cbuf.len == fptr->cbuf.capa) {
                        rb_raise(rb_eIOError, "too long character");
                    }
                }
                if (more_char(fptr) == MORE_CHAR_FINISHED) {
                    clear_readconv(fptr);
                    /* Input ended.  If the last length check did not find a
                     * complete character (r keeps its initial value 1 when
                     * nothing was ever checked), report it as invalid;
                     * otherwise iteration is done. */
                    if (!MBCLEN_CHARFOUND_P(r)) {
                        enc = fptr->encs.enc;
                        goto invalid;
                    }
                    return io;
                }
            }
            if (MBCLEN_INVALID_P(r)) {
                enc = fptr->encs.enc;
                goto invalid;
            }
            n = MBCLEN_CHARFOUND_LEN(r);
            if (fptr->encs.enc) {
                c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off,
                                     fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
                                     fptr->encs.enc);
            }
            else {
                c = (unsigned char)fptr->cbuf.ptr[fptr->cbuf.off];
            }
            /* Consume the character from cbuf before yielding, so the buffer
             * state is already advanced while the block runs. */
            fptr->cbuf.off += n;
            fptr->cbuf.len -= n;
            rb_yield(UINT2NUM(c));
            /* NOTE(review): re-validated after every yield, presumably because
             * the block may have closed or otherwise changed io -- confirm. */
            rb_io_check_byte_readable(fptr);
        }
    }
    /* Path 2: read directly from the read buffer (fptr->rbuf). */
    NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
    enc = io_input_encoding(fptr);
    while (io_fillbuf(fptr) >= 0) {
        r = rb_enc_precise_mbclen(fptr->rbuf.ptr+fptr->rbuf.off,
                                  fptr->rbuf.ptr+fptr->rbuf.off+fptr->rbuf.len, enc);
        if (MBCLEN_CHARFOUND_P(r) &&
            (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf.len) {
            /* A complete character lies entirely within rbuf: decode it,
             * consume it, then yield. */
            c = rb_enc_codepoint(fptr->rbuf.ptr+fptr->rbuf.off,
                                 fptr->rbuf.ptr+fptr->rbuf.off+fptr->rbuf.len, enc);
            fptr->rbuf.off += n;
            fptr->rbuf.len -= n;
            rb_yield(UINT2NUM(c));
        }
        else if (MBCLEN_INVALID_P(r)) {
            goto invalid;
        }
        else if (MBCLEN_NEEDMORE_P(r)) {
            /* The character continues past the buffered data: gather its
             * bytes into a small local buffer across buffer refills. */
            char cbuf[8], *p = cbuf;
            int more = MBCLEN_NEEDMORE_LEN(r);
            if (more > numberof(cbuf)) goto invalid;
            /* more becomes the total to collect: the bytes currently buffered
             * plus the bytes still missing.  It must fit in cbuf. */
            more += n = fptr->rbuf.len;
            if (more > numberof(cbuf)) goto invalid;
            /* NOTE(review): read_buffered_data is defined elsewhere; it appears
             * to copy up to `more` buffered bytes to p and return the count --
             * confirm.  The loop advances p, decrements more, and refills the
             * read buffer while bytes are still outstanding; running out of
             * input in the middle of the character is an error. */
            while ((n = (int)read_buffered_data(p, more, fptr)) > 0 &&
                   (p += n, (more -= n) > 0)) {
                if (io_fillbuf(fptr) < 0) goto invalid;
                /* n is reassigned by the loop condition on the next pass. */
                if ((n = fptr->rbuf.len) > more) n = more;
            }
            /* Re-validate the assembled bytes [cbuf, p) before decoding. */
            r = rb_enc_precise_mbclen(cbuf, p, enc);
            if (!MBCLEN_CHARFOUND_P(r)) goto invalid;
            c = rb_enc_codepoint(cbuf, p, enc);
            rb_yield(UINT2NUM(c));
        }
        else {
            /* None of the branches above matched (e.g. a found length larger
             * than the buffered data): nothing was yielded, so skip the
             * post-yield check and go back to io_fillbuf(). */
            continue;
        }
        rb_io_check_byte_readable(fptr);
    }
    return io;

  invalid:
    /* Every jump here has set enc to the encoding named in the message. */
    rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
    UNREACHABLE_RETURN(Qundef);
}