Method: Enumerable#sort_by

Defined in:
enum.c

#sort_by {|obj| ... } ⇒ Array #sort_byObject

Sorts enum using a set of keys generated by mapping the values in enum through the given block.

If no block is given, an enumerator is returned instead.

%w{apple pear fig}.sort_by { |word| word.length}
              #=> ["fig", "pear", "apple"]

The current implementation of sort_by generates an array of tuples containing the original collection element and the mapped value. This makes sort_by fairly expensive when the keysets are simple.

require 'benchmark'

a = (1..100000).map { rand(100000) }

Benchmark.bm(10) do |b|
  b.report("Sort")    { a.sort }
  b.report("Sort by") { a.sort_by { |a| a } }
end

produces:

user     system      total        real
Sort        0.180000   0.000000   0.180000 (  0.175469)
Sort by     1.980000   0.040000   2.020000 (  2.013586)

However, consider the case where comparing the keys is a non-trivial operation. The following code sorts some files on modification time using the basic sort method.

files = Dir["*"]
sorted = files.sort { |a, b| File.new(a).mtime <=> File.new(b).mtime }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This sort is inefficient: it generates two new File objects during every comparison. A slightly better technique is to use the Kernel#test method to generate the modification times directly.

files = Dir["*"]
sorted = files.sort { |a, b|
  test(?M, a) <=> test(?M, b)
}
sorted   #=> ["mon", "tues", "wed", "thurs"]

This still generates many unnecessary Time objects. A more efficient technique is to cache the sort keys (modification times in this case) before the sort. Perl users often call this approach a Schwartzian Transform, after Randal Schwartz. We construct a temporary array, where each element is an array containing our sort key along with the filename. We sort this array, and then extract the filename from the result.

sorted = Dir["*"].collect { |f|
   [test(?M, f), f]
}.sort.collect { |f| f[1] }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This is exactly what sort_by does internally.

sorted = Dir["*"].sort_by { |f| test(?M, f) }
sorted   #=> ["mon", "tues", "wed", "thurs"]

Overloads:

  • #sort_by {|obj| ... } ⇒ Array

    Yields:

    • (obj)

    Returns:



956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
# File 'enum.c', line 956

static VALUE
enum_sort_by(VALUE obj)
{
    VALUE ary, buf;
    NODE *memo;
    long i;
    struct sort_by_data *data;

    RETURN_SIZED_ENUMERATOR(obj, 0, 0, enum_size);

    if (RB_TYPE_P(obj, T_ARRAY) && RARRAY_LEN(obj) <= LONG_MAX/2) {
  ary = rb_ary_new2(RARRAY_LEN(obj)*2);
    }
    else {
  ary = rb_ary_new();
    }
    RBASIC_CLEAR_CLASS(ary);
    buf = rb_ary_tmp_new(SORT_BY_BUFSIZE*2);
    rb_ary_store(buf, SORT_BY_BUFSIZE*2-1, Qnil);
    memo = NEW_MEMO(0, 0, 0);
    OBJ_INFECT(memo, obj);
    data = (struct sort_by_data *)&memo->u1;
    data->ary = ary;
    data->buf = buf;
    data->n = 0;
    rb_block_call(obj, id_each, 0, 0, sort_by_i, (VALUE)memo);
    ary = data->ary;
    buf = data->buf;
    if (data->n) {
  rb_ary_resize(buf, data->n*2);
  rb_ary_concat(ary, buf);
    }
    if (RARRAY_LEN(ary) > 2) {
  RARRAY_PTR_USE(ary, ptr,
          ruby_qsort(ptr, RARRAY_LEN(ary)/2, 2*sizeof(VALUE),
         sort_by_cmp, (void *)ary));
    }
    if (RBASIC(ary)->klass) {
  rb_raise(rb_eRuntimeError, "sort_by reentered");
    }
    for (i=1; i<RARRAY_LEN(ary); i+=2) {
  RARRAY_ASET(ary, i/2, RARRAY_AREF(ary, i));
    }
    rb_ary_resize(ary, RARRAY_LEN(ary)/2);
    RBASIC_SET_CLASS_RAW(ary, rb_cArray);
    OBJ_INFECT(ary, memo);

    return ary;
}