Class: Spark::ExternalSorter

Inherits:

Object

Object
Spark::ExternalSorter

show all

Includes:: Helper::System

Defined in:: lib/spark/sort.rb

Constant Summary collapse

MEMORY_RESERVE = Items managed by the GC cannot be destroyed explicitly, so #make_parts needs some memory in reserve

MEMORY_FREE_CHUNK = How big the chunk of newly added memory will be, because the GC does not immediately clean up unreferenced variables

START_SLICE_SIZE = How many items will be evaluated from the iterator at the start

MAX_SLICE_SIZE = Maximum slice size. A large value can effectively bypass memory control.

10_000

EVAL_N_VALUES = How many values will be taken from each enumerator.

KEY_FUNCTION = Default key function

lambda{|item| item}

Instance Attribute Summary collapse

#memory_chunk ⇒ Object readonly

Returns the value of attribute memory_chunk.
#memory_limit ⇒ Object readonly

Returns the value of attribute memory_limit.
#serializer ⇒ Object readonly

Returns the value of attribute serializer.
#total_memory ⇒ Object readonly

Returns the value of attribute total_memory.

Instance Method Summary collapse

#add_memory! ⇒ Object
#initialize(total_memory, serializer) ⇒ ExternalSorter constructor

A new instance of ExternalSorter.
#sort_by(iterator, ascending = true, key_function = KEY_FUNCTION) ⇒ Object

Methods included from Helper::System

included

Constructor Details

#initialize(total_memory, serializer) ⇒ `ExternalSorter`

Returns a new instance of ExternalSorter.

# File 'lib/spark/sort.rb', line 61

# Builds a sorter with a memory budget derived from +total_memory+.
#
# @param total_memory [Numeric] overall memory budget; stored unchanged
# @param serializer [Object] stored unchanged and exposed via #serializer
def initialize(total_memory, serializer)
  @total_memory = total_memory

  # Usable limit: the budget minus MEMORY_RESERVE percent.
  usable_percent = 100 - MEMORY_RESERVE
  @memory_limit  = total_memory * usable_percent / 100

  # Growth step: the budget minus MEMORY_FREE_CHUNK percent.
  chunk_percent = 100 - MEMORY_FREE_CHUNK
  @memory_chunk = total_memory * chunk_percent / 100

  @serializer = serializer
end

Instance Attribute Details

#memory_chunk ⇒ `Object` (readonly)

Returns the value of attribute memory_chunk.



59
60
61

# File 'lib/spark/sort.rb', line 59

# Reader for @memory_chunk, the amount that #add_memory! adds to the memory
# limit. #initialize derives it from total_memory and MEMORY_FREE_CHUNK.
#
# @return [Object] the current value of @memory_chunk
def memory_chunk
  @memory_chunk
end

#memory_limit ⇒ `Object` (readonly)

Returns the value of attribute memory_limit.



59
60
61

# File 'lib/spark/sort.rb', line 59

# Reader for @memory_limit. #initialize derives the starting value from
# total_memory and MEMORY_RESERVE; #add_memory! raises it by memory_chunk.
#
# @return [Object] the current value of @memory_limit
def memory_limit
  @memory_limit
end

#serializer ⇒ `Object` (readonly)

Returns the value of attribute serializer.



59
60
61

# File 'lib/spark/sort.rb', line 59

# Reader for @serializer, the serializer object handed to #initialize.
#
# @return [Object] the stored serializer
def serializer
  @serializer
end

#total_memory ⇒ `Object` (readonly)

Returns the value of attribute total_memory.



59
60
61

# File 'lib/spark/sort.rb', line 59

# Reader for @total_memory, the total memory value handed to #initialize.
#
# @return [Object] the stored total memory value
def total_memory
  @total_memory
end

Instance Method Details

#add_memory! ⇒ `Object`



68
69
70

# File 'lib/spark/sort.rb', line 68

# Raises the current memory limit by one memory chunk.
#
# @return [Object] the new value of @memory_limit
def add_memory!
  @memory_limit = @memory_limit + memory_chunk
end

#sort_by(iterator, ascending = true, key_function = KEY_FUNCTION) ⇒ `Object`

# File 'lib/spark/sort.rb', line 72

# Sorts the items of +iterator+ and yields them one at a time in sorted order.
#
# The input is split into sorted parts by make_parts, and the parts are then
# merged: a buffer holds up to EVAL_N_VALUES pending items from each part, is
# re-sorted on every round, and its first EVAL_N_VALUES items are yielded.
# create_temp_folder is called before the work starts and destroy_temp_folder
# always runs on exit (via +ensure+), including the no-block path.
#
# @param iterator [Object] source of items; handed to make_parts
# @param ascending [Boolean] sort direction, passed to Spark::InternalSorter.get
# @param key_function [#[]] callable that maps an item to its sort key
# @yield [item] every item, in sorted order
# @return [Enumerator] when no block is given
# @return [Array] an empty array when make_parts produced no parts
# @return [nil] after all items have been yielded
def sort_by(iterator, ascending=true, key_function=KEY_FUNCTION)
  # Forward ALL arguments to the lazy variant. Leaving out `ascending` would
  # shift `key_function` into its slot, so a descending sort requested without
  # a block would silently run ascending with the default key function.
  return to_enum(__callee__, iterator, ascending, key_function) unless block_given?

  create_temp_folder
  internal_sorter = Spark::InternalSorter.get(ascending, key_function)

  # Make N sorted enumerators
  parts = make_parts(iterator, internal_sorter)

  return [] if parts.empty?

  # The buffer stores [item, enumerator] pairs, so the key function has to
  # look at the first element of the pair.
  # From: [1,2,3] to [[1, Enumerator],[2, Enumerator],[3, Enumerator]]
  key_function_with_enum = lambda{|(key, _)| key_function[key]}
  internal_sorter = Spark::InternalSorter.get(ascending, key_function_with_enum)

  heap  = []
  enums = []

  # Preload up to EVAL_N_VALUES items from every part
  parts.each do |part|
    EVAL_N_VALUES.times {
      begin
        heap << [part.next, part]
      rescue StopIteration
        break
      end
    }
  end

  # Parts can already be exhausted while the heap still holds their items
  while parts.any? || heap.any?
    internal_sorter.sort(heap)

    # Every part is sorted and the heap holds the next EVAL_N_VALUES items of
    # each live part, so the first EVAL_N_VALUES items of the sorted heap are
    # final and can be emitted.
    EVAL_N_VALUES.times {
      break if heap.empty?

      item, enum = heap.shift
      enums << enum

      yield item
    }

    # Replace every emitted item with the next item from the same part
    while (enum = enums.shift)
      begin
        heap << [enum.next, enum]
      rescue StopIteration
        # Part is exhausted: forget it and drop its remaining refill requests
        parts.delete(enum)
        enums.delete(enum)
      end
    end
  end

ensure
  destroy_temp_folder
end

Class: Spark::ExternalSorter

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Helper::System

Constructor Details

#initialize(total_memory, serializer) ⇒ ExternalSorter

Instance Attribute Details

#memory_chunk ⇒ Object (readonly)

#memory_limit ⇒ Object (readonly)

#serializer ⇒ Object (readonly)

#total_memory ⇒ Object (readonly)