Module: Piglet::Relation::Relation

Included in:: Field::Field::DummyRelation, Inout::Load, Cogroup, Cross, Distinct, Filter, Foreach, Group, Join, Limit, NestedForeach, Order, RelationShard, Sample, Split, Stream, Union

Defined in:: lib/piglet/relation/relation.rb

Instance Attribute Summary collapse

#sources ⇒ Object readonly

Returns the value of attribute sources.

Instance Method Summary collapse

#[](n) ⇒ Object
#alias ⇒ Object

The name this relation will get in Pig Latin.
#cogroup(description) ⇒ Object

COGROUP.
#cross(*args) ⇒ Object

CROSS.
#distinct(options = {}) ⇒ Object

DISTINCT.
#eql?(other) ⇒ Boolean
#field(name) ⇒ Object
#filter(&block) ⇒ Object

FILTER.
#foreach(&block) ⇒ Object

FOREACH …
#group(*args) ⇒ Object

GROUP.
#hash ⇒ Object
#join(description) ⇒ Object

JOIN.
#limit(n) ⇒ Object

LIMIT.
#method_missing(name, *args) ⇒ Object
#nested_foreach(&block) ⇒ Object

FOREACH …
#next_field_alias ⇒ Object
#order(*args) ⇒ Object

ORDER.
#sample(n) ⇒ Object

SAMPLE.
#schema ⇒ Object
#split(&block) ⇒ Object

SPLIT.
#stream(*args) ⇒ Object

STREAM.
#union(*relations) ⇒ Object

UNION.

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args) ⇒ `Object`

# File 'lib/piglet/relation/relation.rb', line 174

# Treats an unknown message that carries no arguments and whose name consists
# solely of word characters as a lookup of the field with that name. Any
# other message is handed on to +super+.
#
# The name is matched with \A and \z so that the whole name is validated.
# ^ and $ anchor per line, which would let a multi-line name through as soon
# as one of its lines consisted of word characters only.
#
# @param name [Symbol] the name of the missing method
# @param args [Array] the arguments of the original call
# @return [Object] whatever #field returns for +name+
def method_missing(name, *args)
  if args.empty? && name.to_s =~ /\A\w+\z/
    field(name)
  else
    super
  end
end

Instance Attribute Details

#sources ⇒ `Object` (readonly)

Returns the value of attribute sources.



6
7
8

# File 'lib/piglet/relation/relation.rb', line 6

# Reader for the @sources instance variable.
# NOTE(review): presumably the relation(s) this relation was derived from;
# confirm against the classes that assign @sources.
def sources
  @sources
end

Instance Method Details

#[](n) ⇒ `Object`



182
183
184

# File 'lib/piglet/relation/relation.rb', line 182

# Looks up a field by position: +n+ is prefixed with a dollar sign and the
# resulting name is handed to #field.
def [](n)
  positional_name = '$' + n.to_s
  field(positional_name)
end

#alias ⇒ `Object`

The name this relation will get in Pig Latin. The name is generated when the relation is output by the interpreter, and will be unique.



10
11
12

# File 'lib/piglet/relation/relation.rb', line 10

# Returns this relation's alias. The first call asks the interpreter for the
# next relation alias and stores it; later calls return the stored value.
def alias
  @alias = @interpreter.next_relation_alias unless @alias
  @alias
end

#cogroup(description) ⇒ `Object`

COGROUP

x.cogroup(x => :a, y => :b)                 # => COGROUP x BY a, y BY b
x.cogroup(x => :a, y => :b, z => :c)        # => COGROUP x BY a, y BY b, z BY c
x.cogroup(x => [:a, :b], y => [:c, :d])     # => COGROUP x BY (a, b), y BY (c, d)
x.cogroup(x => :a, y => [:b, :inner])       # => COGROUP x BY a, y BY b INNER
x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5



45
46
47

# File 'lib/piglet/relation/relation.rb', line 45

# Builds a Cogroup from this relation, the interpreter and +description+.
# +description+ is passed through untouched; its interpretation is left to
# Cogroup.
def cogroup(description)
  Cogroup.new(self, @interpreter, description)
end

#cross(*args) ⇒ `Object`

CROSS

x.cross(y)                      # => CROSS x, y
x.cross(y, z, w)                # => CROSS x, y, z, w
x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5

# File 'lib/piglet/relation/relation.rb', line 54

# Builds a Cross of this relation and the given relations.
#
# The arguments are separated into relations and options by
# split_at_options. This relation is put first and the list is flattened,
# so relations may be given one by one or wrapped in arrays.
def cross(*args)
  others, options = split_at_options(args)
  every_relation = ([self] + others).flatten
  Cross.new(every_relation, @interpreter, options)
end

#distinct(options = {}) ⇒ `Object`

DISTINCT

x.distinct                 # => DISTINCT x
x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5



34
35
36

# File 'lib/piglet/relation/relation.rb', line 34

# Builds a Distinct from this relation, the interpreter and +options+
# (an empty hash when omitted).
def distinct(options={})
  Distinct.new(self, @interpreter, options)
end

#eql?(other) ⇒ `Boolean`

Returns:

(Boolean)



190
191
192

# File 'lib/piglet/relation/relation.rb', line 190

# Two relations are eql? when they have the same alias. Anything that is not
# a Relation is never eql? to a relation.
def eql?(other)
  return false unless other.is_a?(Relation)

  other.alias == self.alias
end

#field(name) ⇒ `Object`

# File 'lib/piglet/relation/relation.rb', line 159

# Creates a Field::Reference to the field +name+ of this relation.
#
# The field's type is looked up in the schema on a best-effort basis: any
# StandardError raised while determining it is swallowed and the reference
# is created with a nil type instead.
def field(name)
  resolved_type =
    begin
      schema.field_type(name)
    rescue StandardError
      nil
    end
  Field::Reference.new(name, self, :type => resolved_type)
end

#filter(&block) ⇒ `Object`

FILTER

x.filter { a == b }          # => FILTER x BY a == b
x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3

# File 'lib/piglet/relation/relation.rb', line 63

# Builds a Filter for this relation. The block is evaluated with
# instance_eval on a fresh BlockContext, and its result is handed to Filter
# as the third argument.
def filter(&block)
  condition = BlockContext.new(self, @interpreter).instance_eval(&block)
  Filter.new(self, @interpreter, condition)
end

#foreach(&block) ⇒ `Object`

FOREACH … GENERATE

x.foreach { a }            # => FOREACH x GENERATE a
x.foreach { [a, b] }       # => FOREACH x GENERATE a, b
x.foreach { a.max }        # => FOREACH x GENERATE MAX(a)
x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b

See #nested_foreach for FOREACH … { … GENERATE }

# File 'lib/piglet/relation/relation.rb', line 76

# Builds a Foreach for this relation. The block is evaluated with
# instance_eval on a fresh BlockContext, and its result is handed to Foreach
# as the third argument.
def foreach(&block)
  generated = BlockContext.new(self, @interpreter).instance_eval(&block)
  Foreach.new(self, @interpreter, generated)
end

#group(*args) ⇒ `Object`

GROUP

x.group(:a)                           # => GROUP x BY a
x.group(:a, :b, :c)                   # => GROUP x BY (a, b, c)
x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3

# File 'lib/piglet/relation/relation.rb', line 25

# Builds a Group for this relation.
#
# The arguments are separated into grouping keys and options by
# split_at_options. The keys are flattened into a single list, so they may
# be given one by one or wrapped in an array.
def group(*args)
  keys, options = split_at_options(args)
  flat_keys = [keys].flatten
  Group.new(self, @interpreter, flat_keys, options)
end

#hash ⇒ `Object`



186
187
188

# File 'lib/piglet/relation/relation.rb', line 186

# Hash code derived from the alias, which is the same value #eql? compares,
# so relations that are eql? also produce equal hash codes.
def hash
  self.alias.hash
end

#join(description) ⇒ `Object`

JOIN

x.join(x => :a, y => :b)                        # => JOIN x BY a, y BY b
x.join(x => :a, y => :b, z => :c)               # => JOIN x BY a, y BY b, z BY c
x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
x.join(x => :a, y => :b, :parallel => 5)        # => JOIN x BY a, y BY b PARALLEL 5



97
98
99

# File 'lib/piglet/relation/relation.rb', line 97

# Builds a Join from this relation, the interpreter and +description+.
# +description+ is passed through untouched; its interpretation is left to
# Join.
def join(description)
  Join.new(self, @interpreter, description)
end

#limit(n) ⇒ `Object`

LIMIT

x.limit(10) # => LIMIT x 10



104
105
106

# File 'lib/piglet/relation/relation.rb', line 104

# Builds a Limit from this relation, the interpreter and +n+.
def limit(n)
  Limit.new(self, @interpreter, n)
end

#nested_foreach(&block) ⇒ `Object`

FOREACH … { … GENERATE }

x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }

See #foreach for FOREACH … GENERATE

# File 'lib/piglet/relation/relation.rb', line 86

# Builds a NestedForeach for this relation. The block is evaluated with
# instance_eval on a fresh BlockContext, and its result is handed to
# NestedForeach as the third argument.
def nested_foreach(&block)
  generated = BlockContext.new(self, @interpreter).instance_eval(&block)
  NestedForeach.new(self, @interpreter, generated)
end

#next_field_alias ⇒ `Object`

# File 'lib/piglet/relation/relation.rb', line 14

# Returns a new field alias on every call: this relation's alias followed by
# "_field_" and a per-relation counter that starts at 1.
def next_field_alias
  @field_counter = (@field_counter || 0) + 1
  "#{self.alias}_field_#{@field_counter}"
end

#order(*args) ⇒ `Object`

ORDER

x.order(:a)                      # => ORDER x BY a
x.order(:a, :b)                  # => ORDER x BY a, b
x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
x.order(:a, :parallel => 5)      # => ORDER x BY a PARALLEL 5

–

NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in Ruby 1.8 the order of the keys cannot be guaranteed.

# File 'lib/piglet/relation/relation.rb', line 119

# Builds an Order for this relation.
#
# The arguments are separated into fields and options by split_at_options
# before being handed to Order.
def order(*args)
  fields, options = split_at_options(args)
  # NOTE(review): splat assignment is version dependent. Ruby 1.8 unwraps a
  # one-element array to its single element here, while Ruby 1.9+ keeps the
  # array. Confirm which shape Order expects before changing this line.
  fields = *fields
  Order.new(self, @interpreter, fields, options)
end

#sample(n) ⇒ `Object`

SAMPLE

x.sample(5) # => SAMPLE x 5



128
129
130

# File 'lib/piglet/relation/relation.rb', line 128

# Builds a Sample from this relation, the interpreter and +n+.
def sample(n)
  Sample.new(self, @interpreter, n)
end

#schema ⇒ `Object`

# File 'lib/piglet/relation/relation.rb', line 164

# Determines the schema of this relation by delegating to its only source.
#
# An empty source list is reported the same way as a missing one. Without
# that check the call would fall through to +nil.schema+ and fail with a
# NoMethodError that says nothing about the actual problem.
#
# @return [Object] the schema of the single source relation
# @raise [Piglet::Schema::SchemaError] when there is no source relation, or
#   more than one
def schema
  if @sources.nil? || @sources.size.zero?
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema'
  elsif @sources.size > 1
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation'
  else
    @sources.first.schema
  end
end

#split(&block) ⇒ `Object`

SPLIT

y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4

# File 'lib/piglet/relation/relation.rb', line 135

# Builds a Split for this relation and returns its shards. The block is
# evaluated with instance_eval on a fresh BlockContext, and its result is
# handed to Split as the third argument.
def split(&block)
  conditions = BlockContext.new(self, @interpreter).instance_eval(&block)
  splitter = Split.new(self, @interpreter, conditions)
  splitter.shards
end

#stream(*args) ⇒ `Object`

STREAM

x.stream(:command => 'cut -f 3')       # => STREAM x THROUGH `cut -f 3`
x.stream(:cmd)                         # => STREAM x THROUGH cmd
x.stream(y, :command => 'cut -f 3')    # => STREAM x, y THROUGH `cut -f 3`
x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)

# File 'lib/piglet/relation/relation.rb', line 146

# Builds a Stream for this relation.
#
# The arguments are separated by split_at_options into the leading
# positional arguments and the options, and both are passed on to Stream
# unchanged.
def stream(*args)
  positional, options = split_at_options(args)
  Stream.new(self, @interpreter, positional, options)
end

#union(*relations) ⇒ `Object`

UNION

x.union(y)    # => UNION x, y
x.union(y, z) # => UNION x, y, z



155
156
157

# File 'lib/piglet/relation/relation.rb', line 155

# Builds a Union of this relation and the given relations. This relation is
# put first and the list is flattened, so relations may be given one by one
# or wrapped in arrays.
def union(*relations)
  members = [self, *relations].flatten
  Union.new(members, @interpreter)
end

Module: Piglet::Relation::Relation

Instance Attribute Summary collapse

Instance Method Summary collapse

Dynamic Method Handling

#method_missing(name, *args) ⇒ Object

Instance Attribute Details

#sources ⇒ Object (readonly)

Instance Method Details

#[](n) ⇒ Object

#alias ⇒ Object

#cogroup(description) ⇒ Object

#cross(*args) ⇒ Object

#distinct(options = {}) ⇒ Object

#eql?(other) ⇒ Boolean

#field(name) ⇒ Object

#filter(&block) ⇒ Object

#foreach(&block) ⇒ Object

#group(*args) ⇒ Object

#hash ⇒ Object

#join(description) ⇒ Object

#limit(n) ⇒ Object

#nested_foreach(&block) ⇒ Object

#next_field_alias ⇒ Object

#order(*args) ⇒ Object

#sample(n) ⇒ Object

#schema ⇒ Object

#split(&block) ⇒ Object

#stream(*args) ⇒ Object

#union(*relations) ⇒ Object