Module: Piglet::Relation::Relation

Instance Attribute Summary collapse

Instance Method Summary collapse

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args) ⇒ Object



174
175
176
177
178
179
180
# File 'lib/piglet/relation/relation.rb', line 174

# Treats an argument-less call to an otherwise undefined, word-like method
# name as a reference to the field of that name (via #field). Any other call
# falls through to the default method_missing behaviour.
#
# The name is matched with \A...\z instead of ^...$: the line anchors also
# match around embedded newlines, so a name such as "foo\nbar baz" would
# otherwise be accepted because its first line consists of word characters.
#
# NOTE(review): respond_to_missing? is intentionally not defined here.
# Reporting every word-like name as supported would make implicit conversion
# probes such as to_ary (used by Array#flatten) go through this method.
def method_missing(name, *args)
  if args.empty? && name.to_s =~ /\A\w+\z/
    field(name)
  else
    super
  end
end

Instance Attribute Details

#sources ⇒ Object (readonly)

Returns the value of attribute sources.



6
7
8
# File 'lib/piglet/relation/relation.rb', line 6

# Reader for @sources, the relation(s) this relation is derived from.
# Returns nil when @sources has never been assigned.
def sources
  @sources
end

Instance Method Details

#[](n) ⇒ Object



182
183
184
# File 'lib/piglet/relation/relation.rb', line 182

# Positional field access: builds the name "$<n>" from the given index and
# resolves it through #field.
def [](n)
  positional_name = '$' + n.to_s
  field(positional_name)
end

#alias ⇒ Object

The name this relation will get in Pig Latin. The name is generated when the relation is output by the interpreter, and will be unique.



10
11
12
# File 'lib/piglet/relation/relation.rb', line 10

# Returns this relation's alias, asking the interpreter for the next relation
# alias on first use and caching it in @alias for later calls.
def alias
  return @alias if @alias

  @alias = @interpreter.next_relation_alias
end

#cogroup(description) ⇒ Object

COGROUP

x.cogroup(x => :a, y => :b)                 # => COGROUP x BY a, y BY b
x.cogroup(x => :a, y => :b, z => :c)        # => COGROUP x BY a, y BY b, z BY c
x.cogroup(x => [:a, :b], y => [:c, :d])     # => COGROUP x BY (a, b), y BY (c, d)
x.cogroup(x => :a, y => [:b, :inner])       # => COGROUP x BY a, y BY b INNER
x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5


45
46
47
# File 'lib/piglet/relation/relation.rb', line 45

# Builds a Cogroup from this relation, its interpreter and the given
# description (a hash mapping relations to the field(s) to group by, plus
# options such as :parallel).
#
# @param description [Hash] passed through to Cogroup unchanged
# @return [Cogroup]
def cogroup(description)
  interpreter = @interpreter
  Cogroup.new(self, interpreter, description)
end

#cross(*args) ⇒ Object

CROSS

x.cross(y)                      # => CROSS x, y
x.cross(y, z, w)                # => CROSS x, y, z, w
x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5


54
55
56
57
# File 'lib/piglet/relation/relation.rb', line 54

# Builds a Cross of this relation with the given relations.
#
# The arguments are divided by split_at_options (defined elsewhere;
# presumably it separates a trailing options hash from the rest). This
# relation is put first and nested arrays of relations are flattened.
#
# @return [Cross]
def cross(*args)
  others, options = split_at_options(args)
  participants = [self].concat(others).flatten
  Cross.new(participants, @interpreter, options)
end

#distinct(options = {}) ⇒ Object

DISTINCT

x.distinct                 # => DISTINCT x
x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5


34
35
36
# File 'lib/piglet/relation/relation.rb', line 34

# Builds a Distinct over this relation.
#
# @param options [Hash] passed through to Distinct (e.g. :parallel => 5)
# @return [Distinct]
def distinct(options = {})
  interpreter = @interpreter
  Distinct.new(self, interpreter, options)
end

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)


190
191
192
# File 'lib/piglet/relation/relation.rb', line 190

# Two relations are eql? when the other object is a Relation and both report
# the same alias. Non-relations are never eql?.
#
# @return [Boolean]
def eql?(other)
  return false unless other.is_a?(Relation)

  other.alias == self.alias
end

#field(name) ⇒ Object



159
160
161
162
# File 'lib/piglet/relation/relation.rb', line 159

# Builds a Field::Reference to the named field of this relation.
#
# The field's type is looked up in the schema on a best-effort basis: any
# StandardError raised while fetching the schema or the type (for instance
# when the schema cannot be determined) results in a nil type.
#
# @return [Field::Reference]
def field(name)
  type = begin
    schema.field_type(name)
  rescue StandardError
    nil
  end
  Field::Reference.new(name, self, :type => type)
end

#filter(&block) ⇒ Object

FILTER

x.filter { a == b }          # => FILTER x BY a == b
x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3


63
64
65
66
# File 'lib/piglet/relation/relation.rb', line 63

# Builds a Filter over this relation. The block is instance_eval'ed in a
# BlockContext created from this relation and its interpreter, and the
# block's result is handed to Filter as the filter expression.
#
# @return [Filter]
def filter(&block)
  interpreter = @interpreter
  condition = BlockContext.new(self, interpreter).instance_eval(&block)
  Filter.new(self, interpreter, condition)
end

#foreach(&block) ⇒ Object

FOREACH … GENERATE

x.foreach { a }            # => FOREACH x GENERATE a
x.foreach { [a, b] }       # => FOREACH x GENERATE a, b
x.foreach { a.max }        # => FOREACH x GENERATE MAX(a)
x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b

See #nested_foreach for FOREACH … { … GENERATE }



76
77
78
79
# File 'lib/piglet/relation/relation.rb', line 76

# Builds a Foreach over this relation. The block is instance_eval'ed in a
# BlockContext created from this relation and its interpreter, and the
# block's result is handed to Foreach as the generated field(s).
#
# @return [Foreach]
def foreach(&block)
  interpreter = @interpreter
  generated = BlockContext.new(self, interpreter).instance_eval(&block)
  Foreach.new(self, interpreter, generated)
end

#group(*args) ⇒ Object

GROUP

x.group(:a)                           # => GROUP x BY a
x.group(:a, :b, :c)                   # => GROUP x BY (a, b, c)
x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3


25
26
27
28
# File 'lib/piglet/relation/relation.rb', line 25

# Builds a Group over this relation.
#
# The arguments are divided by split_at_options (defined elsewhere;
# presumably it separates a trailing options hash from the rest). The
# grouping keys are flattened into a single list, so they can be given
# either as separate arguments or as one array.
#
# @return [Group]
def group(*args)
  keys, options = split_at_options(args)
  flat_keys = [keys].flatten
  Group.new(self, @interpreter, flat_keys, options)
end

#hash ⇒ Object



186
187
188
# File 'lib/piglet/relation/relation.rb', line 186

# Hash code derived from the relation's alias, so relations with the same
# alias produce the same hash.
def hash
  relation_alias = self.alias
  relation_alias.hash
end

#join(description) ⇒ Object

JOIN

x.join(x => :a, y => :b)                        # => JOIN x BY a, y BY b
x.join(x => :a, y => :b, z => :c)               # => JOIN x BY a, y BY b, z BY c
x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
x.join(x => :a, y => :b, :parallel => 5)        # => JOIN x BY a, y BY b PARALLEL 5


97
98
99
# File 'lib/piglet/relation/relation.rb', line 97

# Builds a Join from this relation, its interpreter and the given description
# (a hash mapping relations to join fields, plus options such as :using and
# :parallel).
#
# @param description [Hash] passed through to Join unchanged
# @return [Join]
def join(description)
  interpreter = @interpreter
  Join.new(self, interpreter, description)
end

#limit(n) ⇒ Object

LIMIT

x.limit(10) # => LIMIT x 10


104
105
106
# File 'lib/piglet/relation/relation.rb', line 104

# Builds a Limit over this relation.
#
# @param n passed through to Limit unchanged
# @return [Limit]
def limit(n)
  interpreter = @interpreter
  Limit.new(self, interpreter, n)
end

#nested_foreach(&block) ⇒ Object

FOREACH … { … GENERATE }

x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }

See #foreach for FOREACH … GENERATE



86
87
88
89
# File 'lib/piglet/relation/relation.rb', line 86

# Builds a NestedForeach over this relation. The block is instance_eval'ed in
# a BlockContext created from this relation and its interpreter, and the
# block's result is handed to NestedForeach.
#
# @return [NestedForeach]
def nested_foreach(&block)
  interpreter = @interpreter
  generated = BlockContext.new(self, interpreter).instance_eval(&block)
  NestedForeach.new(self, interpreter, generated)
end

#next_field_alias ⇒ Object



14
15
16
17
18
# File 'lib/piglet/relation/relation.rb', line 14

# Returns a fresh field alias of the form "<relation alias>_field_<n>", where
# n is a per-relation counter that starts at 1 and grows by one on each call.
#
# @return [String]
def next_field_alias
  @field_counter = (@field_counter || 0) + 1
  relation_alias = self.alias
  "#{relation_alias}_field_#{@field_counter}"
end

#order(*args) ⇒ Object

ORDER

x.order(:a)                      # => ORDER x BY a
x.order(:a, :b)                  # => ORDER x BY a, b
x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
x.order(:a, :parallel => 5)      # => ORDER x BY a PARALLEL 5

NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in Ruby 1.8 the order of the keys cannot be guaranteed.



119
120
121
122
123
# File 'lib/piglet/relation/relation.rb', line 119

# Builds an Order over this relation.
#
# The arguments are divided by split_at_options (defined elsewhere;
# presumably it separates a trailing options hash from the rest).
#
# @return [Order]
def order(*args)
  ordering, options = split_at_options(args)
  # NOTE(review): splat assignment is kept as-is on purpose. On Ruby 1.9+ it
  # always yields an Array; on Ruby 1.8 a one-element array is unwrapped to
  # its element -- confirm which behaviour is intended before changing it.
  ordering = *ordering
  Order.new(self, @interpreter, ordering, options)
end

#sample(n) ⇒ Object

SAMPLE

x.sample(5) # => SAMPLE x 5


128
129
130
# File 'lib/piglet/relation/relation.rb', line 128

# Builds a Sample over this relation.
#
# @param n passed through to Sample unchanged
# @return [Sample]
def sample(n)
  interpreter = @interpreter
  Sample.new(self, interpreter, n)
end

#schema ⇒ Object



164
165
166
167
168
169
170
171
172
# File 'lib/piglet/relation/relation.rb', line 164

# Returns the schema of this relation by delegating to its single source
# relation.
#
# An empty source list is treated the same as a missing one. Previously an
# empty @sources fell through to `@sources.first.schema`, i.e. a call on nil,
# which surfaced as a NoMethodError instead of a SchemaError.
#
# @return the schema reported by the only source relation
# @raise [Piglet::Schema::SchemaError] when there is no source relation, or
#   when there is more than one
def schema
  if @sources.nil? || @sources.empty?
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema'
  elsif @sources.size > 1
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation'
  else
    @sources.first.schema
  end
end

#split(&block) ⇒ Object

SPLIT

y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4


135
136
137
138
# File 'lib/piglet/relation/relation.rb', line 135

# Splits this relation. The block is instance_eval'ed in a BlockContext
# created from this relation and its interpreter; its result is handed to
# Split, and the shards of that Split are returned.
#
# @return the value of Split#shards
def split(&block)
  interpreter = @interpreter
  conditions = BlockContext.new(self, interpreter).instance_eval(&block)
  splitter = Split.new(self, interpreter, conditions)
  splitter.shards
end

#stream(*args) ⇒ Object

STREAM

x.stream(:command => 'cut -f 3')       # => STREAM x THROUGH `cut -f 3`
x.stream(:cmd)                         # => STREAM x THROUGH cmd
x.stream(y, :command => 'cut -f 3')    # => STREAM x, y THROUGH `cut -f 3`
x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)


146
147
148
149
# File 'lib/piglet/relation/relation.rb', line 146

# Builds a Stream over this relation.
#
# The arguments are divided by split_at_options (defined elsewhere;
# presumably it separates a trailing options hash from the rest); both parts
# are passed on to Stream unchanged.
#
# @return [Stream]
def stream(*args)
  positional, options = split_at_options(args)
  interpreter = @interpreter
  Stream.new(self, interpreter, positional, options)
end

#union(*relations) ⇒ Object

UNION

x.union(y)    # => UNION x, y
x.union(y, z) # => UNION x, y, z


155
156
157
# File 'lib/piglet/relation/relation.rb', line 155

# Builds a Union of this relation and the given relations. This relation
# comes first, and nested arrays of relations are flattened.
#
# @return [Union]
def union(*relations)
  members = [self, *relations].flatten
  Union.new(members, @interpreter)
end