Class: Rubydora::Datastream

Inherits:
Object
  • Object
show all
Extended by:
ActiveModel::Callbacks
Includes:
ActiveModel::Dirty
Defined in:
lib/rubydora/datastream.rb

Overview

This class represents a Fedora datastream object and provides helper methods for creating and manipulating them.

Constant Summary collapse

DS_ATTRIBUTES =

mapping datastream attributes (and api parameters) to datastream profile names

{:controlGroup => :dsControlGroup, :dsLocation => :dsLocation, :altIDs => nil, :dsLabel => :dsLabel, :versionable => :dsVersionable, :dsState => :dsState, :formatURI => :dsFormatURI, :checksumType => :dsChecksumType, :checksum => :dsChecksum, :mimeType => :dsMIME, :logMessage => nil, :ignoreContent => nil, :lastModifiedDate => nil, :content => nil, :asOfDateTime => nil}
DS_DEFAULT_ATTRIBUTES =
{ :controlGroup => 'M', :dsState => 'A', :versionable => true }
DS_READONLY_ATTRIBUTES =
[ :dsCreateDate , :dsSize, :dsVersionID ]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(digital_object, dsid, options = {}, default_instance_attributes = {}) ⇒ Datastream

Initialize a Rubydora::Datastream object, which may or may not already exist in the datastore.

Provides an `after_initialize` callback for extensions.

Parameters:

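  • digital_object (Rubydora::DigitalObject)
  • dsid (String)

    Datastream ID

  • options (Hash)

    attribute values; each key is assigned through its setter (used esp. for creating new datastreams)

  • default_instance_attributes (Hash)

    values merged over the default attributes for this instance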



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/rubydora/datastream.rb', line 103

# Set up a datastream handle; the datastream may or may not already exist
# in the datastore.
#
# All assignments happen inside +_run_initialize_callbacks+ so that
# initialize callbacks wrap them.
#
# @param digital_object [Rubydora::DigitalObject] stored as @digital_object
# @param dsid [String] datastream ID, stored as @dsid
# @param options [Hash] every key/value pair is applied through the
#   matching "<key>=" setter on this instance
# @param default_instance_attributes [Hash] merged over #default_attributes
#   and stored as this instance's defaults
def initialize(digital_object, dsid, options = {}, default_instance_attributes = {})
  _run_initialize_callbacks do
    @digital_object = digital_object
    @dsid = dsid
    @options = options
    @default_attributes = default_attributes.merge(default_instance_attributes)

    # Route each option through its writer so setter logic is applied.
    options.each { |attribute, value| send("#{attribute}=", value) }
  end
end

Instance Attribute Details

#digital_objectObject (readonly)

Returns the value of attribute digital_object.



17
18
19
# File 'lib/rubydora/datastream.rb', line 17

# Reader for the @digital_object instance variable (assigned in #initialize).
#
# @return [Object] the stored value; presumably a Rubydora::DigitalObject
def digital_object
  @digital_object
end

#dsidObject (readonly)

Returns the value of attribute dsid.



17
18
19
# File 'lib/rubydora/datastream.rb', line 17

# Reader for the @dsid instance variable (assigned in #initialize).
#
# @return [Object] the stored datastream ID; presumably a String
def dsid
  @dsid
end

Class Method Details

.default_attributesObject



82
83
84
# File 'lib/rubydora/datastream.rb', line 82

# Class-level default attribute values.
#
# @return [Hash] the DS_DEFAULT_ATTRIBUTES constant itself (not a copy)
def self.default_attributes
  DS_DEFAULT_ATTRIBUTES
end

Instance Method Details

#asOfDateTime(asOfDateTime = nil) ⇒ Object



74
75
76
77
78
79
80
# File 'lib/rubydora/datastream.rb', line 74

# Dual-purpose accessor.
#
# With no argument (or nil) it returns the value stored in @asOfDateTime.
# With an argument it builds a *new* instance of this class for the same
# digital object and dsid, with :asOfDateTime merged into the stored options;
# the receiver itself is not modified.
#
# @param asOfDateTime [Object, nil] the version timestamp to pin, or nil to read
# @return [Object] the stored @asOfDateTime, or a new instance of this class
def asOfDateTime(asOfDateTime = nil)
  return @asOfDateTime if asOfDateTime.nil?

  pinned_options = @options.merge(:asOfDateTime => asOfDateTime)
  self.class.new(@digital_object, @dsid, pinned_options)
end

#changed?Boolean

Returns:

  • (Boolean)


200
201
202
# File 'lib/rubydora/datastream.rb', line 200

# Reports a change when either the inherited changed? does (presumably the
# ActiveModel::Dirty implementation) or #content_changed? does.
#
# @return [Boolean]
def changed?
  super || content_changed?
end

#contentObject Also known as: read

Retrieve the content of the datastream, fetching it from the repository if it is not already loaded. This method is overridden in ActiveFedora.



127
128
129
# File 'lib/rubydora/datastream.rb', line 127

# Datastream content; delegates to #local_or_remote_content with
# ensure_fetch set to true.
#
# @return [Object] whatever #local_or_remote_content returns
def content
  local_or_remote_content(true)
end

#content=(new_content) ⇒ String or IO

Set the content of the datastream

Parameters:

  • (String or IO)

Returns:

  • (String or IO)


175
176
177
178
# File 'lib/rubydora/datastream.rb', line 175

# Set the content of the datastream.
#
# Refused when @asOfDateTime is set, i.e. this instance was pinned to an
# older version.
#
# @param new_content [String, IO]
# @return [String, IO] the assigned content
# @raise [RuntimeError] when @asOfDateTime is set
def content=(new_content)
  if @asOfDateTime
    raise "Can't change values on older versions"
  end

  @content = new_content
end

#content_changed?Boolean

Returns:

  • (Boolean)


180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/rubydora/datastream.rb', line 180

# Has the local content diverged from the content held for the persisted
# datastream?
#
# * Control groups 'E' and 'R' never report a content change.
# * A new datastream reports a change as soon as it has non-blank content.
# * Control group 'X' is compared via EquivalentXml on the parsed documents.
# * Every other control group is compared with plain inequality.
#
# The persisted side of the comparison is #datastream_content when
# eager_load_datastream_content is truthy, otherwise whatever is already
# cached in @datastream_content.
#
# @return [Boolean]
def content_changed?
  return false if ['E', 'R'].include?(controlGroup)
  # new datastreams must have content
  return true if new? && !local_or_remote_content(false).blank?

  eager = eager_load_datastream_content

  if controlGroup == "X"
    # Read the local side first, exactly as the comparison always has; reading
    # #content may itself populate @datastream_content.
    local = content
    persisted = eager ? datastream_content : @datastream_content
    !EquivalentXml.equivalent?(Nokogiri::XML(local), Nokogiri::XML(persisted))
  else
    local = local_or_remote_content(false)
    persisted = eager ? datastream_content : @datastream_content
    local != persisted
  end
end

#createRubydora::Datastream

Add datastream to Fedora



329
330
331
332
333
334
335
336
# File 'lib/rubydora/datastream.rb', line 329

# Add the datastream to Fedora.
#
# Runs inside the :create callbacks: sends #to_api_params plus pid, dsid and
# content to the repository's add_datastream, resets the profile attributes,
# and returns a brand-new instance of this class for the same object/dsid.
#
# @return [Rubydora::Datastream] a fresh instance, not the receiver
def create
  check_if_read_only
  run_callbacks(:create) do
    repo = repository
    creation_params = to_api_params.merge({ :pid => pid, :dsid => dsid, :content => content })
    repo.add_datastream(creation_params)
    reset_profile_attributes
    self.class.new(digital_object, dsid, @options)
  end
end

#datastream_contentObject



152
153
154
155
156
157
158
159
160
161
162
# File 'lib/rubydora/datastream.rb', line 152

# Content of the persisted datastream as returned by the repository's
# datastream_dissemination call, memoized in @datastream_content.
#
# A new datastream has nothing to fetch, so nil is returned without touching
# the repository. A RestClient::ResourceNotFound from the repository is
# swallowed and yields nil.
#
# @return [Object, nil] the dissemination result, or nil
def datastream_content
  return nil if new?
  return @datastream_content if @datastream_content

  @datastream_content =
    begin
      request = { :pid => pid, :dsid => dsid }
      version_time = asOfDateTime
      request[:asOfDateTime] = version_time if version_time

      repository.datastream_dissemination(request)
    rescue RestClient::ResourceNotFound
      nil
    end
end

#datastream_will_change!Object



363
364
365
# File 'lib/rubydora/datastream.rb', line 363

# Flags the :profile attribute as about to change by calling
# attribute_will_change! (presumably provided by ActiveModel::Dirty).
def datastream_will_change!
  attribute_will_change! :profile
end

#default_attributesObject



86
87
88
# File 'lib/rubydora/datastream.rb', line 86

# Default attribute values for this instance.
#
# Falls back to (and memoizes) the class-level defaults the first time it is
# read when no instance-level defaults have been stored.
#
# @return [Hash]
def default_attributes
  return @default_attributes if @default_attributes

  @default_attributes = self.class.default_attributes
end

#default_attributes=(attributes) ⇒ Object



90
91
92
# File 'lib/rubydora/datastream.rb', line 90

# Merge +attributes+ over the current defaults and store the result.
# The previous defaults hash is not mutated; a new merged hash replaces it.
#
# @param attributes [Hash] values that override or extend the defaults
def default_attributes=(attributes)
  merged = default_attributes.merge(attributes)
  @default_attributes = merged
end

#deleteRubydora::Datastream

Purge the datastream from Fedora

Returns:



353
354
355
356
357
358
359
360
361
# File 'lib/rubydora/datastream.rb', line 353

# Purge the datastream from Fedora.
#
# Runs inside the :destroy callbacks. The repository purge is skipped for a
# datastream that is still new; in every case the entry is removed from the
# digital object's datastreams collection and the profile attributes are reset.
#
# @return [Rubydora::Datastream] self
def delete
  check_if_read_only
  run_callbacks(:destroy) do
    persisted = !new?
    repository.purge_datastream(:pid => pid, :dsid => dsid) if persisted
    digital_object.datastreams.delete(dsid)
    reset_profile_attributes
    self
  end
end

#has_content?Boolean

Returns:

  • (Boolean)


204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/rubydora/datastream.rb', line 204

# Does this datastream have content that could be saved?
#
# @return [Boolean]
def has_content?
  # Anything already persisted is required to have content.
  return true unless new?

  # Control groups E and R should have content: a non-blank dsLocation.
  if %w[E R].include?(controlGroup)
    return !dsLocation.blank?
  end

  # Otherwise we have content if it was set locally. (A former shortcut,
  # `return true if instance_variable_defined? :@content`, is disabled.)
  behaves_like_io?(@content) || !content.blank?
end

#local_or_remote_content(ensure_fetch = true) ⇒ String

Retrieve the content of the datastream (and cache it)

Parameters:

  • ensure_fetch (Boolean) (defaults to: true)

    if true, fetch the content from the repository when it is not already loaded

Returns:

  • (String)


134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/rubydora/datastream.rb', line 134

# Retrieve the content of the datastream (and cache it in @content).
#
# For a new datastream the raw @content is returned untouched. Otherwise an
# unset @content is filled from #datastream_content (when +ensure_fetch+ is
# true) or from whatever is already cached in @datastream_content. IO-like
# content is rewound, read in full, and rewound again so it can be re-read.
#
# @param ensure_fetch [Boolean] whether a missing @content may be filled via
#   #datastream_content
# @return [String, Object, nil] the content; for IO-like content, what #read returned
def local_or_remote_content(ensure_fetch = true)
  return @content if new?

  unless @content
    @content = ensure_fetch ? datastream_content : @datastream_content
  end

  return @content unless behaves_like_io?(@content)

  begin
    @content.rewind
    @content.read
  ensure
    # leave the stream positioned at the start even if reading failed
    @content.rewind
  end
end

#new?Boolean

Is this datastream new, i.e. does it not yet exist in the repository?

Returns:

  • (Boolean)


122
123
124
# File 'lib/rubydora/datastream.rb', line 122

# Is this datastream new (not yet persisted)?
#
# True when there is no digital object, when the digital object is itself
# new, or when the profile XML is blank.
#
# @return [Boolean]
def new?
  owner = digital_object
  return true if owner.nil?

  owner.new? || profile_xml.blank?
end

#pidObject

Helper method to get digital object pid



116
117
118
# File 'lib/rubydora/datastream.rb', line 116

# Helper method to get the digital object's pid; delegates to
# digital_object.pid.
#
# @return [Object] whatever digital_object.pid returns
def pid
  digital_object.pid
end

#profile(opts = {}) ⇒ Hash

Retrieve the datastream profile as a hash (and cache it)

Parameters:

  • opts (Hash) (defaults to: {})

    :validateChecksum if you want fedora to validate the checksum

Returns:

  • (Hash)

    see Fedora #getDatastream documentation for keys



261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/rubydora/datastream.rb', line 261

# Retrieve the datastream profile as a hash (and cache it in @profile).
#
# The cached profile is reused unless :validateChecksum was requested and the
# cached hash has no 'dsChecksumValid' key, in which case the profile is
# fetched again. When the digital object does not respond to :repository the
# profile is an empty hash.
#
# @param opts [Hash] :validateChecksum if you want fedora to validate the checksum
# @return [Hash] see Fedora #getDatastream documentation for keys
def profile(opts = {})
  if @profile
    needs_checksum = opts[:validateChecksum] && !@profile.has_key?('dsChecksumValid')
    return @profile unless needs_checksum
  end

  return @profile = {} unless digital_object.respond_to?(:repository)

  xml = profile_xml(opts)
  @profile = xml.blank? ? {} : (self.profile_xml_to_hash(xml) || {})
end

#profile=(profile_xml) ⇒ Object



294
295
296
# File 'lib/rubydora/datastream.rb', line 294

# Replace the cached profile with the hash parsed from +profile_xml+.
#
# @param profile_xml [String] datastream profile XML, converted through
#   #profile_xml_to_hash
def profile=(profile_xml)
  parsed = self.profile_xml_to_hash(profile_xml)
  @profile = parsed
end

#profile_xml(opts = {}) ⇒ Object



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/rubydora/datastream.rb', line 276

# Raw datastream profile as returned by the repository's datastream call,
# memoized in @profile_xml.
#
# Passing any options discards the memoized value and forces a new request.
# :asOfDateTime is added when this instance is pinned to a version, and
# :validateChecksum when the repository config enables it.
#
# @param opts [Hash] extra request options merged over pid/dsid
# @return [String] the profile XML, or '' when the repository reports the
#   datastream as not found (i.e. it is new)
# @raise [RestClient::Unauthorized] re-raised unchanged
def profile_xml(opts = {})
  @profile_xml = nil unless opts.empty?
  return @profile_xml if @profile_xml

  @profile_xml =
    begin
      request = { :pid => pid, :dsid => dsid }.merge(opts)
      version_time = asOfDateTime
      request[:asOfDateTime] = version_time if version_time
      request[:validateChecksum] = true if repository.config[:validateChecksum]
      repository.datastream(request)
    rescue RestClient::Unauthorized
      raise
    rescue RestClient::ResourceNotFound
      # the datastream is new
      ''
    end
end

#profile_xml_to_hash(profile_xml) ⇒ Object



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/rubydora/datastream.rb', line 298

# Convert datastream profile XML into a hash keyed by element name.
#
# Each child element of <datastreamProfile> contributes its text under its
# element name. Names that occur once map to that text (nil when the text is
# empty); names that occur several times map to an array of texts. A few
# well-known keys are then coerced:
#
# * 'dsSize'          -> Integer
# * 'dsCreateDate'    -> Time (left as-is when it cannot be parsed)
# * 'dsChecksumValid' -> true/false
# * 'dsVersionable'   -> true/false
#
# The input string is never modified: when the management namespace is
# missing it is added to a copy, so callers (and any cached XML they hold)
# keep their original string, and frozen strings are accepted.
#
# @param profile_xml [String] datastream profile XML
# @return [Hash]
def profile_xml_to_hash(profile_xml)
  namespaces = { 'management' => "http://www.fedora.info/definitions/1/0/management/" }

  xml = profile_xml
  unless xml =~ /xmlns=/
    # non-destructive gsub: do not alter the caller's string
    xml = xml.gsub('<datastreamProfile', '<datastreamProfile xmlns="http://www.fedora.info/definitions/1/0/management/"')
  end

  doc = Nokogiri::XML(xml)
  h = doc.xpath('/management:datastreamProfile/*', namespaces).inject({}) do |sum, node|
    (sum[node.name] ||= []) << node.text
    sum
  end

  # Collapse single-occurrence elements to a scalar (nil for empty text).
  h.keys.each do |key|
    values = h[key]
    next unless values.length == 1
    h[key] = values.reject { |x| x.empty? }.first
  end

  # Coercions are best-effort: a value that cannot be converted (e.g. a
  # multi-valued array or an unparseable date) is kept unchanged.
  h['dsSize'] &&= begin
    h['dsSize'].to_i
  rescue StandardError
    h['dsSize']
  end
  h['dsCreateDate'] &&= begin
    Time.parse(h['dsCreateDate'])
  rescue StandardError
    h['dsCreateDate']
  end
  h['dsChecksumValid'] &&= h['dsChecksumValid'] == 'true'
  h['dsVersionable'] &&= h['dsVersionable'] == 'true'
  h
end

#saveRubydora::Datastream

Modify or save the datastream



340
341
342
343
344
345
346
347
348
349
# File 'lib/rubydora/datastream.rb', line 340

# Modify or save the datastream.
#
# Runs inside the :save callbacks. A datastream without content cannot be
# saved; a new datastream is handed to #create; otherwise the repository's
# modify_datastream is called with #to_api_params plus pid and dsid, the
# profile attributes are reset, and a brand-new instance is returned.
#
# NOTE(review): the `return create` leaves the run_callbacks block with a
# non-local return; whether callbacks that run after the block still fire in
# that case should be confirmed.
#
# @return [Rubydora::Datastream] a fresh instance, not the receiver
# @raise [RubydoraError] when the datastream has no content
def save
  check_if_read_only
  run_callbacks(:save) do
    unless has_content?
      raise RubydoraError, "Unable to save #{inspect} without content"
    end
    return create if new?

    repo = repository
    repo.modify_datastream(to_api_params.merge({ :pid => pid, :dsid => dsid }))
    reset_profile_attributes
    self.class.new(digital_object, dsid, @options)
  end
end

#stream(from = 0, length = nil) ⇒ Object

Returns a streaming response of the datastream. This is ideal for large datastreams because it doesn’t require holding the entire content in memory. If you specify the from and length parameters, it simulates a range request. Unfortunately, Fedora 3 doesn’t support range requests, so this method has to download the whole datastream and seek to the part you care about.

Parameters:

  • from (Integer) (defaults to: 0)

    (bytes) the starting point you want to return.



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/rubydora/datastream.rb', line 225

# Returns a streaming response of the datastream as an Enumerator of chunks.
#
# The content is never held in memory as a whole. When +from+/+length+ are
# given a range request is simulated: the repository response is read chunk
# by chunk from the beginning, only the bytes inside
# [from, from + length) are emitted, and reading stops as soon as the end of
# the range has been reached.
#
# Each chunk is clipped at both ends, so the range is exact even when it
# starts and ends inside the same chunk. Offsets are applied in bytes
# (bytesize/byteslice), matching the byte-based +from+ and dsSize.
#
# @param from [Integer] (bytes) the starting point you want to return
# @param length [Integer, nil] number of bytes to return; defaults to
#   everything from +from+ up to dsSize
# @return [Enumerator] yields String chunks
# @raise [RuntimeError] when dsSize is not available
def stream(from = 0, length = nil)
  raise "Can't determine dsSize" unless dsSize
  length = dsSize - from unless length
  range_end = from + length # exclusive
  counter = 0
  Enumerator.new do |blk|
    repository.datastream_dissemination(:pid => pid, :dsid => dsid) do |response|
      response.read_body do |chunk|
        chunk_start = counter
        counter += chunk.bytesize

        if counter > from
          # Clip the chunk to the requested range, relative to the chunk start.
          first = [from - chunk_start, 0].max
          last = [range_end, counter].min - chunk_start # exclusive
          blk << chunk.byteslice(first, last - first) if last > first
        end

        # Nothing after the end of the range is needed; stop reading.
        break if counter >= range_end
      end
    end
  end
end

#urlString

Get the URL for the datastream content

Returns:

  • (String)


166
167
168
169
170
# File 'lib/rubydora/datastream.rb', line 166

# Get the URL for the datastream content.
#
# Built from the repository's datastream_url for this pid/dsid, including
# :asOfDateTime when this instance is pinned to a version, with "/content"
# appended.
#
# @return [String]
def url
  query = {}
  version_time = asOfDateTime
  query[:asOfDateTime] = version_time if version_time

  repository.datastream_url(pid, dsid, query) + "/content"
end

#versionsObject



317
318
319
320
321
322
323
324
325
# File 'lib/rubydora/datastream.rb', line 317

# All versions of this datastream, as reported by the repository's
# datastream_versions call.
#
# Each <datastreamProfile> element in the response becomes a new instance of
# this class for the same digital object and dsid, initialised with that
# element's XML as :profile and its dsCreateDate text as :asOfDateTime.
#
# @return [Array<Rubydora::Datastream>] empty when the repository returns nil
def versions
  versions_xml = repository.datastream_versions(:pid => pid, :dsid => dsid)
  return [] if versions_xml.nil?

  unless versions_xml =~ /xmlns=/
    versions_xml.gsub! '<datastreamProfile', '<datastreamProfile xmlns="http://www.fedora.info/definitions/1/0/management/"'
  end

  profiles = Nokogiri::XML(versions_xml).xpath('//management:datastreamProfile', {'management' => "http://www.fedora.info/definitions/1/0/management/"})
  profiles.map do |ds|
    created = ds.xpath('management:dsCreateDate', 'management' => "http://www.fedora.info/definitions/1/0/management/").text
    self.class.new @digital_object, @dsid, :profile => ds.to_s, :asOfDateTime => created
  end
end