Class: MMS2R::Media

Inherits:
Object show all
Defined in:
lib/mms2r.rb,
lib/mms2r/media.rb,
lib/mms2r/media/sprint.rb

Defined Under Namespace

Modules: Sprint

Constant Summary collapse

USER_AGENT =

Spoof User-Agent, primarily for the Sprint CDN

"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(mail, opts = {}) ⇒ Media

Initialize a new MMS2R::Media comprised of a mail.

Specify options to initialize with: :logger => some_logger, for logging; :process => :lazy, for non-greedy processing upon initialization.

#process will have to be called explicitly if the lazy process option is chosen.



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/mms2r/media.rb', line 186

# Builds a Media wrapper around +mail+: records the carrier domain, picks a
# unique working directory, loads the alias and carrier configuration from
# conf_dir, mixes in the carrier-specific processor module when one is
# registered in MMS2R::CARRIERS and, unless lazy processing was requested,
# processes the message immediately.
#
# mail:: the mail object; it is handed to self.class.domain and its
#        message_id is read
# opts:: options hash; :logger is stored as the logger and
#        :process => :lazy defers the call to #process
def initialize(mail, opts={})
  @mail   = mail
  @logger = opts[:logger]
  log("#{self.class} created", :info)

  @carrier   = self.class.domain(mail)
  @dir_count = 0

  # The working directory name combines the sanitized message id with a SHA1
  # of carrier, current time and a random number.
  digest   = Digest::SHA1.hexdigest("#{@carrier}-#{Time.now.to_i}-#{rand}")
  base_dir = self.tmp_dir()
  @media_dir = File.expand_path(
                 File.join(base_dir, "#{self.safe_message_id(@mail.message_id)}_#{digest}"))

  @media = {}
  @was_processed = false
  @number = @subject = @body = @exif = nil
  @default_media = @default_text = @default_html = nil

  aliases_path = File.expand_path(File.join(self.conf_dir(), "aliases.yml"))
  @aliases = YAML::load_file(aliases_path)

  # A carrier may be an alias of another one; the aliased name selects the
  # carrier configuration file, which is optional.
  carrier_file = "#{@aliases[@carrier] || @carrier}.yml"
  carrier_path = File.expand_path(File.join(self.conf_dir(), carrier_file))
  carrier_conf = File.exist?(carrier_path) ? YAML::load_file(carrier_path) : {}
  @config = self.class.initialize_config(carrier_conf)

  carrier_module = MMS2R::CARRIERS[@carrier]
  extend carrier_module if carrier_module

  lazy = (opts[:process] == :lazy) rescue false
  self.process() unless lazy
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method, *args, &block) ⇒ Object

Pass off everything we don’t do to the Mail object. TODO: refactor to explicit addition a la blog.jayfields.com/2008/02/ruby-replace-methodmissing-with-dynamic.html



112
113
114
# File 'lib/mms2r/media.rb', line 112

# Delegates any message this object does not implement to the wrapped mail
# object. The call goes through #send, so method visibility on the mail
# object is not checked.
# TODO: replace with explicitly defined delegators.
def method_missing(method, *args, &block)
  mail.send(method, *args, &block)
end

# Keeps respond_to? and method(...) in step with method_missing above.
# The mail object is queried with include_private = true because the
# delegation uses #send, which also reaches the mail object's private methods.
def respond_to_missing?(method, include_private = false)
  mail.respond_to?(method, true) || super
end

Instance Attribute Details

#carrierObject (readonly)

Carrier is the domain name of the carrier. If the carrier is not known the carrier will be set to ‘mms2r.media’



132
133
134
# File 'lib/mms2r/media.rb', line 132

# Reader for @carrier, which the constructor sets from self.class.domain(mail).
def carrier
  @carrier
end

#mailObject (readonly)

Mail object that the media files were derived from.



119
120
121
# File 'lib/mms2r/media.rb', line 119

# Reader for @mail, the mail object this Media instance was built from.
def mail
  @mail
end

#mediaObject (readonly)

media returns the hash of media. The media hash is keyed by mime-type such as ‘text/plain’ and the value mapped to the key is an array of media that are of that type.



126
127
128
# File 'lib/mms2r/media.rb', line 126

# Reader for @media, the hash that add_file fills: each key is a type and
# each value is the array of files added for that type.
def media
  @media
end

#media_dirObject (readonly)

Base working dir where media for a unique mms message are dropped



137
138
139
# File 'lib/mms2r/media.rb', line 137

# Reader for @media_dir, the per-message working directory path computed in
# the constructor.
def media_dir
  @media_dir
end

Class Method Details

.conf_dirObject

Get the directory where conf files are stored.



671
672
673
# File 'lib/mms2r/media.rb', line 671

# Directory holding the YAML configuration files. Defaults to the "conf"
# directory two levels above the directory of this file; the value is kept in
# the @@conf_dir class variable, which conf_dir= can replace.
def self.conf_dir
  @@conf_dir ||= File.expand_path(File.join('..', '..', 'conf'), File.dirname(__FILE__))
end

.conf_dir=(d) ⇒ Object

Set the directory where conf files are stored.



678
679
680
# File 'lib/mms2r/media.rb', line 678

# Overrides the configuration directory by assigning +d+ to the @@conf_dir
# class variable.
def self.conf_dir=(d)
  @@conf_dir=d
end

.default_ext(content_type) ⇒ Object

Returns a default file extension based on a content type



693
694
695
696
697
698
699
# File 'lib/mms2r/media.rb', line 693

# Picks a file extension for +content_type+: the entry in MMS2R::EXT when
# there is one, otherwise whatever follows the last "/" of the content type.
# Returns nil when +content_type+ is nil and has no EXT entry.
def self.default_ext(content_type)
  mapped = MMS2R::EXT[content_type]
  return mapped if mapped

  content_type.split('/').last if content_type
end

.domain(mail) ⇒ Object

Determine if return-path or from is going to be used to designate the origin carrier. If the domain in the From header is listed in conf/from.yml then that is the carrier domain. Else if there is a Return-Path header its address’s domain is the carrier domain, else use From header’s address domain.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/mms2r/media.rb', line 146

# Works out which domain identifies the carrier of +mail+.
#
# The domain of the first From address wins when it is listed in from.yml
# (loaded from conf_dir). Otherwise the Return-Path domain is used when it is
# present, and the From domain is the last resort. A missing Return-Path or
# From header contributes an empty string.
def self.domain(mail)
  bounce_address = mail.return_path
  return_domain  = bounce_address ? bounce_address.split('@').last : ''

  sender      = mail.from && mail.from.first
  from_domain = sender ? sender.split('@').last : ''

  listed_path = File.expand_path(File.join(self.conf_dir(), "from.yml"))
  listed = YAML::load_file(listed_path)

  return from_domain if listed.include?(from_domain)

  return_domain.present? ? return_domain : from_domain
end

.initialize_config(c) ⇒ Object

Joins the generic mms2r configuration with the carrier specific configuration.



705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
# File 'lib/mms2r/media.rb', line 705

# Loads the generic configuration from mms2r_media.yml in conf_dir and layers
# the carrier configuration +c+ on top of it.
#
# The 'ignore' and 'transform' sections are merged per type by appending the
# carrier entries to the generic ones; a carrier 'number' entry replaces the
# generic one. Missing sections default to {} ('ignore', 'transform') and []
# ('number'). When +c+ is nil or false the generic configuration is returned
# as is.
def self.initialize_config(c)
  generic_path = File.expand_path(File.join(self.conf_dir(), "mms2r_media.yml"))
  merged = YAML::load_file(generic_path)
  merged['ignore']    ||= {}
  merged['transform'] ||= {}
  merged['number']    ||= []
  return merged unless c

  %w[ignore transform].each do |section|
    next unless c[section]

    c[section].each do |type, extras|
      merged[section][type] = (merged[section][type] || []) + extras
    end
  end
  merged['number'] = c['number'] if c['number']

  merged
end

.safe_message_id(mid) ⇒ Object

Helper to create a safe directory path element based on the mail message id.



686
687
688
# File 'lib/mms2r/media.rb', line 686

# Turns a mail message id into a string usable as a directory name by
# deleting every "$", "<", ">", "@" and "." character. A nil id is replaced
# by the current epoch time in seconds.
def self.safe_message_id(mid)
  return "#{Time.now.to_i}" if mid.nil?

  mid.gsub(/\$|<|>|@|\./, "")
end

.tmp_dirObject

Get the temporary directory where media files are written to.



658
659
660
# File 'lib/mms2r/media.rb', line 658

# Directory under which media files are written. Defaults to
# <system tmpdir>/<USER>/mms2r, where the USER path segment is empty when the
# environment variable is unset; the value is kept in the @@tmp_dir class
# variable, which tmp_dir= can replace.
def self.tmp_dir
  @@tmp_dir ||= begin
    user_segment = ENV['USER'] || ''
    File.expand_path(File.join(Dir.tmpdir, user_segment, 'mms2r'))
  end
end

.tmp_dir=(d) ⇒ Object

Set the temporary directory where media files are written to.



664
665
666
# File 'lib/mms2r/media.rb', line 664

# Overrides the temporary directory by assigning +d+ to the @@tmp_dir class
# variable.
def self.tmp_dir=(d)
  @@tmp_dir=d
end

Instance Method Details

#add_file(type, file) ⇒ Object

Helper to add a file to the media hash.



503
504
505
506
# File 'lib/mms2r/media.rb', line 503

# Appends +file+ to the list kept for +type+ in the media hash, creating the
# list on first use. Returns that list.
def add_file(type, file)
  (media[type] ||= []) << file
end

#aliasesObject



544
545
546
# File 'lib/mms2r/media.rb', line 544

# Reader for @aliases, the data the constructor loads from aliases.yml in
# conf_dir.
def aliases
  @aliases
end

#bodyObject

Convenience method that returns a string including all the text of the default text/plain file found. If the plain text is blank then it returns stripped down version of the title and body of default text/html. Returns empty string if no body text is found.



266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/mms2r/media.rb', line 266

# Returns the message text and stores it in @body.
#
# The stripped content of the default text/plain file is used when there is
# one. If that text is blank and a default text/html file exists, the texts
# of its <title> and of the direct children of <body> are joined with single
# spaces instead. Returns "" when no text is found.
def body
  plain = default_text

  if RUBY_VERSION < "1.9"
    # Rubies before 1.9: convert the text from ISO-8859-1 to UTF-8 via Iconv.
    @body = plain ? IO.read(plain.path).strip : ""
    require 'iconv'
    converter = Iconv.new('UTF-8', 'ISO-8859-1')
    @body = converter.iconv(@body)
    @body << converter.iconv(nil)
    converter.close
  else
    # Read in binary mode and keep only characters with a valid encoding.
    raw = plain ? IO.read(plain.path, :mode => "rb").strip : ""
    @body = raw.chars.select { |char| char.valid_encoding? }.join
  end

  if @body.blank?
    html_file = default_html
    if html_file
      document = Nokogiri::HTML(IO.read(html_file.path))
      fragments = document.xpath("//head/title").map(&:text) +
                  document.xpath("//body/*").map(&:text)
      @body = fragments.join(" ")
    end
  end
  @body
end

#conf_dirObject

convenience accessor for self.class.conf_dir



735
736
737
# File 'lib/mms2r/media.rb', line 735

# Instance-level shortcut for self.class.conf_dir.
def conf_dir
  self.class.conf_dir
end

#default_ext(type) ⇒ Object

convenience accessor for self.class.default_ext



749
750
751
# File 'lib/mms2r/media.rb', line 749

# Instance-level shortcut for self.class.default_ext; +type+ is passed
# through unchanged.
def default_ext(type)
  self.class.default_ext(type)
end

#default_htmlObject

Returns a File with the most likely candidate that is html, or nil otherwise. It also adds singleton methods to the File object so it can be used in place of a CGI upload (local_path, original_filename, size, and content_type) such as in conjunction with AttachmentFu. The largest file found in terms of bytes is returned.

Returns nil if there are not any html Files found



322
323
324
# File 'lib/mms2r/media.rb', line 322

# Memoized lookup of the text/html attachment: the result of
# attachment(['text/html']) is cached in @default_html. A nil or false result
# is not cached, so the lookup is repeated on the next call.
def default_html
  @default_html ||= attachment(['text/html'])
end

#default_mediaObject

Returns a File with the most likely candidate for the user-submitted media. Given that most MMS messages only have one file attached, this method will try to return that file. Singleton methods are added to the File object so it can be used in place of a CGI upload (local_path, original_filename, size, and content_type) such as in conjunction with AttachmentFu. The largest file found in terms of bytes is returned.

Returns nil if there are not any video or image Files found.



298
299
300
# File 'lib/mms2r/media.rb', line 298

# Memoized lookup of the default media attachment: the result of
# attachment(['video', 'image', 'application', 'text']) is cached in
# @default_media. A nil or false result is not cached, so the lookup is
# repeated on the next call.
def default_media
  @default_media ||= attachment(['video', 'image', 'application', 'text'])
end

#default_textObject

Returns a File with the most likely candidate that is text, or nil otherwise. It also adds singleton methods to the File object so it can be used in place of a CGI upload (local_path, original_filename, size, and content_type) such as in conjunction with AttachmentFu. The largest file found in terms of bytes is returned.

Returns nil if there are not any text Files found



310
311
312
# File 'lib/mms2r/media.rb', line 310

# Memoized lookup of the text/plain attachment: the result of
# attachment(['text/plain']) is cached in @default_text. A nil or false
# result is not cached, so the lookup is repeated on the next call.
def default_text
  @default_text ||= attachment(['text/plain'])
end

#device_type?Boolean

Best guess of the mobile device type. Simple heuristics thus far by inspecting mail headers and jpeg/tiff exif metadata, and file name. Known smart phone types thus far are

  • :blackberry

  • :dash

  • :droid

  • :htc

  • :iphone

  • :lge

  • :motorola

  • :nokia

  • :palm

  • :pantech

  • :samsung

If the message is from a carrier known to MMS2R, and not a smart phone, its type is returned as :handset. Otherwise device type is :unknown.



569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
# File 'lib/mms2r/media.rb', line 569

# Best guess of the device type that sent the message. Returns the first
# type whose configured regex matches, checking in this order:
#
# 1. exif model, make and software of the image attachment (jpeg/tiff only);
#    the exif object is stored in @exif as a side effect
# 2. mail headers listed under device_types/headers
# 3. the file name of the image attachment, then of the video attachment
# 4. the mail boundary
#
# When nothing matches, :handset is returned if a carrier configuration file
# exists in conf_dir for this carrier (or its alias), otherwise :unknown.
def device_type?
  image = attachment(['image'])
  if image
    @exif = case image.original_filename
            when /\.je?pg$/i
              EXIFR::JPEG.new(image)
            when /\.tiff?$/i
              EXIFR::TIFF.new(image)
            end
    if @exif
      device_type_rules('models').each do |type, regex|
        return type if @exif.model =~ regex
      end
      device_type_rules('makes').each do |type, regex|
        return type if @exif.make =~ regex
      end
      device_type_rules('software').each do |type, regex|
        return type if @exif.software =~ regex
      end
    end
  end

  # Each header name maps to a hash of device types with regex values; a
  # match on the header signals that the type should be returned.
  device_type_rules('headers').each do |header, rules|
    next unless mail.header[header]

    (rules || {}).each do |type, regex|
      return type if mail.header[header].decoded =~ regex
      named_field = mail.header.fields.detect { |candidate| candidate.name == header }
      return type if named_field && named_field.to_s =~ regex
    end
  end

  # The image found above is reused; the video lookup only happens when the
  # image file name did not identify the device.
  by_filename = device_type_from_filename(image) ||
                device_type_from_filename(attachment(['video']))
  return by_filename if by_filename

  boundary = mail.boundary
  device_type_rules('boundary').each do |type, regex|
    return type if boundary =~ regex
  end

  carrier_conf = File.expand_path(
                   File.join(conf_dir, "#{aliases[carrier] || carrier}.yml"))
  return :handset if File.exist?(carrier_conf)

  :unknown
end

# Returns the rules stored under config['device_types'][section], or an empty
# hash when the configuration, the device_types entry or the section is
# missing or cannot be read. Callers can therefore always iterate the result.
def device_type_rules(section)
  (config['device_types'] || {})[section] || {}
rescue StandardError
  {}
end
private :device_type_rules

# Matches the original file name of +file+ against the device_types/filenames
# rules. Returns the matching device type, or nil when +file+ is nil or no
# rule matches.
def device_type_from_filename(file)
  return nil unless file

  name = file.original_filename
  device_type_rules('filenames').each do |type, regex|
    return type if name =~ regex
  end
  nil
end
private :device_type_from_filename

#exifObject

exif object on default image from exifr gem



643
644
645
646
# File 'lib/mms2r/media.rb', line 643

# Returns @exif. When it has not been set yet, device_type? is run first
# because that method assigns @exif while inspecting the image attachment.
def exif
  return @exif if @exif

  device_type?
  @exif
end

#filename?(part) ⇒ Boolean

Returns a filename declared for a part, or a default if it's not defined.



522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
# File 'lib/mms2r/media.rb', line 522

# Returns a file name for +part+.
#
# The declared file name is used when there is one; otherwise the content id
# without its angle brackets, and failing that a name built from the current
# time. A default extension for the part's type is appended when the name
# does not already end in a dot followed by 1 to 4 characters. Names longer
# than 255 characters are shortened while keeping the extension.
def filename?(part)
  name = part.filename
  if name.nil? || name.empty?
    matched = part.content_id && /^<(.+)>$/.match(part.content_id)
    name = matched ? matched[1] : "#{Time.now.to_f}.#{self.default_ext(part.part_type?)}"
  end

  # FIXME FWIW, janky look for dot extension 1 to 4 chars long
  name = "#{name}.#{self.default_ext(part.part_type?)}" unless name =~ /\..{1,4}$/
  name = name.strip

  # handle excessively large filenames
  if name.size > 255
    ext  = File.extname(name)
    stem = File.basename(name, ext)
    name = "#{stem[0, 255 - ext.size]}#{ext}"
  end

  name
end

#ignore_media?(type, part) ⇒ Boolean

Helper for process template method to determine if media contained in a part should be ignored. Producers should override this method to return true for media such as images that are advertising, carrier logos, etc. See the ignore section in the discussion of the built-in configuration.



380
381
382
383
384
385
386
387
# File 'lib/mms2r/media.rb', line 380

# Decides whether the media in +part+ should be left out of the result.
#
# A part is ignored when, for the ignore rules configured for +type+,
# * its file name equals a rule, or
# * its file name matches a Regexp rule, or
# * its stripped, decoded body matches a Regexp rule, or
# when its stripped, decoded body is empty.
#
# The file name is computed once, and the body is decoded at most once and
# only if the file name checks did not already decide the outcome.
#
# Returns true or false.
def ignore_media?(type, part)
  ignores = config['ignore'][type] || []

  name = filename?(part)
  return true if ignores.any? { |rule| name == rule }

  patterns = ignores.select { |rule| rule.is_a?(Regexp) }
  return true if patterns.any? { |pattern| name =~ pattern }

  text = part.body.decoded.strip
  return true if patterns.any? { |pattern| text =~ pattern }

  text.size == 0
end

#initialize_config(config) ⇒ Object

convenience accessor for self.class.initialize_config



763
764
765
# File 'lib/mms2r/media.rb', line 763

# Instance-level shortcut for self.class.initialize_config; +config+ is
# passed through unchanged.
def initialize_config(config)
  self.class.initialize_config(config)
end

#is_mobile?Boolean

The source of the MMS was some sort of mobile or smart phone



651
652
653
# File 'lib/mms2r/media.rb', line 651

# True unless device_type? reports :unknown.
def is_mobile?
  self.device_type? != :unknown
end

#log(message, level = :info) ⇒ Object



728
729
730
# File 'lib/mms2r/media.rb', line 728

# Sends +message+ to the logger by calling the logger method named by
# +level+ (:info by default). Does nothing and returns nil when no logger
# was configured.
def log(message, level = :info)
  return if @logger.nil?

  @logger.send(level, message)
end

#msg_tmp_dirObject

Helper to temp_file to create a unique temporary directory that is a child of tmp_dir. This version is based on the message_id of the mail.



512
513
514
515
516
517
# File 'lib/mms2r/media.rb', line 512

# Creates and returns a fresh numbered subdirectory of @media_dir. Each call
# increments @dir_count and uses the new count as the directory name.
def msg_tmp_dir
  @dir_count += 1
  File.expand_path(File.join(@media_dir, "#{@dir_count}")).tap do |dir|
    FileUtils.mkdir_p(dir)
  end
end

#numberObject

Get the phone number associated with this MMS if it exists. The value returned is simplistic, it is just the user name of the from address before the @ symbol. Validation of the number is left to you. Most carriers are using the real phone number as the username.



228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/mms2r/media.rb', line 228

# Returns the phone number associated with the message, memoized in @number.
#
# When config['number'] holds a rule (header name, pattern, replacement) and
# that header is present, the number is the header text with the pattern
# substituted. If that yields nothing, the part of the first From address
# before the first "@" or "/" is used; any error in that fallback gives "".
def number
  return @number if @number

  rule = config['number']
  if rule && rule.any?
    header = mail.header[rule[0]]
    @number = header.to_s.gsub(rule[1], rule[2]) if header
  end

  if @number.nil? || @number.blank?
    @number = mail.from.first.split(/@|\//).first rescue ""
  end

  @number
end

#processObject

process is a template method and collects all the media in a MMS. Override helper methods to this template to clean out advertising and/or ignore media that are advertising. This method should not be overridden unless there is an extreme special case in processing the media of a MMS (like Sprint)

Helper methods for the process template:

  • ignore_media? – true if the media contained in a part should be ignored.

  • process_media – retrieves media to temporary file, returns path to file.

  • transform_text – called by process_media, strips out advertising.

  • temp_file – creates a temporary filepath based on information from the part.

Block support: Call process() with a block to automatically iterate through media. For example, to process and receive only media of video type:

mms.process do |media_type, file|
  results << file if media_type =~ /video/
end

note: purge must be explicitly called to remove the media files mms2r extracts from an mms message.


349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/mms2r/media.rb', line 349

# Template method that collects the media of the message.
#
# On the first call every folded part of the mail is dispatched: text/html
# parts go to process_html_part, everything else to process_part. Later calls
# skip that work. When a block is given, each (type, files) pair of the media
# hash is yielded to it.
def process # :yields: media_type, file
  unless @was_processed
    log("#{self.class} processing", :info)

    self.folded_parts(mail).each do |part|
      handler = part.part_type? == 'text/html' ? :process_html_part : :process_part
      send(handler, part)
    end

    @was_processed = true
  end

  # when process acts upon a block
  media.each { |type, files| yield(type, files) } if block_given?
end

#process_html_part(part) ⇒ Object

Helper to decide if a html part should be kept or ignored. We are defining it here primarily for the benefit so that Sprint can override a special case for processing.



438
439
440
# File 'lib/mms2r/media.rb', line 438

# Hook for text/html parts. This implementation simply hands the part to
# process_part; it is a separate method so that a carrier module can
# override how html parts are handled.
def process_html_part(part)
  process_part(part)
end

#process_media(part) ⇒ Object

Helper for process template method to decode the part based on its type and write its content to a temporary file. Returns path to temporary file that holds the content. Parts with a main type of text will have their contents transformed with a call to transform_text

Producers should only override this method if the parts of the MMS need special treatment besides what is expected for a normal mime part (like Sprint).

Returns a tuple of content type, file path



401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
# File 'lib/mms2r/media.rb', line 401

# Decodes +part+ and writes its content to a temporary file.
#
# Parts whose type starts with "text/" and application/smil parts are run
# through transform_text_part. For application/octet-stream parts the type is
# derived from the file name; every other part keeps its declared type and
# its decoded body is used as is.
#
# Returns [type, file_path], or [type, nil] when there is no content to
# write.
def process_media(part)
  # Mail body auto-magically decodes quoted
  # printable for text/html type.
  file = temp_file(part)
  declared = part.part_type?

  if declared =~ /^text\// || declared == 'application/smil'
    type, content = transform_text_part(part)
  else
    type = declared == 'application/octet-stream' ? type_from_filename(filename?(part)) : declared
    content = part.body.decoded
  end
  return type, nil if content.nil? || content.empty?

  log("#{self.class} writing file #{file}", :info)
  File.open(file, 'wb') { |io| io.write(content) }
  [type, file]
end

#process_part(part) ⇒ Object

Helper to decide if a part should be kept or ignored



426
427
428
429
430
431
# File 'lib/mms2r/media.rb', line 426

# Processes a single part: parts flagged by ignore_media? are skipped, all
# others are written out by process_media and registered with add_file when
# both a type and a file came back.
def process_part(part)
  unless ignore_media?(part.part_type?, part)
    type, file = process_media(part)
    add_file(type, file) if !type.nil? && !file.nil?
  end
end

#purgeObject

Purges the unique MMS2R::Media.media_dir directory created for this producer and all of the media that it contains.



495
496
497
498
# File 'lib/mms2r/media.rb', line 495

# Logs the purge at :info level, then recursively removes @media_dir and
# everything under it with FileUtils.rm_rf.
def purge
  log("#{self.class} purging #{@media_dir} and all its contents", :info)
  FileUtils.rm_rf(@media_dir)
end

#safe_message_id(message_id) ⇒ Object

convenience accessor for self.class.safe_message_id



756
757
758
# File 'lib/mms2r/media.rb', line 756

# Instance-level shortcut for self.class.safe_message_id; +message_id+ is
# passed through unchanged.
def safe_message_id(message_id)
  self.class.safe_message_id(message_id)
end

#subjectObject

Return the Subject for this message, returns “” for default carrier subject such as ‘Multimedia message’ for AT&T carrier.



246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/mms2r/media.rb', line 246

# Returns the subject of the message, memoized in @subject.
#
# The stripped mail subject ("" when it cannot be read) is replaced by ""
# when it equals one of the text/plain ignore entries; otherwise it is passed
# through transform_text as text/plain.
def subject
  return @subject if @subject

  raw = mail.subject.strip rescue ""
  ignored = config['ignore']['text/plain']
  @subject = if ignored && ignored.detect { |entry| entry == raw }
               ""
             else
               transform_text('text/plain', raw).last
             end
end

#temp_file(part) ⇒ Object

Helper for process template method to name a temporary filepath based on information in the part. This version attempts to honor the name of the media as labeled in the part header and creates a unique temporary directory for writing the file so filename collision does not occur. Consumers of this method expect the directory structure to the file exists, if the method is overridden it is mandatory that this behavior is retained.



486
487
488
489
# File 'lib/mms2r/media.rb', line 486

# Returns the absolute path at which the content of +part+ should be written:
# the base name of the part's file name inside a freshly created directory
# from msg_tmp_dir. Only the base name is used, so directory components in
# the declared file name are dropped.
def temp_file(part)
  name = filename?(part)
  target_dir = msg_tmp_dir()
  File.expand_path(File.join(target_dir, File.basename(name)))
end

#tmp_dirObject

convenience accessor for self.class.tmp_dir



742
743
744
# File 'lib/mms2r/media.rb', line 742

# Instance-level shortcut for self.class.tmp_dir.
def tmp_dir
  self.class.tmp_dir
end

#transform_text(type, text) ⇒ Object

Helper for process_media template method to transform text. See the transform section in the discussion of the built-in configuration.



447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
# File 'lib/mms2r/media.rb', line 447

# Applies the transforms configured for +type+ to +text+.
#
# Each transform is a two-element list (pattern, replacement) applied with
# gsub in configuration order; entries of any other size are skipped, and a
# substitution that raises leaves the text as it was. When no transforms are
# configured for +type+ the text is returned untouched.
#
# Returns [type, text].
def transform_text(type, text)
  transforms = config['transform'] && config['transform'][type]
  return type, text unless transforms

  if RUBY_VERSION < "1.9"
    # Rubies before 1.9: convert the text from ISO-8859-1 to UTF-8 first.
    require 'iconv'
    converter = Iconv.new('UTF-8', 'ISO-8859-1')
    text = converter.iconv(text)
    text << converter.iconv(nil)
    converter.close
  end

  transforms.each do |rule|
    next unless rule.size == 2

    pattern, replacement = rule.first, rule.last
    text = text.gsub(pattern, replacement) rescue text
  end

  [type, text]
end

#transform_text_part(part) ⇒ Object

Helper for process_media template method to transform text.



471
472
473
474
475
# File 'lib/mms2r/media.rb', line 471

# Runs the stripped, decoded body of +part+ through transform_text using the
# part's own type. Returns what transform_text returns.
def transform_text_part(part)
  transform_text(part.part_type?, part.body.decoded.strip)
end