Class: Stevedore::StevedoreEmail

Inherits:
StevedoreBlob show all
Defined in:
lib/parsers/stevedore_email.rb

Instance Attribute Summary collapse

Attributes inherited from StevedoreBlob

#download_url, #extra, #text, #title

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from StevedoreBlob

#analyze!, #clean_text, #initialize

Constructor Details

This class inherits a constructor from Stevedore::StevedoreBlob

Instance Attribute Details

#attachments ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @attachments.
# .new_from_tika assigns this the list of attachment URLs it managed to
# resolve (entries it could not locate are dropped).
def attachments
  @attachments
end

#content_type ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @content_type.
# #to_hash substitutes "message/rfc822" when this is nil or false.
# NOTE(review): nothing on this page assigns @content_type — confirm where it is set.
def content_type
  @content_type
end

#creation_date ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @creation_date.
# .new_from_tika fills this from the Tika "Creation-Date" metadata entry.
def creation_date
  @creation_date
end

#dkim_verified ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @dkim_verified.
# .new_from_tika stores the result of Dkim::Verifier#verify! here, or false
# when that call raises Dkim::DkimError.
def dkim_verified
  @dkim_verified
end

#message_cc ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @message_cc.
# .new_from_tika fills this from the Tika "Message-Cc" metadata entry.
def message_cc
  @message_cc
end

#message_from ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @message_from.
# .new_from_tika fills this from the Tika "Message-From" metadata entry.
def message_from
  @message_from
end

#message_to ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @message_to.
# .new_from_tika fills this from the Tika "Message-To" metadata entry.
def message_to
  @message_to
end

#subject ⇒ Object

TODO write wrt other fields. where do those go???



13
14
15
# File 'lib/parsers/stevedore_email.rb', line 13

# Reader for @subject.
# .new_from_tika fills this (and the inherited title) from the Tika
# "subject" metadata entry.
def subject
  @subject
end

Class Method Details

.new_from_tika(content, metadata, download_url, filepath) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/parsers/stevedore_email.rb', line 15

# Builds an email blob from Tika's output for a single .eml file.
#
# Calls the inherited constructor with the same four arguments, then copies
# the email-specific Tika metadata onto the result, records the outcome of
# DKIM verification, and resolves every attachment named in the
# "X-Attachments" metadata entry to a URL under S3_BASEPATH.
#
# @param content [Object] extracted content, forwarded to the inherited constructor
# @param metadata [Hash] hash-like Tika metadata, read with String keys such as "Message-To"
# @param download_url [Object] forwarded to the inherited constructor; used here only in diagnostics
# @param filepath [String] local path of the .eml file
# @return [Object] the object built by the inherited constructor, with the email fields set
def self.new_from_tika(content, metadata, download_url, filepath)
  t = super
  t.creation_date = metadata["Creation-Date"]
  t.message_to = metadata["Message-To"]
  t.message_from = metadata["Message-From"]
  t.message_cc = metadata["Message-Cc"]
  t.title = t.subject = metadata["subject"]

  # A DKIM failure is recorded as false rather than aborting the parse.
  t.dkim_verified = begin
                      Dkim::Verifier.new(filepath).verify!
                    rescue Dkim::DkimError
                      false
                    end

  attachment_dir = File.dirname(filepath)
  eml_basename = File.basename(filepath, '.eml')

  # "X-Attachments" is a "|"-separated list of URL-escaped filenames; a
  # missing entry becomes "" via to_s and therefore yields no attachments.
  t.attachments = metadata["X-Attachments"].to_s.split("|").map do |raw_attachment_filename|
    attachment_filename = CGI.unescape(raw_attachment_filename)

    # Two naming schemes are tried: the bare attachment name, and the name
    # prefixed with the email's own basename ("<email>-<attachment>").
    possible_filename = File.join(attachment_dir, attachment_filename)
    eml_filename = File.join(attachment_dir, "#{eml_basename}-#{attachment_filename}")
    possible_s3_url = "#{S3_BASEPATH}/#{CGI.escape(File.basename(possible_filename))}"
    possible_eml_s3_url = "#{S3_BASEPATH}/#{CGI.escape(File.basename(eml_filename))}"

    # We might be uploading from disk, in which case we look for a local file
    # with the name from X-Attachments; or we might be uploading via S3, in
    # which case we check whether an object with that name is reachable there.
    # TODO: support private S3 buckets
    s3_url = if File.exist?(possible_filename)
               possible_s3_url
             elsif File.exist?(eml_filename)
               possible_eml_s3_url
             end

    if s3_url.nil?
      s3_url = begin
                 client = Manticore::Client.new
                 if client.head(possible_s3_url).code == 200
                   puts "found attachment: #{possible_s3_url}"
                   possible_s3_url
                 elsif client.head(possible_eml_s3_url).code == 200
                   puts "found attachment: #{possible_eml_s3_url}"
                   possible_eml_s3_url
                 end
               rescue StandardError
                 # Best effort: any failure while probing is treated as "attachment not found".
                 nil
               end
    end

    if s3_url.nil?
      STDERR.puts "Tika X-Attachments: " + metadata["X-Attachments"].to_s.inspect
      STDERR.puts "Couldn't find attachment '#{possible_s3_url}' aka '#{possible_eml_s3_url}' from '#{raw_attachment_filename}' from #{download_url}"
    end
    s3_url
  end.compact
  t
end

Instance Method Details

#to_hash ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/parsers/stevedore_email.rb', line 65

# Serializes the email into a nested Hash with String keys.
#
# The top level carries a SHA1 of download_url, the title and source URL;
# "file" and "analyzed" both carry the body text, and "analyzed" => "metadata"
# carries the email headers, attachment list and DKIM result. The three
# address fields are always emitted as collections: an Enumerable value is
# passed through unchanged and anything else (including nil) is wrapped in a
# one-element array.
#
# @return [Hash] the document described above; "_updatedAt" is Time.now at call time
def to_hash
  # Defined once so every address field is wrapped the same way.
  as_list = ->(value) { value.is_a?(Enumerable) ? value : [value] }

  {
    "sha1" => Digest::SHA1.hexdigest(download_url),
    "title" => title.to_s,
    "source_url" => download_url.to_s,
    "file" => {
      "title" => title.to_s,
      "file" => text.to_s
    },
    "analyzed" => {
      "body" => text.to_s,
      "metadata" => {
        # Falls back to the generic email MIME type when none was recorded.
        "Content-Type" => content_type || "message/rfc822",
        "Creation-Date" => creation_date,
        "Message-To" => as_list.call(message_to),
        "Message-From" => as_list.call(message_from),
        "Message-Cc" => as_list.call(message_cc),
        "subject" => subject,
        "attachments" => attachments,
        "dkim_verified" => dkim_verified
      }
    },
    "_updatedAt" => Time.now
  }
end