Class: MboxHeaderScraper::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/mbox_header_scraper/scraper.rb

Overview

Scrapes selected headers from an mbox file and writes them to an output file as tab-separated values.

Class Method Summary collapse

Class Method Details

.check_in_file(filename) ⇒ Object

Returns an error message when the input file does not exist; otherwise returns nil.



36
37
38
39
# File 'lib/mbox_header_scraper/scraper.rb', line 36

# Checks that the input file is present on disk.
#
# @param filename [String] path to look for
# @return [String, nil] the message 'file does not exists.' when nothing
#   exists at +filename+, otherwise nil
def self.check_in_file(filename)
  # A false modifier condition makes the expression evaluate to nil.
  'file does not exists.' unless File.exist?(filename)
end

.check_out_file(filename) ⇒ Object



41
42
43
44
# File 'lib/mbox_header_scraper/scraper.rb', line 41

# Checks that the output path is still free, so an existing file can be
# detected before it is overwritten.
#
# @param filename [String] path to look for
# @return [String, nil] the message 'file already exists.' when something
#   exists at +filename+, otherwise nil
def self.check_out_file(filename)
  # A false modifier condition makes the expression evaluate to nil.
  'file already exists.' if File.exist?(filename)
end

.insert_header(headers) ⇒ Object



51
52
53
54
55
56
57
58
59
# File 'lib/mbox_header_scraper/scraper.rb', line 51

# Builds the header row of the output: the names of all enabled headers,
# joined by tabs and terminated by a newline.
#
# @param headers [Hash] header name => flag; only entries whose flag is
#   exactly +true+ are included (other truthy values are skipped)
# @return [String] tab-separated header names followed by "\n"
def self.insert_header(headers)
  enabled_names = headers.select { |_name, flag| flag == true }.keys
  "#{enabled_names.join("\t")}\n"
end

.process(in_file, out_file, options = { Subject: true, Date: true, From: true, To: true, CC: true }) ⇒ Object

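Reads in_file line by line, splits it into individual mails at lines starting with "From ", converts each mail to TSV, and copies the accumulated result to out_file.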



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/mbox_header_scraper/scraper.rb', line 4

# Scrapes an mbox file and writes the selected headers to +out_file+.
#
# The input is read line by line. Every line starting with "From " begins a
# new mail; the lines collected so far are handed to +single_mail_to_tsv+ as
# a closed temp file and the returned text is appended to the result. Lines
# that appear before the first "From " line are treated as a mail of their
# own. The result is built in a temp file and copied to +out_file+ at the end.
#
# An empty input file produces an output that holds only the header row.
#
# @param in_file [String] path of the mbox file to read
# @param out_file [String] path the result is copied to
# @param options [Hash] header name => flag, passed on to +insert_header+
#   and +single_mail_to_tsv+
# @return [Object] whatever +FileUtils.copy_file+ returns
def self.process(in_file, out_file, options = { Subject: true, Date: true, From: true, To: true, CC: true })
  Tempfile.open('mbox_header_scraper_result') do |result_file|
    result_file.write(insert_header(options))

    # Temp file collecting the lines of the mail currently being read;
    # nil while no mail is open. No singleton method is defined on nil,
    # because that would add the method to NilClass for the whole process.
    mail_tmp = nil

    # Closes the current mail (keeping it on disk so it can be re-read),
    # appends its TSV form to the result and removes the temp file.
    flush_mail = lambda do
      mail_tmp.close(false)
      result_file.write(single_mail_to_tsv(mail_tmp, options))
      mail_tmp.delete
      mail_tmp = nil
    end

    begin
      IO.foreach(in_file) do |line|
        # Plain prefix test; same match as /^From / on a single line.
        flush_mail.call if mail_tmp && line.start_with?('From ')

        mail_tmp ||= Tempfile.new('mbox_header_scraper_tmp')
        mail_tmp.write(line)
      end

      # Flush the last mail; skipped when the input had no lines at all.
      flush_mail.call if mail_tmp
    ensure
      # Only reached with a live temp file when something raised mid-mail.
      mail_tmp.close! if mail_tmp
    end

    result_file.close(false)
    FileUtils.copy_file(result_file.path, out_file)
  end
end

.single_mail_to_tsv(mail_file, options) ⇒ Object



46
47
48
49
# File 'lib/mbox_header_scraper/scraper.rb', line 46

# Converts one mail into its TSV text by wrapping it in a
# MboxHeaderScraper::Mail and asking that object for the selected headers.
#
# @param mail_file [Object] the mail, passed unchanged to
#   MboxHeaderScraper::Mail.new
# @param options [Hash] header selection, passed unchanged to
#   Mail#header_to_tsv
# @return [Object] whatever Mail#header_to_tsv returns
def self.single_mail_to_tsv(mail_file, options)
  MboxHeaderScraper::Mail.new(mail_file).header_to_tsv(options)
end