Class: Embulk::Input::DruginfoInterViewFormInputPlugin

Inherits:
InputPlugin
  • Object
show all
Defined in:
lib/embulk/input/druginfo_interview_form.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.resume(task, columns, count, &control) ⇒ Object



42
43
44
45
46
47
# File 'lib/embulk/input/druginfo_interview_form.rb', line 42

# Hands the task definition to the caller's block and reports no
# configuration changes.
#
# @param task passed through to the block unchanged
# @param columns passed through to the block unchanged
# @param count passed through to the block unchanged
# @yield [task, columns, count] the block's return value is ignored
# @return [Hash] always an empty hash
def self.resume(task, columns, count, &control)
  # Whatever the block returns is discarded.
  yield(task, columns, count)

  {}
end

.transaction(config, &control) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/embulk/input/druginfo_interview_form.rb', line 9

# Builds the task definition and the output schema, then delegates to
# .resume with a task count of 1.
#
# @param config object answering `param(name, type)`; "root_dir" and
#   "document_type" are both read as :string
# @return whatever .resume returns
def self.transaction(config, &control)
  # Load the values set in config.yml.
  # document_type can be used to branch on the kind of document.
  task = {}
  %w[root_dir document_type].each do |key|
    task[key] = config.param(key, :string)
  end

  # Available column types: boolean, long, double, string, timestamp.
  # Every column here is a string; its index is its position in this list.
  column_names = %w[
    molecular_formula
    molecular_weight
    description
    solubility
    acidity_constant
    melting_point
    hygroscopic
    partition_coefficient
    impurities
    development_area
    expiration_date
    principal_agent_amount
    diluent
    dosage_form
    weight
    diameter
    thickness
    dissolution
  ]
  columns = column_names.each_with_index.map do |name, index|
    Column.new(index, name, :string)
  end

  resume(task, columns, 1, &control)
end

Instance Method Details

#init ⇒ Object

TODO: implement def self.guess(config), for example:

sample_records = [
  {"example"=>"a", "column"=>1, "value"=>0.1},
  {"example"=>"a", "column"=>2, "value"=>0.2},
]
columns = Guess::SchemaGuess.from_hash_records(sample_records)
return {"columns" => columns}

end



59
60
61
62
63
# File 'lib/embulk/input/druginfo_interview_form.rb', line 59

# Copies the settings this plugin needs out of the task into instance
# variables.
#
# @return the task's "root_dir" value (the result of the last assignment)
def init
  settings = task
  @document_type = settings["document_type"]
  @root_dir = settings["root_dir"]
end

#run ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/embulk/input/druginfo_interview_form.rb', line 65

# Reads every "*.pdf" file directly under @root_dir, pulls the individual
# sections out of the document text with regular expressions, and adds one
# record per PDF to page_builder.
#
# Only pages that come after the first page starting with
# "Ⅰ.概要に関する項目" are searched; that page itself is not included.
#
# A section that cannot be found produces nil for its column instead of
# raising NoMethodError, so one PDF with a missing section does not abort
# the whole run.
#
# Parenthesised item numbers such as "(1)" are matched literally, in either
# ASCII or full-width brackets. Written as bare "(1)" they would be regex
# capture groups, and capture 1 would be the digit rather than the section
# body.
#
# NOTE(review): the "." after a section number (e.g. "5." or "3.") is still
# the regex wildcard, so a capture ends at the first such digit followed by
# any character. Tightening this needs checking against real PDF text.
#
# @return [Hash] an empty task report
def run
  Dir.glob(File.join(@root_dir, '*.pdf')) do |pdf_path|
    page_texts = PDF::Reader.new(pdf_path).pages.map(&:text)

    # Skip everything up to and including the first overview page.
    text = page_texts
           .drop_while { |page_text| !page_text.start_with?("Ⅰ.概要に関する項目") }
           .drop(1)
           .join

    # String#[] with a capture index returns nil when the pattern does not match.
    molecular_formula = text[/4.分子式及び分子量(.+?)5./m, 1]
    # NOTE(review): no separate pattern yet; reuses the formula section.
    molecular_weight = molecular_formula
    description = text[/[(（]1[)）]外観・性状(.+?)[(（]2[)）]/m, 1]
    solubility = text[/[(（]2[)）]溶解性(.+?)[(（]3[)）]/m, 1]
    acidity_constant = text[/[(（]5[)）]酸塩基解離定数(.+?)[(（]6[)）]/m, 1]
    # The remainder of the heading line is skipped before capturing.
    melting_point = text[/[(（]4[)）]融点.+?\n(.+?)[(（]5[)）]/m, 1]
    hygroscopic = text[/[(（]3[)）]吸湿性(.+?)[(（]4[)）]/m, 1]
    partition_coefficient = text[/[(（]6[)）]分配係数(.+?)[(（]7[)）]/m, 1]
    # Genotoxicity
    # NOTE(review): not extracted yet; the column receives this placeholder text.
    impurities = "impurities"
    # Captures the bracketed text after the international birth date
    # (the bracket type may be ASCII or full-width).
    development_area = text[/9.国際誕生年月日.+[(（](.+?)[)）].*10./m, 1]
    expiration_date = text[/2.有効期間又は使用期限(.+?)3./m, 1]
    principal_agent_amount =
      text[/2.製剤の組成\n+[(（]1[)）]有効成分[(（]活性成分[)）]の含量(.+?)[(（]2[)）]/m, 1]
    diluent = text[/2.製剤の組成.+[(（]2[)）]添加物(.+?)[(（]3[)）]/m, 1]
    dosage_form = text[/1.剤形(.+?)2./m, 1]
    # NOTE(review): weight, diameter and thickness all use the same
    # sub-section pattern; no field-specific parsing exists yet.
    appearance = dosage_form &&
                 dosage_form[/[(（]1[)）]剤形の区別、外観及び性状(.+?)[(（]2[)）]/m, 1]
    weight = appearance
    diameter = appearance
    thickness = appearance
    dissolution = text[/7.溶出性(.+?)8./m, 1]

    page_builder.add([molecular_formula, molecular_weight, description,
                      solubility, acidity_constant, melting_point,
                      hygroscopic, partition_coefficient, impurities,
                      development_area, expiration_date, principal_agent_amount,
                      diluent, dosage_form, weight,
                      diameter, thickness, dissolution])
  end

  page_builder.finish

  {}
end