Class: Embulk::Input::DruginfoInterViewFormInputPlugin
- Inherits:
-
InputPlugin
- Object
- InputPlugin
- Embulk::Input::DruginfoInterViewFormInputPlugin
- Defined in:
- lib/embulk/input/druginfo_interview_form.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#init ⇒ Object
TODO def self.guess(config) sample_records = [ {"example"=>"a", "column"=>1, "value"=>0.1}, {"example"=>"a", "column"=>2, "value"=>0.2}, ] columns = Guess::SchemaGuess.from_hash_records(sample_records) return {"columns" => columns} end.
- #run ⇒ Object
Class Method Details
.resume(task, columns, count, &control) ⇒ Object
42 43 44 45 46 47 |
# File 'lib/embulk/input/druginfo_interview_form.rb', line 42

# Runs the input tasks by yielding to the caller-supplied block and reports
# that there is no configuration to carry over to a later run.
#
# @param task    task definition passed through to the block unchanged
# @param columns output schema passed through to the block unchanged
# @param count   task count passed through to the block unchanged
# @param control block that is yielded (task, columns, count)
# @return [Hash] always an empty hash (the "next config diff")
def self.resume(task, columns, count, &control)
  # The block's result is intentionally discarded: nothing in it is needed
  # to build the (always empty) config diff.
  yield(task, columns, count)

  {}
end
.transaction(config, &control) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/embulk/input/druginfo_interview_form.rb', line 9

# Builds the task definition and the output schema, then delegates to .resume
# with a task count of 1.
#
# @param config  plugin configuration; "root_dir" and "document_type" are
#                both read from it as strings
# @param control block forwarded to .resume
# @return whatever .resume returns
def self.transaction(config, &control)
  # Values loaded from config.yml. document_type can be used to branch on
  # the kind of document being read.
  task = %w[root_dir document_type].each_with_object({}) do |key, settings|
    settings[key] = config.param(key, :string)
  end

  # Available column types: boolean, long, double, string, timestamp.
  # Every field is emitted as a string; a column's index is its position
  # in this list.
  field_names = %w[
    molecular_formula
    molecular_weight
    description
    solubility
    acidity_constant
    melting_point
    hygroscopic
    partition_coefficient
    impurities
    development_area
    expiration_date
    principal_agent_amount
    diluent
    dosage_form
    weight
    diameter
    thickness
    dissolution
  ]
  columns = field_names.each_with_index.map do |field_name, index|
    Column.new(index, field_name, :string)
  end

  resume(task, columns, 1, &control)
end
Instance Method Details
#init ⇒ Object
TODO def self.guess(config)
sample_records = [
{"example"=>"a", "column"=>1, "value"=>0.1},
{"example"=>"a", "column"=>2, "value"=>0.2},
]
columns = Guess::SchemaGuess.from_hash_records(sample_records)
return {"columns" => columns}
end
59 60 61 62 63 |
# File 'lib/embulk/input/druginfo_interview_form.rb', line 59

# Copies the task settings this instance needs into instance variables.
def init
  # Directory that #run scans for PDF files.
  @root_dir = task["root_dir"]
  # Kind of document being imported.
  @document_type = task["document_type"]
end
#run ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/embulk/input/druginfo_interview_form.rb', line 65

# Reads every "*.pdf" file directly under @root_dir, extracts the
# interview-form sections from each one and adds one record per file to the
# page builder, then finishes the page builder.
#
# A section that cannot be located is emitted as nil instead of aborting the
# whole run (presumably the page builder writes nil as NULL -- confirm).
#
# @return [Hash] an empty task report
def run
  Dir.glob(File.join(@root_dir, '*.pdf')) do |pdf_path|
    body = interview_form_body(PDF::Reader.new(pdf_path))
    page_builder.add(interview_form_record(body))
  end

  page_builder.finish

  {}
end

# Joins the text of every page that follows the first page starting with the
# overview heading. That heading page itself is not included. Returns an
# empty string when no page starts with the heading.
def interview_form_body(document)
  overview_heading = /\A#{either_width('Ⅰ.概要に関する項目')}/
  page_texts = document.pages.map(&:text)
  page_texts.drop_while { |page_text| page_text !~ overview_heading }.drop(1).join
end

# Builds the 18-value record (same order as the plugin's columns) from the
# body text of one interview form.
def interview_form_record(text)
  molecular_formula = section_between(text, '4.分子式及び分子量', '5.')
  # Placeholder: the molecular weight is not parsed separately; it repeats
  # the whole "molecular formula and weight" section.
  molecular_weight = molecular_formula
  description = section_between(text, '(1)外観・性状', '(2)')
  solubility = section_between(text, '(2)溶解性', '(3)')
  acidity_constant = section_between(text, '(5)酸塩基解離定数', '(6)')
  # Skips the remainder of the heading line before capturing.
  melting_point = capture_section(
    text, /#{either_width('(4)融点')}.+?\n(.+?)#{either_width('(5)')}/m
  )
  hygroscopic = section_between(text, '(3)吸湿性', '(4)')
  partition_coefficient = section_between(text, '(6)分配係数', '(7)')
  # Author's note here read "genotoxicity" -- presumably a reminder for the
  # impurities column, which is still the literal placeholder "impurities".
  # The development area is the last parenthesised text between the
  # international-birth-date heading and heading 10 (author's note: mind the
  # kind of parentheses, hence both widths are accepted).
  development_area = capture_section(
    text,
    /#{either_width('9.国際誕生年月日')}.+#{either_width('(')}(.+?)#{either_width(')')}.*#{either_width('10.')}/m
  )
  expiration_date = section_between(text, '2.有効期間又は使用期限', '3.')
  principal_agent_amount = capture_section(
    text,
    /#{either_width('2.製剤の組成')}\n+#{either_width('(1)有効成分(活性成分)の含量')}(.+?)#{either_width('(2)')}/m
  )
  diluent = capture_section(
    text, /#{either_width('2.製剤の組成')}.+#{either_width('(2)添加物')}(.+?)#{either_width('(3)')}/m
  )
  dosage_form = section_between(text, '1.剤形', '2.')
  # Placeholder: weight, diameter and thickness are not parsed individually;
  # all three carry the appearance sub-section of the dosage-form section.
  appearance = section_between(dosage_form, '(1)剤形の区別、外観及び性状', '(2)')
  dissolution = section_between(text, '7.溶出性', '8.')

  [molecular_formula, molecular_weight, description, solubility,
   acidity_constant, melting_point, hygroscopic, partition_coefficient,
   "impurities", development_area, expiration_date, principal_agent_amount,
   diluent, dosage_form, appearance, appearance, appearance, dissolution]
end

# Returns the shortest text found between `heading` and `next_heading`, or
# nil when that pair does not occur in `source`.
# NOTE(review): a value that itself contains the next marker (e.g. "2." in
# "2.5") still ends the capture early -- confirm against real documents.
def section_between(source, heading, next_heading)
  capture_section(source, /#{either_width(heading)}(.+?)#{either_width(next_heading)}/m)
end

# Returns capture group 1 of `pattern` in `source`; nil when `source` is nil
# or the pattern does not match.
def capture_section(source, pattern)
  source && source[pattern, 1]
end

# Converts a literal heading into regexp source that matches it literally,
# accepting ASCII digits, parentheses and periods in either their ASCII or
# their full-width form (full-width code point = ASCII code point + 0xFEE0).
def either_width(literal)
  literal.each_char.map do |char|
    escaped = Regexp.escape(char)
    if char =~ /\A[0-9().]\z/
      "[#{escaped}#{(char.ord + 0xFEE0).chr(Encoding::UTF_8)}]"
    else
      escaped
    end
  end.join
end

private :interview_form_body, :interview_form_record, :section_between,
        :capture_section, :either_width