Module: ChemScanner::Interpreter::ReactionDetection
- Includes:
- SchemeBase
- Included in:
- Scheme
- Defined in:
- lib/chem_scanner/interpreter/post_process/reaction_step.rb,
lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb,
lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb,
lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb,
lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb,
lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb,
lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb
Instance Method Summary collapse
- #assign_molecule_group ⇒ Object
-
#assign_text ⇒ Object
Attach/bind text to molecule or arrow.
- #assign_to_reaction ⇒ Object
-
#check_position(mol_poly, arrow, prod_side = true) ⇒ Object
Check whether the molecule belongs to the reaction.
- #check_reaction_orderring ⇒ Object
- #detect_position(arrow, mol_poly) ⇒ Object
- #detect_reaction_step(reaction) ⇒ Object
- #distance_molecule_group(rgroup, arrow, group) ⇒ Object
- #molecules_intersects_with_segment(segment) ⇒ Object
- #multi_line_chain_reaction ⇒ Object
- #nearest_arrow(text) ⇒ Object
- #nearest_molecule(point) ⇒ Object
- #process_reactions_step ⇒ Object
- #refine_duplicate_reagents ⇒ Object
-
#remove_separated_mol ⇒ Object
(1): A —> C.
- #sort_arrow_map ⇒ Object
- #text_around_arrow?(arrow, text, dist) ⇒ Boolean
- #try_detect_label_position(text) ⇒ Object
Methods included from SchemeBase
#add_molecule_substitution_info, #add_reaction_substitution_info, #assemble_molecule_text, #auto_fit_arrow_polygons
Instance Method Details
#assign_molecule_group ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb', line 10

# Attaches single-molecule groups to a reaction according to where the
# group's title polygon sits relative to each arrow.
#
# A group is considered only when its key is not already listed in an arrow's
# +text_arr+, it holds exactly one molecule, and that molecule is not boxed.
# Groups whose title is detected as "reagents" for any reaction are skipped.
# Otherwise the first reaction reporting "reactants" or "products" receives
# the molecule's fragment id, and the title text is moved from +@text_map+
# onto the molecule.
def assign_molecule_group
  reagent_text_ids = @reactions.flat_map { |r| @arrow_map[r.arrow_id].text_arr }

  auto_fit_arrow_polygons

  candidates = @mol_group_map.reject do |tid, mgroup|
    reagent_text_ids.include?(tid) ||
      mgroup.molecules.count != 1 ||
      mgroup.molecules.first.boxed
  end

  candidates.each do |text_id, mgroup|
    mol = mgroup.molecules.first
    fragment_id = mol.fragment.id

    # arrow id => detected side of the group title for that arrow
    positions = {}
    @reactions.each do |reaction|
      arrow_id = reaction.arrow_id
      side = detect_position(@arrow_map[arrow_id], mgroup.title.polygon)
      positions[arrow_id] = side unless side.nil?
    end

    next if positions.any? { |_, side| side == "reagents" }

    match = positions.detect { |_, side| %w[reactants products].include?(side) }
    next if match.nil?

    arrow_id, side = match

    # The title no longer needs to live in the text map.
    mol.text = @text_map.delete(text_id).value
    mol.text_ids.delete(text_id)
    @mol_map.each_value { |m| m.text_ids.delete(text_id) }

    reaction = @reactions.detect { |r| r.arrow_id == arrow_id }
    # "reactants" -> reactant_ids, "products" -> product_ids
    reaction.send("#{side.chop}_ids").push(fragment_id)
  end
end
#assign_text ⇒ Object
Attach/bind text to molecule or arrow
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 9

# Attach/bind every text in +@text_map+ to a molecule or to an arrow.
#
# For each text the nearest molecule and nearest arrow are looked up:
# * no arrow found: the text goes to the nearest molecule, or is queued as a
#   possible molecule label when +try_detect_label_position+ returns a group;
# * no molecule found (sentinel key 0): the text id is added to the arrow;
# * otherwise the text goes to the arrow when the arrow distance is below
#   2.5x the molecule distance and +text_around_arrow?+ agrees; else it goes
#   to the molecule (unless the text is a molecule-group title).
#
# Queued labels are then resolved: when a molecule's +label+ equals the
# text's bold text, that molecule id is added to the detected reaction group;
# otherwise the text falls back to the nearest molecule.
# Finally +assemble_molecule_text+ is run on every molecule.
def assign_text
  text_as_mol_ids = []

  @text_map.each do |k, text|
    group = try_detect_label_position(text)
    center = text.polygon.center
    min_mol = nearest_molecule(center)
    min_arrow = nearest_arrow(text)
    arrow = @arrow_map[min_arrow.key]

    if arrow.nil?
      mol_key = min_mol.key
      if group.nil?
        # The nearest molecule may be missing when the map is empty.
        nearest = @mol_map[mol_key]
        nearest.text_ids.push(k) unless nearest.nil?
      else
        text_as_mol_ids.push(id: k, mol: mol_key, group: group)
      end
      next
    end

    if min_mol.key.zero?
      # Fix: record the *text* id on the arrow (previously the arrow's own
      # key was pushed, so the text was lost).
      arrow.text_arr.push(k)
      next
    end

    to_arrow = (
      min_arrow.value < min_mol.value * 2.5 &&
      text_around_arrow?(arrow, text, min_arrow.value)
    )

    if to_arrow
      arrow.text_arr.push(k)
      next
    end

    # Do not add a molecule-group text to molecule as description
    @mol_map[min_mol.key].text_ids.push(k) unless @mol_group_map.key?(k)
  end

  text_as_mol_ids.each do |tinfo|
    tid = tinfo[:id]
    text = @text_map[tid]
    mol = @mol_map.values.detect { |m| m.label == text.bold_text }

    if mol.nil?
      fallback = @mol_map[tinfo[:mol]]
      fallback.text_ids.push(tid) unless fallback.nil?
    else
      rid = tinfo[:group].keys.first
      group = tinfo[:group][rid]
      reaction = @reactions.detect { |r| r.arrow_id == rid }
      # "reactants" -> reactant_ids, "products" -> product_ids
      rgroup = reaction.send("#{group[0..-2]}_ids")
      rgroup.push(mol.id).uniq!
    end
  end

  @mol_map.each_value { |mol| assemble_molecule_text(mol) }
end
#assign_to_reaction ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb', line 12

# Builds one Reaction per arrow and distributes the non-boxed molecules into
# its reagent / reactant / product id lists via +detect_position+.
#
# Three passes:
# 1. For each arrow, every arrow's polygons are rebuilt per molecule before
#    the position is detected; molecules with no detected side are remembered.
# 2. A molecule that is a reagent of one reaction and a reactant/product of
#    another is removed from one of them, depending on its distance to the
#    first reaction's arrow (threshold 2).
# 3. After +auto_fit_arrow_polygons+, the remembered molecules get a second
#    detection attempt.
def assign_to_reaction
  # Returns the id list of +reaction+ matching +side+, or nil if unknown.
  id_list_for = lambda do |reaction, side|
    case side
    when "reagents" then reaction.reagent_ids
    when "reactants" then reaction.reactant_ids
    when "products" then reaction.product_ids
    end
  end

  pending = {}

  @arrow_map.each do |arrow_id, arrow|
    reaction = Reaction.new
    reaction.arrow_id = arrow_id
    unplaced = []

    @mol_map.reject { |_, mol| mol.boxed }.each do |mol_id, mol|
      outline = mol.polygon

      @arrow_map.each_value do |a|
        gap = a.min_distance_to_polygon(outline)
        a.build_polygons(outline.height + gap)
      end

      ids = id_list_for.call(reaction, detect_position(arrow, outline))
      if ids.nil?
        unplaced.push(mol_id)
      else
        ids.push(mol_id)
      end
    end

    @reactions.push(reaction)
    pending[arrow_id] = unplaced unless unplaced.empty?
  end

  # Molecules which are both reagents and reactants/products:
  # if the reagent -> arrow distance is in range, keep it as reagent,
  # otherwise keep it as reactant/product.
  @reactions.each do |r|
    reagent_ids = r.reagent_ids
    arrow = @arrow_map[r.arrow_id]

    @reactions.reject { |candidate| candidate.arrow_id == r.arrow_id }.each do |o|
      shared = (reagent_ids & o.reactant_ids) + (reagent_ids & o.product_ids)

      shared.each do |cid|
        if arrow.min_distance_to_polygon(@mol_map[cid].polygon) > 2
          r.delete_id(cid)
        else
          o.delete_id(cid)
        end
      end
    end
  end

  auto_fit_arrow_polygons

  pending.each do |arrow_id, mol_ids|
    reaction = @reactions.detect { |r| r.arrow_id == arrow_id }
    arrow = @arrow_map[arrow_id]

    mol_ids.each do |mol_id|
      side = detect_position(arrow, @mol_map[mol_id].polygon)
      ids = id_list_for.call(reaction, side)
      ids.push(mol_id) unless ids.nil?
    end
  end
end
#check_position(mol_poly, arrow, prod_side = true) ⇒ Object
Check whether the molecule belongs to the reaction
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb', line 97

# Check whether a molecule polygon belongs to the reaction of +arrow+.
#
# The line through the arrow's head segment (or tail segment when
# +prod_side+ is false) must intersect +mol_poly+. The molecule is rejected
# when another arrow, whose head segment neither contains nor is contained by
# this segment, also points at the polygon and intersects the segment running
# from this segment's +point2+ to the first intersection point.
#
# @param mol_poly  polygon of the molecule
# @param arrow     arrow of the reaction being tested
# @param prod_side [Boolean] true to test the head side, false the tail side
# @return [Boolean]
def check_position(mol_poly, arrow, prod_side = true)
  pick_segment = ->(a) { prod_side ? a.head_segment : a.tail_segment }

  segment = pick_segment.call(arrow)
  line = segment.to_line
  return false unless line.intersects_with_polygon?(mol_poly)

  hit = line.intersection_points_with_polygon(mol_poly).first
  gap_segment = Geometry::Segment.new(segment.point2, hit)

  blocked = @arrow_map.except(arrow.id).each_value.any? do |other|
    other_head = other.head_segment
    overlapping = other_head.contains_segment?(segment) ||
                  segment.contains_segment?(other_head)
    next false if overlapping

    pick_segment.call(other).to_line.intersects_with_polygon?(mol_poly) &&
      other.all_intersects_with_segment?(gap_segment)
  end

  !blocked
end
#check_reaction_orderring ⇒ Object
48 49 50 51 52 53 54 55 56 57 |
# File 'lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb', line 48

# Returns true when there are fewer than two arrows, or when any arrow has
# middle points or a head segment whose line is not horizontal; false
# otherwise. +multi_line_chain_reaction+ bails out when this returns true.
#
# @return [Boolean]
def check_reaction_orderring
  return true if @arrow_map.count < 2

  @arrow_map.each_value.any? do |arrow|
    arrow.middle_points.count.positive? ||
      !arrow.head_segment.to_line.horizontal?
  end
end
#detect_position(arrow, mol_poly) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb', line 82

# Classifies a polygon relative to +arrow+.
#
# @param arrow    arrow of the reaction
# @param mol_poly polygon to classify
# @return [String, nil] "products", "reactants", "reagents", or nil when the
#   polygon matches none of the three checks (tested in that order)
def detect_position(arrow, mol_poly)
  center = mol_poly.center

  if check_position(mol_poly, arrow) && arrow.product_side?(center)
    "products"
  elsif check_position(mol_poly, arrow, false) && arrow.reactant_side?(center)
    "reactants"
  elsif arrow.polygon_around?(mol_poly)
    "reagents"
  end
end
#detect_reaction_step(reaction) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/chem_scanner/interpreter/post_process/reaction_step.rb', line 12

# Splits a reaction description of the form "1) ... 2) ..." / "(i) ... (ii) ..."
# into ReactionStep objects appended to +reaction.steps+.
#
# Steps are recognised only when at least two numbered entries are found and
# all their numbers come from a single numbering scheme (digits, upper/lower
# roman numerals, or letters A-H, J). For each step the description text after
# the number is passed to +extract_reaction_info+ to obtain temperature and
# time, and every entry of +reaction.reagent_abbs+ contained in the text is
# added to the step through +ChemScanner.get_abbreviation+.
#
# When any step carries its own time (or temperature), the reaction-level
# value is blanked. Previously only the LAST step decided this, because the
# flags were overwritten on every iteration.
#
# @param reaction reaction whose +description+ is analysed; mutated in place
# @return [void]
def detect_reaction_step(reaction)
  number_ref = [
    ["1", "2", "3", "4", "5", "6", "7", "8", "9"],
    ["I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"],
    ["i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix"],
    ["A", "B", "C", "D", "E", "F", "G", "H", "J"],
  ]

  regex_list = [
    /(^|\A)(([1-9a-z]{0,3}) *[)\.] *(.*))($|\z)/i,
    /(^|\A)\((([1-9a-z]{0,3}) *\) *(.*))($|\z)/i,
  ]

  check = false
  list_matched = []
  list_numbered = []

  regex_list.each do |regex|
    break if check

    list_matched = reaction.description.enum_for(:scan, regex).map { Regexp.last_match }
    list_numbered = list_matched.map { |x| x[3] }
    next if list_numbered.empty?

    # All numbers must belong to one numbering scheme.
    check = number_ref.any? { |ref| (ref & list_numbered) == list_numbered }
  end

  return unless check && list_numbered.count >= 2

  flatten_ref = number_ref.flatten
  check_temperature = false
  check_time = false

  list_position = list_matched.map { |x| x.begin(0) }
  list_matched.each_with_index do |matched, idx|
    # Slice from this match up to just before the next one (or to the end).
    next_pos = list_position[idx + 1] || -1
    next_pos = next_pos.negative? ? next_pos : (next_pos - 1)
    description = reaction.description[list_position[idx]..next_pos]

    text_start_pos = if matched[4].empty?
                       m2 = matched[2]
                       description.index(m2) + m2.size
                     else
                       description.index(matched[4]) || 0
                     end
    description = description[text_start_pos..-1]

    temperature, _, time = extract_reaction_info([description])

    step = ReactionStep.new
    step.temperature = temperature
    step.time = time
    step.description = description
    # Position inside its 9-element scheme, 1-based.
    step.number = (flatten_ref.index(matched[3]) % 9) + 1

    # Accumulate across steps instead of keeping only the last step's result.
    check_time ||= !time.empty?
    check_temperature ||= !temperature.empty?

    reaction.reagent_abbs.each do |abb|
      next unless description.include?(abb)

      step.reagents.push(ChemScanner.get_abbreviation(abb))
    end

    reaction.steps.push(step)
  end

  reaction.time = "" if check_time
  reaction.temperature = "" if check_temperature

  # NOTE: tempo tricky assign reagents to empty step
  return if reaction.reagents.count != 1

  empty_steps = reaction.steps.select do |s|
    s.description.empty? || s.description == "\n"
  end
  return if empty_steps.count != 1

  empty_steps.first.reagents.push(reaction.reagents.first.cano_smiles)
end
#distance_molecule_group(rgroup, arrow, group) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb', line 69

# For every id of +rgroup+ present in +@mol_map+, computes the smaller of:
# * the shortest distance from the arrow end point to the points where the
#   arrow line crosses the molecule polygon, and
# * the shortest distance between those crossing points and the crossing
#   points of any other molecule of the group.
# Both start at 9_999_999 when no crossing point exists.
#
# The arrow tail and tail segment are used when +group+ is "reactant_ids",
# the head and head segment otherwise.
#
# @param rgroup [Array] molecule ids of one reaction group
# @param arrow  arrow of the reaction
# @param group  [String] name of the group ("reactant_ids" selects the tail)
# @return [Hash] molecule id => distance
def distance_molecule_group(rgroup, arrow, group)
  if group == "reactant_ids"
    anchor = arrow.tail
    axis = arrow.tail_segment.to_line
  else
    anchor = arrow.head
    axis = arrow.head_segment.to_line
  end

  crossings = ->(mol_id) { @mol_map[mol_id].polygon.intersection_points_with_line(axis) }
  far = 9_999_999

  rgroup.each_with_object({}) do |mol_id, distances|
    next unless @mol_map.key?(mol_id)

    own_points = crossings.call(mol_id)

    # Distance to arrow
    to_arrow = far
    own_points.each do |point|
      reach = Geometry.distance(anchor, point)
      to_arrow = reach if reach < to_arrow
    end

    # Distance to the other molecules within the group
    to_neighbour = far
    (rgroup - [mol_id]).each do |other_id|
      next if @mol_map[other_id].nil?

      crossings.call(other_id).each do |theirs|
        own_points.each do |mine|
          reach = Geometry.distance(mine, theirs)
          to_neighbour = reach if reach < to_neighbour
        end
      end
    end

    distances[mol_id] = [to_arrow, to_neighbour].min
  end
end
#molecules_intersects_with_segment(segment) ⇒ Object
156 157 158 159 160 161 162 163 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 156

# Lists the keys of all molecules in +@mol_map+ whose polygon is reported as
# intersecting by +segment.intersects_with_polygon?+.
#
# @param segment segment to test against every molecule polygon
# @return [Array] matching molecule keys, in map order
def molecules_intersects_with_segment(segment)
  @mol_map.select { |_, mol| segment.intersects_with_polygon?(mol.polygon) }.keys
end
#multi_line_chain_reaction ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb', line 8

# Completes reactions that lack reactants or products by borrowing from the
# neighbouring row of arrows (rows come from +sort_arrow_map+).
#
# * A reaction without reactants takes the products of the LAST reaction of
#   the previous row.
# * Otherwise (no products) it takes the reactants of the FIRST reaction of
#   the next row.
#
# Nothing happens when +check_reaction_orderring+ returns true or when every
# reaction already has both reactants and products.
#
# Fix: a reaction in the first row has no previous row. The former
# +sorted_akey[rkey - 1]+ lookup evaluated to index -1 and silently wrapped
# around to the last row; such reactions are now skipped. A missing
# neighbour reaction is skipped as well instead of raising.
def multi_line_chain_reaction
  return if check_reaction_orderring

  incomplete = @reactions.select do |r|
    r.reactant_ids.empty? || r.product_ids.empty?
  end
  return if incomplete.empty?

  auto_fit_arrow_polygons
  rows = sort_arrow_map
  reaction_for = ->(id) { @reactions.detect { |r| r.arrow_id == id } }

  incomplete.each do |reaction|
    row = rows.find_index { |ids| ids.include?(reaction.arrow_id) }
    next if row.nil?

    if reaction.reactant_ids.empty?
      # Guard against the negative-index wrap-around for the first row.
      next if row.zero?

      donor = reaction_for.call(rows[row - 1].last)
      reaction.reactant_ids.concat(donor.product_ids) unless donor.nil?
    else
      next_row = rows[row + 1]
      next if next_row.nil?

      donor = reaction_for.call(next_row.first)
      reaction.product_ids.concat(donor.reactant_ids) unless donor.nil?
    end
  end
end
#nearest_arrow(text) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 108

# Finds the arrow segment closest to the bounding box of +text+.
#
# Only segments that contain the projection of the text polygon's center onto
# their line are considered. The result starts as key 0 / value 9_999_999 and
# is replaced only by a strictly smaller distance.
#
# @param text text object exposing +polygon+
# @return [OpenStruct] +key+ (arrow key, 0 if none) and +value+ (distance)
def nearest_arrow(text)
  outline = text.polygon
  best = OpenStruct.new(key: 0, value: 9_999_999)

  @arrow_map.each_pair do |arrow_key, arrow|
    arrow.segments.each do |segment|
      foot = segment.to_line.point_projection(outline.center)
      next unless segment.contains_point?(foot)

      gap = segment.distance_to_boundingbox(outline)
      next unless gap < best.value

      best.key = arrow_key
      best.value = gap
    end
  end

  best
end
#nearest_molecule(point) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 93

# Finds the molecule of +@mol_map+ with the smallest
# +min_distance_to_point(point)+.
#
# The result starts as key 0 / value 9_999_999 and is replaced only by a
# strictly smaller distance, so the first of several equally close molecules
# wins.
#
# @param point point to measure from
# @return [OpenStruct] +key+ (molecule key, 0 if none) and +value+ (distance)
def nearest_molecule(point)
  start = OpenStruct.new(key: 0, value: 9_999_999)

  @mol_map.each_with_object(start) do |(mol_key, mol), best|
    gap = mol.min_distance_to_point(point)
    next unless gap < best.value

    best.key = mol_key
    best.value = gap
  end
end
#process_reactions_step ⇒ Object
8 9 10 |
# File 'lib/chem_scanner/interpreter/post_process/reaction_step.rb', line 8

# Runs +detect_reaction_step+ on every reaction in +@reactions+.
def process_reactions_step
  @reactions.each do |reaction|
    detect_reaction_step(reaction)
  end
end
#refine_duplicate_reagents ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb', line 9

# Cleans up reagent ids that are shared between reactions.
#
# For each pair of distinct reactions (r, other):
# * ids that are reactants or products of +other+ are dropped from r's
#   reagents;
# * for ids that are reagents of both, the object (molecule, else text) is
#   looked up and the values returned by +contains_point?+ on both arrows are
#   compared by distance to the polygon center; when r's is farther, the id
#   is scheduled for removal from r.
# Scheduled removals are applied at the end through +delete_id+.
def refine_duplicate_reagents
  removals = []

  @reactions.each do |current|
    own_arrow = @arrow_map[current.arrow_id]

    @reactions.reject { |candidate| candidate.arrow_id == current.arrow_id }.each do |other|
      current.reagent_ids -= (other.reactant_ids + other.product_ids)

      (current.reagent_ids & other.reagent_ids).each do |id|
        item = @mol_map.key?(id) ? @mol_map[id] : @text_map[id]
        center = item.polygon.center

        near_own = own_arrow.contains_point?(center)
        near_other = @arrow_map[other.arrow_id].contains_point?(center)
        next if near_own.nil? || near_other.nil?

        own_dist = center.distance_to(near_own)
        other_dist = center.distance_to(near_other)
        next unless own_dist > other_dist

        removals.push(OpenStruct.new(rid: current.arrow_id, id: id))
      end
    end
  end

  removals.each do |entry|
    @reactions.detect { |r| r.arrow_id == entry.rid }.delete_id(entry.id)
  end
end
#remove_separated_mol ⇒ Object
(1): A —> C
(2): B —> D
|
|
V
E
Remove C from (2)
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb', line 16

# Removes molecules that are detached from the rest of a reactant/product
# group and that also belong to another, non-parallel reaction.
#
# For each reaction and each of its +reactant_ids+ / +product_ids+ groups
# with at least two members:
# 1. +distance_molecule_group+ gives a distance per molecule;
# 2. a molecule is a removal candidate when its distance exceeds
#    2.0 x the group's minimum distance AND some other reaction lists it in
#    +molecule_ids+ with an arrow not parallel to this one;
# 3. group members whose polygon center lies closer than that same threshold
#    to a candidate are removed together with it.
#
# Fix: +distance_molecule_group+ skips ids missing from +@mol_map+ and may
# return an empty map; the minimum lookup then raised NoMethodError on nil.
# Such groups are now left untouched.
def remove_separated_mol
  dist_gap = 2.0

  @reactions.each do |r|
    arrow = @arrow_map[r.arrow_id]

    %w[reactant_ids product_ids].each do |group|
      rgroup = r.send(group)
      next if rgroup.count < 2

      # Distance map of 1 molecule to arrow
      # and other molecules within group
      dist_map = distance_molecule_group(rgroup, arrow, group)
      next if dist_map.empty?

      threshold = dist_gap * dist_map.values.min

      separated = dist_map.select do |k, v|
        next false unless v > threshold

        @reactions.any? do |other|
          other.arrow_id != r.arrow_id &&
            other.molecule_ids.include?(k) &&
            !arrow.parallel_to?(@arrow_map[other.arrow_id])
        end
      end.keys

      remove_keys = separated.dup
      separated.each do |k|
        mol = @mol_map[k]
        next if mol.nil?

        (rgroup - [k]).each do |id|
          om = @mol_map[id]
          next if om.nil?

          d = Geometry.distance(mol.polygon.center, om.polygon.center)
          remove_keys.push(id) if d < threshold
        end
      end

      rgroup.delete_if { |x| remove_keys.include?(x) }
    end
  end
end
#sort_arrow_map ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb', line 59

# Groups the arrow keys into rows and orders them.
#
# Starting from the first remaining key, every arrow whose head y lies within
# +/- that arrow's +height+ of its head y joins the same row. Each row is then
# sorted by head x ascending, and rows are sorted by the head y of their
# first arrow, descending.
#
# @return [Array<Array>] rows of arrow keys
def sort_arrow_map
  rows = []
  pending = @arrow_map.keys

  until pending.empty?
    pivot = @arrow_map[pending.first]
    tolerance = pivot.height
    low = pivot.head.y - tolerance
    high = pivot.head.y + tolerance

    row, rest = pending.partition do |key|
      @arrow_map[key].head.y.between?(low, high)
    end

    rows << row
    pending = rest
  end

  rows.each { |row| row.sort_by! { |id| @arrow_map[id].head.x } }
  rows.sort_by! { |row| -@arrow_map[row.first].head.y }
end
#text_around_arrow?(arrow, text, dist) ⇒ Boolean
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 130

# Decides whether +text+ sits next to +arrow+ with no foreign molecule in
# between.
#
# Returns false unless +arrow.poly_in_middle?+ accepts the text polygon.
# Otherwise, for each arrow segment, a perpendicular segment through the text
# center is built; the result is true as soon as one end of it lies on the
# arrow segment and the only molecules it intersects are reagents of the
# arrow's reaction.
#
# Fix: when no reaction is registered for the arrow, the reagent list is
# treated as empty instead of raising NoMethodError on nil.
#
# @param arrow arrow candidate
# @param text  text object exposing +polygon+
# @param dist  distance added to the text size when rebuilding arrow polygons
# @return [Boolean]
def text_around_arrow?(arrow, text, dist)
  tpoly = text.polygon
  return false unless arrow.poly_in_middle?(tpoly)

  pheight = [tpoly.width, tpoly.height].max
  arrow.build_polygons(pheight + dist)
  # NOTE(review): the height is read AFTER the rebuild above, so this second
  # call may not restore the previous polygons - confirm the intended order.
  cur_height = arrow.height
  arrow.build_polygons(cur_height)

  tcenter = tpoly.center
  reaction = @reactions.detect { |r| r.arrow_id == arrow.id }
  reagent_ids = reaction.nil? ? [] : reaction.reagent_ids

  arrow.segments.any? do |aseg|
    pseg = aseg.perpen_segment_via_point(tcenter)
    touches = aseg.contains_point?(pseg.point1) ||
              aseg.contains_point?(pseg.point2)

    touches && (molecules_intersects_with_segment(pseg) - reagent_ids).empty?
  end
end
#try_detect_label_position(text) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb', line 72

# Tries to interpret a text as a molecule label placed on the reactant or
# product side of exactly one arrow.
#
# @param text text object exposing +value+, +bold_text+ and +polygon+
# @return [Hash, nil] +{ arrow_id => "reactants" | "products" }+ when the
#   text value equals its bold text and +detect_position+ returns a side for
#   exactly one reaction and that side is "reactants" or "products";
#   nil otherwise
def try_detect_label_position(text)
  return nil unless text.value == text.bold_text

  sides = {}
  @reactions.each do |reaction|
    arrow_id = reaction.arrow_id
    side = detect_position(@arrow_map[arrow_id], text.polygon)
    sides[arrow_id] = side unless side.nil?
  end

  return nil if sides.size != 1

  %w[reactants products].include?(sides.values.first) ? sides : nil
end