Module: ChemScanner::Interpreter::PreProcess

Included in:
Scheme
Defined in:
lib/chem_scanner/interpreter/pre_process/arrow.rb,
lib/chem_scanner/interpreter/pre_process/graphic.rb,
lib/chem_scanner/interpreter/pre_process/molecule.rb

Instance Method Summary collapse

Instance Method Details

#assemble_ionic_moleculeObject



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/chem_scanner/interpreter/pre_process/molecule.rb', line 68

# Pairs neighbouring molecules that carry opposite charges and merges each
# pair into one molecule.
#
# Candidates are every entry of @mol_map, plus the sole molecule of every
# single-molecule entry of @mol_group_map, that has exactly one charged atom.
# Each candidate is matched with the closest candidate of exactly opposite
# charge (distance between bounding-box centers). A partner claimed by more
# than one candidate is ambiguous, so all of those pairings are dropped.
#
# For every remaining pair the partner is added to the molecule, the output
# formats are refreshed, and the partner is removed from @mol_map /
# @mol_group_map (together with its group title in @text_map).
#
# @return [Hash] the id => partner id pairs that were merged
def assemble_ionic_molecule
  # Describes a molecule as a pairing candidate; nil unless exactly one of
  # its atoms is charged.
  charged_entry = lambda do |mol, mid|
    charged_ids = mol.charged_atom_ids
    next nil unless charged_ids.size == 1

    aid = charged_ids.first
    { mol: mol, aid: aid, charge: mol.atom_map[aid].charge, mid: mid }
  end

  list_mol = @mol_map.map { |mid, mol| charged_entry.call(mol, mid) }
  @mol_group_map.each do |gid, group|
    # Only groups holding a single molecule take part in the pairing
    next unless group.molecules.count == 1

    list_mol.push(charged_entry.call(group.molecules.first, gid))
  end
  list_mol.compact!

  grouped = {}
  list_mol.each do |charged_info|
    center = charged_info[:mol].polygon.bounding_box.center

    # Closest candidate of exactly opposite charge; the first one wins a tie
    nearest = nil
    nearest_dist = 99_999
    list_mol.each do |other|
      next unless other[:charge] == -charged_info[:charge]

      dist = Geometry.distance(center, other[:mol].polygon.bounding_box.center)
      next unless dist < nearest_dist

      nearest = other
      nearest_dist = dist
    end
    # Estimated value, could change later
    next if nearest.nil? || nearest_dist > 4

    mid = charged_info[:mid]
    # Skip molecules that already take part in a pair, on either side
    next if grouped.key?(mid) || grouped.value?(mid)

    grouped[mid] = nearest[:mid]
  end

  # { a1 => b, a2 => b, a3 => c } then remove both a1 and a2
  claimants = grouped.keys.group_by { |mid| grouped[mid] }
  dup_keys = claimants.values.select { |mids| mids.size > 1 }.flatten
  grouped.delete_if { |mid, _| dup_keys.include?(mid) }

  # An id refers either to a standalone molecule or to a single-molecule group
  get_mol = lambda do |id|
    @mol_map.key?(id) ? @mol_map[id] : @mol_group_map[id].molecules.first
  end

  grouped.each do |key, okey|
    mol = get_mol.call(key)
    omol = get_mol.call(okey)

    mol.add(omol)
    mol.update_output_formats
    @mol_map.delete(okey)
    mgid = @mol_group_map.delete(okey)
    next if mgid.nil?

    # The absorbed group no longer exists, so drop its title text as well
    @text_map.delete(mgid.title.id)
  end
end

#detect_line_fragmentObject

  • Check text within mol

  • Detect any “arrow” molecules (straight chains of C bonds: —–) that were drawn to be read as an arrow



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/chem_scanner/interpreter/pre_process/arrow.rb', line 90

# Turns every fragment reporting `line?` (a molecule drawn as an "extended"
# arrow) into a segment: the fragment is counted in @fragment_as_line, a
# Geometry::Segment spanning its two outermost nodes is stored in
# @segment_map under the fragment's key, and the fragment is removed from
# @fragment_map.
#
# @return [Array] keys of the fragments that were converted
def detect_line_fragment
  line_keys = []

  @fragment_map.each do |key, fragment|
    # Check if user draw a molecule as an "extended" arrow
    next unless fragment.line?

    line_keys.push(key)
    @fragment_as_line += 1

    nodes = fragment.node_map.values
    # A line whose nodes all share one x is vertical and must be ordered
    # by y; any other line is ordered by x. Ordering by the constant
    # coordinate would leave the end points in arbitrary order.
    is_vertical = nodes.map(&:x).uniq.count == 1
    sorted_atoms = nodes.sort_by { |atom| is_vertical ? atom.y : atom.x }

    @segment_map[key] = Geometry::Segment.new(sorted_atoms.first, sorted_atoms.last)
  end

  # Deleted after the loop so @fragment_map is not mutated while iterated
  line_keys.each { |k| @fragment_map.delete(k) }
end

#extract_fragment_graphicObject



32
33
34
35
36
37
38
# File 'lib/chem_scanner/interpreter/pre_process/graphic.rb', line 32

# Copies the graphics nested inside each fragment into the scheme-level
# @graphic_map. Entries with the same key are overwritten by the fragment's.
def extract_fragment_graphic
  @fragment_map.each_value do |fragment|
    nested = fragment.graphic_map
    @graphic_map.merge!(nested) unless nested.empty?
  end
end

#find_fragment_inside_rectangleObject



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/chem_scanner/interpreter/pre_process/graphic.rb', line 8

# Marks fragments as boxed when they are framed by a small rectangle graphic.
#
# A rectangle is a graphic of type 3 whose bounding-box area is below 100.
# A standalone fragment is boxed when its polygon lies inside the rectangle;
# a group holding exactly one fragment is boxed when the group's title text
# lies inside the rectangle.
def find_fragment_inside_rectangle
  # 3 = Rectangle
  small_rectangles = @graphic_map.select do |_, candidate|
    candidate.type == 3 && candidate.bounding_box.area < 100
  end

  small_rectangles.each_value do |rectangle|
    @fragment_map.each_value do |fragment|
      fragment.boxed = true if rectangle.polygon.contains_polygon?(fragment.polygon)
    end

    @fragment_group_map.each_value do |fgroup|
      members = fgroup[:fragment_map].values
      next if members.count != 1

      title = fgroup[:title]
      next unless rectangle.polygon.contains_polygon?(title.polygon)

      members.first.boxed = true
    end
  end
end

#fragment_to_moleculesObject



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/chem_scanner/interpreter/pre_process/molecule.rb', line 34

# Builds the molecule maps from the fragment maps.
#
# Every non-empty fragment of @fragment_map becomes a processed Molecule in
# @mol_map under the same key. Every entry of @fragment_group_map becomes a
# MoleculeGroup in @mol_group_map that keeps the group title and receives
# the group's fragments, except those containing a node of positive type.
def fragment_to_molecules
  @fragment_map.each do |fid, fragment|
    next if fragment.node_map.empty?

    molecule = Molecule.new(fragment)
    molecule.process
    @mol_map[fid] = molecule
  end

  @fragment_group_map.each do |gid, fgroup|
    group = MoleculeGroup.new
    group.title = fgroup[:title]

    fgroup[:fragment_map].each_value do |fragment|
      # NOTE: nested fragment should not contain any special type.
      # For instance, there are some cases that
      # DMF is implicitly converted to C-C-C with nickname D-M-F
      has_special_node = fragment.node_map.any? { |_, node| node.type.positive? }
      group.add_fragment(fragment) unless has_special_node
    end

    @mol_group_map[gid] = group
  end
end

#populate_molecule_infoObject



59
60
61
62
63
64
65
66
# File 'lib/chem_scanner/interpreter/pre_process/molecule.rb', line 59

# Refreshes the output formats of every known molecule: the standalone ones
# in @mol_map first, then the molecules of every group in @mol_group_map.
#
# @return [Array] all molecules that were refreshed, in that order
def populate_molecule_info
  grouped_molecules = @mol_group_map.values.flat_map(&:molecules)

  (@mol_map.values + grouped_molecules).each(&:update_output_formats)
end

#process_orbital_as_polymerObject



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/chem_scanner/interpreter/pre_process/molecule.rb', line 16

# Treats certain orbital graphics as polymer markers.
#
# For every graphic with orbital_type 256 and oval_type 3 that has a
# polygon, each fragment node whose point lies inside that polygon is
# flagged via `set_is_polymer`, and the owning fragment's polygon is merged
# with the graphic's polygon.
def process_orbital_as_polymer
  @graphic_map.each_value do |graphic|
    next if graphic.orbital_type != 256 || graphic.oval_type != 3

    marker_area = graphic.polygon
    next if marker_area.nil?

    @fragment_map.each_value do |fragment|
      fragment.node_map.each_value do |node|
        if marker_area.contains?(node.point)
          node.set_is_polymer
          fragment.polygon = fragment.polygon.merge_polygon(marker_area)
        end
      end
    end
  end
end

#refine_arrowObject

  • Detect crossed arrows using the line map

  • Attach “extension” lines to the arrow



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/chem_scanner/interpreter/pre_process/arrow.rb', line 12

# Builds @arrow_map from the drawn geometries and graphics, then attaches
# loose lines to the arrows they belong to.
#
# Steps, in order:
# 1. `detect_line_fragment` runs first.
# 2. Headless geometries and line graphics are removed from their maps and
#    stored as Geometry::Segment objects in @segment_map.
# 3. Every remaining geometry, and every graphic having both head and tail,
#    becomes an Arrow in @arrow_map. When another arrow's segment crosses
#    this arrow's line within ESTIMATED_DIST of its head, the head is moved
#    to the other arrow's head.
# 4. `try_check_cross`, `try_extend_tail` and `try_extend_split` run.
def refine_arrow
  detect_line_fragment

  # Headless arrow ~ line, part of the real arrow.
  # Plain line graphics are handled the same way.
  [
    [@geometry_map, ->(g) { g.headless }],
    [@graphic_map, ->(g) { g.line? }],
  ].each do |map, line_like|
    map.select { |_, g| line_like.call(g) }.each_key do |k|
      segment = map.delete(k)
      tail = Geometry::Point.new(segment.tail[:x], segment.tail[:y])
      head = Geometry::Point.new(segment.head[:x], segment.head[:y])

      @segment_map[k] = Geometry::Segment.new(tail, head)
    end
  end

  #      |
  # ---->|
  #      |
  #      V
  arrow_graphic = @graphic_map.reject { |_, g| g.head.nil? || g.tail.nil? }
  all_arrow = @geometry_map.merge(arrow_graphic)
  all_arrow.each do |key, geometry|
    arrow = Arrow.new(geometry)
    @arrow_map[key] = arrow
    line = geometry.segment.to_line

    all_arrow.each do |other_key, other|
      # An arrow is never compared with itself
      next if other_key == key

      oseg = other.segment
      next unless line.intersects_with_segment?(oseg)

      point = line.intersection_points_with(oseg.to_line)
      next unless oseg.contains_point?(point)

      #     |
      #     |
      # ----|->
      #     |
      #     |
      #     v
      # NOTE: due to manual drawing, the intersection point may not be
      # exactly at the head of the arrow
      next if Geometry.distance(arrow.head, point) > ESTIMATED_DIST

      # If it intersects with any other geometry
      arrow.change_head(other.head)
    end
  end

  # --\-->
  # Same effect as the "nogo" attribute
  try_check_cross

  # -----|
  #      |
  #      V
  try_extend_tail

  #      |------>
  #      |
  # -----|
  #      |
  #      |------>
  try_extend_split
end

#refine_moleculesObject



8
9
10
11
12
13
14
# File 'lib/chem_scanner/interpreter/pre_process/molecule.rb', line 8

# Entry point of the molecule pre-processing: runs the individual steps in
# a fixed order.
def refine_molecules
  # Flag polymer nodes on the fragments before molecules are built from them
  process_orbital_as_polymer
  # Fill @mol_map / @mol_group_map from the fragment maps
  fragment_to_molecules
  # Refresh the output formats of every molecule
  populate_molecule_info

  # Runs last: it merges and removes entries of the maps built above
  assemble_ionic_molecule
end

#try_check_crossObject



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/chem_scanner/interpreter/pre_process/arrow.rb', line 172

# Attaches loose segments that cross an arrow to that arrow.
#
# Arrows already flagged as `cross` are skipped. For every other arrow, each
# segment of @segment_map is tested against each of the arrow's segments.
# When they intersect at a point that lies on the arrow segment and passes
# `point_in_range(point, 3.0 / 5.0)` on the loose segment (presumably "near
# the middle of the segment" - confirm against Geometry::Segment), the
# loose segment is added to the arrow and afterwards removed from
# @segment_map.
def try_check_cross
  @arrow_map.each_value do |arrow|
    next if arrow.cross

    consumed = []
    @segment_map.each do |skey, loose|
      arrow.segments.each do |arrow_part|
        next unless loose.intersects_with?(arrow_part)

        crossing = loose.intersection_point_with(arrow_part)
        next unless arrow_part.contains_point?(crossing)
        next unless loose.point_in_range(crossing, 3.0 / 5.0)

        # Add to the "polyline" of arrow
        arrow.add_cross_segment(loose)
        consumed.push(skey)
      end
    end

    # Removed only after the scan so @segment_map is not mutated mid-iteration
    consumed.each { |skey| @segment_map.delete(skey) }
  end
end

#try_extend_splitObject



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/chem_scanner/interpreter/pre_process/arrow.rb', line 139

# Extends arrows that branch off a shared line:
#
#      |------>
#      |
# -----|
#      |
#      |------>
#
# For every loose segment whose line meets an arrow's tail segment with one
# of the segment's end points within ESTIMATED_DIST of the intersection,
# the arrow's tail is updated to the intersection point and then changed to
# the segment's far end point. The consumed segment is removed from
# @segment_map. When several segments qualify for one arrow, the last one
# found wins.
#
# @return [Hash] arrow id => { skey:, point:, tpoint: } for extended arrows
def try_extend_split
  arrow_new_split = {}

  @segment_map.each do |key, segment|
    line = segment.to_line

    @arrow_map.each_value do |arrow|
      asegment = arrow.tail_segment
      next unless line.intersects_with_segment?(asegment)

      point = line.intersection_points_with(asegment.to_line)
      dist1 = Geometry.distance(segment.point1, point)
      dist2 = Geometry.distance(segment.point2, point)
      next if [dist1, dist2].min > ESTIMATED_DIST

      # The end point away from the intersection becomes the new tail
      tail_point = dist1 < dist2 ? segment.point2 : segment.point1
      arrow_new_split[arrow.id] = {
        skey: key,
        point: point,
        tpoint: tail_point,
      }
    end
  end

  arrow_new_split.each do |aid, split_info|
    arrow = @arrow_map[aid]
    arrow.update_tail(split_info[:point])
    arrow.change_tail(split_info[:tpoint])

    # The segment key must be read from the key it was stored under
    # (:skey); otherwise the consumed segment is never removed.
    @segment_map.delete(split_info[:skey])
  end
end

#try_extend_tailObject

Try to extend base arrow if possible



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/chem_scanner/interpreter/pre_process/arrow.rb', line 112

# Lengthens arrows whose tail touches a loose segment:
#
# -----|
#      |
#      V
#
# For each segment/arrow combination, the segment end point closest to the
# arrow's tail is measured. If it is within ESTIMATED_DIST, the opposite end
# point is recorded as the arrow's new tail (the last qualifying segment
# wins per arrow). Recorded segments are then removed from @segment_map and
# the arrows' tails changed.
#
# @return [Hash] arrow id => { skey:, point: } for every extended arrow
def try_extend_tail
  extensions = {}

  @segment_map.each do |skey, loose|
    @arrow_map.each_value do |arrow|
      from_start = Geometry.distance(loose.point1, arrow.tail)
      from_end = Geometry.distance(loose.point2, arrow.tail)
      # Whichever end is nearer touches the tail; the other end extends it
      gap, far_end =
        from_start <= from_end ? [from_start, loose.point2] : [from_end, loose.point1]

      next if gap > ESTIMATED_DIST

      extensions[arrow.id] = { skey: skey, point: far_end }
    end
  end

  extensions.each do |aid, extension|
    @segment_map.delete(extension[:skey])
    @arrow_map[aid].change_tail(extension[:point])
  end
end