Class: ChemScanner::Interpreter::Molecule

Inherits:
Object
  • Object
show all
Defined in:
lib/chem_scanner/interpreter/element/molecule.rb

Overview

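Molecule class: couples a parsed ChemDraw fragment with an RDKit read/write molecule and exposes its canonical SMILES and MDL molfile output.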

Constant Summary collapse

# Upper-case hex RGB string that #check_red compares the fragment's
# colour-table entry against.
RGB_RED =
"FF0000"
# Bond orders that #add_bond accepts; any order that is not a key of this
# map is rejected there.
# NOTE(review): the values look like RDKit BondType enum codes
# (e.g. 7 = one-and-a-half, 17 = dative) — confirm against the RDKit binding.
# Within the code shown on this page only the keys are consulted.
CHEMDRAW_RDKIT_BTYPE_MAP =
{
  0 => 0,
  1 => 1,
  2 => 2,
  3 => 3,
  4 => 4,
  5 => 5,
  6 => 6,
  1.5 => 7,
  2.5 => 8,
  3.5 => 9,
  4.5 => 10,
  5.5 => 11,
  "ionic" => 13,
  "hydrogen" => 14,
  "dative" => 17,
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fragment = nil) ⇒ Molecule

Returns a new instance of Molecule.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 36

# Builds an empty molecule, optionally bound to a parsed fragment.
#
# No conversion happens here: the RDKit molecule starts out empty and is
# only filled in when #process walks the fragment.
#
# @param fragment [Object, nil] source fragment; when given it must respond
#   to +polygon+ and +boxed+
def initialize(fragment = nil)
  @fragment = fragment
  unless fragment.nil?
    @polygon = fragment.polygon
    @boxed = fragment.boxed
  end

  # Free-text annotations, blank until assigned.
  @text = ""
  @label = ""
  @abbreviation = ""
  @text_ids = []
  @details = OpenStruct.new

  # Serialised outputs, blank until the output formats are set.
  @mdl = ""
  @cano_smiles = ""

  # Lookup tables; the atom and bond maps are filled by #process.
  @atom_bookmark_map = {}
  @atom_map = {}
  @bond_map = {}

  # Ids of bonds that #add_bond records for special handling.
  @dash_bonds = []
  @dative_bonds = []

  # Empty RDKit molecule with one conformer attached.
  @rw_mol = RDKitChem::RWMol.new
  @conf = RDKitChem::Conformer.new
  @rw_mol.add_conf(@conf)
end

Instance Attribute Details

#abbreviation ⇒ Object

Returns the value of attribute abbreviation.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the molecule's abbreviation.
#
# @return [Object] current value of @abbreviation (the constructor sets it to "")
def abbreviation
  @abbreviation
end

#atom_map ⇒ Object (readonly)

Returns the value of attribute atom_map.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the atom lookup table.
#
# @return [Hash] @atom_map; #process stores one Atom per fragment node in it,
#   keyed by the node id
def atom_map
  @atom_map
end

#boxed ⇒ Object

Returns the value of attribute boxed.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the boxed flag.
#
# @return [Object, nil] @boxed; copied from the fragment by the constructor
#   and left unset (nil) when the molecule was built without a fragment
def boxed
  @boxed
end

#cano_smiles ⇒ Object (readonly)

Returns the value of attribute cano_smiles.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the cached SMILES string.
#
# @return [String] @cano_smiles; "" after construction, refreshed by
#   #update_output_formats or assigned through #set_output_formats
def cano_smiles
  @cano_smiles
end

#clone_from ⇒ Object

Returns the value of attribute clone_from.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the id of the molecule this one was cloned from.
#
# @return [Object, nil] @clone_from; assigned by #clone on the copy it
#   creates, not set by the constructor
def clone_from
  @clone_from
end

#dash_bonds ⇒ Object (readonly)

Returns the value of attribute dash_bonds.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the list of dash-bond ids.
#
# @return [Array] @dash_bonds; #add_bond pushes the id of every bond whose
#   stereo value is 1 instead of adding it to the RDKit molecule
def dash_bonds
  @dash_bonds
end

#dative_bonds ⇒ Object (readonly)

Returns the value of attribute dative_bonds.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the list of dative-bond ids.
#
# @return [Array] @dative_bonds; #add_bond pushes the id of every bond whose
#   order was "dative"
def dative_bonds
  @dative_bonds
end

#details ⇒ Object

Returns the value of attribute details.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the free-form details record.
#
# @return [OpenStruct] @details; an empty OpenStruct after construction
def details
  @details
end

#fragment ⇒ Object (readonly)

Returns the value of attribute fragment.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the source fragment.
#
# @return [Object, nil] @fragment as passed to the constructor or replaced
#   through #set_fragment
def fragment
  @fragment
end

#is_red ⇒ Object (readonly)

Returns the value of attribute is_red.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the red-colour flag.
#
# @return [Boolean, nil] @is_red; written by #check_red and nil until that
#   method has run
def is_red
  @is_red
end

#label ⇒ Object

Returns the value of attribute label.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the molecule's label.
#
# @return [String] @label; "" after construction
def label
  @label
end

#mdl ⇒ Object (readonly)

Returns the value of attribute mdl.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the cached mol block.
#
# @return [String] @mdl; "" after construction, refreshed by
#   #update_output_formats or assigned through #set_output_formats
def mdl
  @mdl
end

#polygon ⇒ Object (readonly)

Returns the value of attribute polygon.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the molecule's polygon.
#
# @return [Object, nil] @polygon; taken from the fragment by the constructor
#   and by #add, nil when no fragment was supplied
def polygon
  @polygon
end

#rw_mol ⇒ Object (readonly)

Returns the value of attribute rw_mol.



14
15
16
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 14

# Reader for the underlying RDKit molecule.
#
# @return [RDKitChem::RWMol] @rw_mol; created empty by the constructor and
#   replaceable through #set_rw_mol
def rw_mol
  @rw_mol
end

#text ⇒ Object

Returns the value of attribute text.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the text attached to the molecule.
#
# @return [String] @text; "" after construction
def text
  @text
end

#text_ids ⇒ Object

Returns the value of attribute text_ids.



11
12
13
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 11

# Reader for the ids of the text elements attached to the molecule.
#
# @return [Array] @text_ids; empty after construction, extended in place by #add
def text_ids
  @text_ids
end

Class Method Details

.new_from_smiles(id, smiles) ⇒ Object



458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 458

# Builds a molecule directly from a SMILES string, without a drawn fragment.
#
# The molecule receives a stand-in fragment that only carries +id+, and its
# output formats are set to the given SMILES plus the mol block RDKit
# generates for it.
#
# @param id [Object] identifier exposed through the stand-in fragment
# @param smiles [String] SMILES handed to RDKitChem::RWMol.mol_from_smiles
# @return [Molecule] the new molecule
def self.new_from_smiles(id, smiles)
  new(nil).tap do |molecule|
    parsed = RDKitChem::RWMol.mol_from_smiles(smiles)
    molecule.set_rw_mol(parsed)
    molecule.set_fragment(OpenStruct.new(id: id))

    mol_block = parsed.mol_to_mol_block(true, -1, false)
    molecule.set_output_formats(smiles, mol_block)
  end
end

Instance Method Details

#add(other) ⇒ Object



348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 348

# Merges another molecule into this one.
#
# The fragments are merged, text and bookkeeping lists are concatenated,
# details are merged (the other molecule's values win on key clashes) and
# the two RDKit molecules are combined into a new RWMol.
#
# @param other [Molecule] molecule to absorb
# @return [Array] the dative bond list after concatenation
def add(other)
  @fragment.add(other.fragment)
  @polygon = fragment.polygon

  @text += " #{other.text}"
  # NOTE(review): this molecule's own label is discarded here and replaced by
  # the other molecule's label (or ""), unlike @text which is appended to —
  # confirm this is intended.
  @label = other.label.empty? ? "" : " #{other.label}"
  @text_ids.concat(other.text_ids)
  @boxed |= other.boxed

  other_details = other.details.marshal_dump
  merged_details = @details.marshal_dump.merge(other_details)
  @details = OpenStruct.new(merged_details)

  @atom_map.merge!(other.atom_map)

  @rw_mol = RDKitChem::RWMol.new(
    RDKitChem.combine_mols(@rw_mol, other.rw_mol),
  )

  @dash_bonds.concat(other.dash_bonds)
  @dative_bonds.concat(other.dative_bonds)
end

#add_bond(bond) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 102

# Adds one fragment bond to the RDKit molecule.
#
# @param bond [Object] fragment bond exposing +order+, +begin_id+, +end_id+,
#   +stereo+ and +id+ (its +order+ is rewritten to 1 for dative bonds)
# @return [Integer] the value returned by RWMol#add_bond, or -1 when the bond
#   is not added: unsupported order, a bond between the same two atoms is
#   already registered, the bond is a dash bond (stereo == 1), or RDKit
#   raised a RuntimeError
def add_bond(bond)
  order = bond.order
  return -1 unless CHEMDRAW_RDKIT_BTYPE_MAP.key?(order)

  begin_id = bond.begin_id
  end_id = bond.end_id
  stereo = bond.stereo

  # Stereo codes 4, 7, 10 and 12 are treated as inverse-direction bonds:
  # their endpoints are swapped before anything else happens.
  begin_id, end_id = end_id, begin_id if [4, 7, 10, 12].include?(stereo)

  batom = @atom_map[begin_id]
  eatom = @atom_map[end_id]
  bidx = batom.get_idx
  eidx = eatom.get_idx

  # Refuse a second bond between the same pair of atoms.
  known = get_atom_bonds(begin_id) + get_atom_bonds(end_id)
  return -1 if known.any? { |b| b.has_endpoint?(begin_id) && b.has_endpoint?(end_id) }

  if order == "dative"
    # A dative bond is stored as a single bond; when both atoms are neutral
    # the begin atom gets charge -1 and the end atom +1.
    order = 1
    bond.order = 1

    if batom.charge.zero? && eatom.charge.zero?
      batom.set_formal_charge(-1)
      eatom.set_formal_charge(1)
    end

    @dative_bonds.push(bond.id)
  end

  # Dash bonds are only remembered; #get_mdl adds them temporarily.
  if stereo == 1
    @dash_bonds.push(bond.id)
    return -1
  end

  begin
    rd_bond = RDKitChem::Bond.new(order)
    rd_bond.set_begin_atom_idx(bidx)
    rd_bond.set_end_atom_idx(eidx)

    # Stereo handling: only these stereo codes carry a bond direction.
    direction = { 3 => 2, 4 => 2, 6 => 1, 7 => 1 }[stereo]
    unless direction.nil?
      rd_bond.set_bond_dir(direction)
      @chiral_possible = true
    end

    rd_bond.set_owning_mol(@rw_mol)
    @rw_mol.add_bond(rd_bond)
  rescue RuntimeError
    -1
  end
end

#charged_atom_ids ⇒ Object



291
292
293
294
295
296
297
298
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 291

# Lists the ids of all atoms that carry a non-zero charge.
#
# @return [Array] keys of @atom_map whose atom reports a non-zero +charge+,
#   in map order
def charged_atom_ids
  @atom_map.reject { |_id, atom| atom.charge.zero? }.keys
end

#check_red ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 262

# Determines whether the whole fragment is drawn in red and stores the
# answer in @is_red.
#
# The fragment counts as red only when all nodes share one colour index, all
# bonds share one colour index, both indices are equal, and the parser's
# colour-table entry for that index equals RGB_RED (case-insensitively).
#
# @return [Boolean, nil] the red flag when the colour table was consulted,
#   nil when the method bailed out early (@is_red is false in that case)
def check_red
  @is_red = false

  ncolors = @fragment.node_map.values.map(&:color).uniq
  bcolors = @fragment.bond_map.values.map(&:color).uniq
  return if ncolors.count != 1 || bcolors.count != 1

  ncolor = ncolors.first
  return if ncolor != bcolors.first

  color = @fragment.parser.color_table[ncolor].upcase
  # Store the result: previously the comparison was evaluated but never
  # assigned, so @is_red could never become true.
  @is_red = (color == RGB_RED)
end

#clone ⇒ Object



300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 300

# Creates an independent copy of this molecule.
#
# The copy is rebuilt from a clone of the fragment (processed again and with
# fresh output formats) and then receives this molecule's label, text and
# text ids. +clone_from+ on the copy points at the original ancestor: this
# molecule's own +clone_from+ if it is itself a clone, otherwise its id.
#
# @return [Molecule] the processed copy
def clone
  cloned = self.class.new(@fragment.clone)
  cloned.process
  cloned.update_output_formats

  cloned.clone_from = @clone_from.nil? ? id : @clone_from
  cloned.label = @label
  # Copy the list: #add extends @text_ids in place with concat, so sharing
  # one array would leak text ids between the clone and its source.
  cloned.text_ids = @text_ids.dup
  cloned.text = @text

  cloned
end

#get_atom(id) ⇒ Object



283
284
285
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 283

# Looks up an atom by id.
#
# @param id [Object] key into @atom_map (the node id under which #process
#   registered the atom)
# @return [Object, nil] the stored atom, or nil when the id is unknown
def get_atom(id)
  @atom_map[id]
end

#get_atom_bonds(atom_id) ⇒ Object



98
99
100
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 98

# Collects the registered bonds that touch the given atom.
#
# @param atom_id [Object] atom id passed to each bond's +has_endpoint?+
# @return [Array] bonds from @bond_map that have +atom_id+ as an endpoint
def get_atom_bonds(atom_id)
  @bond_map.each_value.select { |bond| bond.has_endpoint?(atom_id) }
end

#get_cano_smiles ⇒ Object



313
314
315
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 313

# Asks RDKit for the SMILES of the current molecule.
#
# @return [Object] result of +@rw_mol.mol_to_smiles(true)+
# NOTE(review): the meaning of the +true+ argument is defined by the RDKit
# binding and is not visible here — confirm.
def get_cano_smiles
  @rw_mol.mol_to_smiles(true)
end

#get_mdl ⇒ Object



317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 317

# Renders the molecule as a mol block that also contains the dash bonds.
#
# Dash bonds are not part of @rw_mol (see #add_bond), so they are added
# temporarily with bond type 17, the mol block is generated, and the
# temporary bonds are removed again.
# NOTE(review): 17 matches the "dative" entry of CHEMDRAW_RDKIT_BTYPE_MAP —
# confirm that this is the intended RDKit bond type for dash bonds.
#
# @return [String] the mol block, tagged as UTF-8
# @raise [RuntimeError] whatever the RDKit binding raises; the temporary
#   bonds added so far are removed before the error propagates
def get_mdl
  # Resolve every dash bond to its pair of RDKit atom indices up front, so a
  # lookup failure cannot leave the molecule half-modified.
  endpoints = @dash_bonds.map do |dash_id|
    bond = @fragment.bond_map[dash_id]
    [@atom_map[bond.begin_id].get_idx, @atom_map[bond.end_id].get_idx]
  end

  added = []
  begin
    endpoints.each do |bidx, eidx|
      @rw_mol.add_bond(bidx, eidx, 17)
      added.push([bidx, eidx])
    end

    mdl = @rw_mol.mol_to_mol_block(true, -1, false)
  ensure
    # Always undo the temporary bonds, even when mol block generation fails.
    added.each { |bidx, eidx| @rw_mol.remove_bond(bidx, eidx) }
  end

  mdl.force_encoding(Encoding::UTF_8)
end

#group_transform(aid, group, value) ⇒ Object



418
419
420
421
422
423
424
425
426
427
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 418

# Replaces the first occurrence of +group+ in an atom's alias text with
# +value+ and tries to expand the resulting text.
#
# The atom's own alias text is left untouched; the substituted copy is
# handed to #try_expand_atoms, after which the output formats are refreshed.
#
# @param aid [Object] id of the atom in @atom_map
# @param group [String] text to look for in the alias
# @param value [String] replacement text
# @return [Object, nil] result of #update_output_formats, or nil when the
#   alias does not contain +group+
def group_transform(aid, group, value)
  alias_text = @atom_map[aid].alias_text
  return unless alias_text.include?(group)

  substituted = alias_text.sub(group, value)
  try_expand_atoms([OpenStruct.new(text: substituted, id: aid)])
  update_output_formats
end

#id ⇒ Object



61
62
63
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 61

# Identifier of this molecule, delegated to the underlying fragment.
#
# @return [Object] whatever +@fragment.id+ returns
def id
  @fragment.id
end

#inspect ⇒ Object



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 429

# Human-readable dump of the molecule's main attributes.
#
# @return [String] "#<Molecule: ...>" listing id, polygon, text, label, mdl,
#   cano_smiles, text_ids, boxed, details, dash_bonds and dative_bonds
def inspect
  "#<Molecule: id=#{fragment.id}, " \
    "polygon: #{polygon}," \
    "text: #{text}, " \
    "label: #{label}, " \
    "mdl: #{mdl}, " \
    "cano_smiles: #{cano_smiles}, " \
    "text_ids: #{text_ids}, " \
    "boxed: #{boxed}, " \
    "details: #{details}, " \
    "dash_bonds: #{dash_bonds}, " \
    "dative_bonds: #{dative_bonds} >"
end

#kekulize ⇒ Object



163
164
165
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 163

# Runs RDKit kekulization on the underlying molecule.
#
# @return [Object] whatever +RDKitChem.kekulize+ returns
# NOTE(review): #process rescues RuntimeError around this call, so the
# binding presumably raises RuntimeError on failure — confirm.
def kekulize
  RDKitChem.kekulize(@rw_mol)
end

#min_distance_to_point(point) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 248

# Distance from +point+ to the closest positioned, non-expanded fragment node.
#
# Nodes flagged as expanded and nodes missing an x or y coordinate are
# ignored.
#
# @param point [Object] reference point passed to Geometry::Point#distance_to
# @return [Numeric] the smallest distance found, or 9_999_999 when no node
#   qualifies or none is closer than that
def min_distance_to_point(point)
  candidates = @fragment.node_map.values.reject do |node|
    node.expanded || node.x.nil? || node.y.nil?
  end

  candidates.reduce(9_999_999) do |closest, node|
    distance = Geometry::Point.new(node.x, node.y).distance_to(point)
    distance < closest ? distance : closest
  end
end

#n_atom_transform(aid, num) ⇒ Object



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 369

# Replaces one chain atom by +num+ copies of itself, linked in a row between
# the atom's two neighbours.
#
# @param aid [Object] id of the atom to repeat; it must have exactly two
#   bonds in the fragment, leading to two neighbour ids
# @param num [Integer] number of copies to insert
# @return [false, Object] false when nothing was changed (num == 1 or the
#   atom does not have exactly two bonds/neighbours), otherwise the result of
#   #update_output_formats
def n_atom_transform(aid, num)
  return false if num == 1

  # Bonds are looked up in the fragment, not in @bond_map.
  bonds = @fragment.bond_map.values.select do |b|
    [b.begin_id, b.end_id].include?(aid)
  end
  return false unless bonds.count == 2

  # Ids at the far end of those two bonds.
  others = bonds.reduce([]) do |arr, bond|
    arr.concat([bond.begin_id, bond.end_id] - [aid])
  end
  return false unless others.count == 2

  # Copy of the molecule before modification, handed to compute_2dcoords below.
  ref_mol = RDKitChem::RWMol.new(@rw_mol)

  target_atom = @atom_map[aid]
  target_idx = target_atom.get_idx

  # Detach the target from both neighbours, then drop it.
  others.each do |other|
    oidx = @atom_map[other].get_idx
    @rw_mol.remove_bond(target_idx, oidx)
  end

  @rw_mol.remove_atom(target_idx)
  @atom_map.delete(aid)

  # NOTE(review): relies on Atom#clone producing an atom with its own id that
  # is already part of @rw_mol — Atom is not visible here, confirm.
  added_id = []
  (1..num).each do
    catom = target_atom.clone
    @atom_map[catom.id] = catom
    added_id.push(catom.id)
  end

  # Chain becomes: first neighbour, the copies, second neighbour.
  others.insert(1, *added_id)
  # for n atoms, need n+1 bonds
  (1..num + 1).each do |i|
    begin_idx = @atom_map[others[i - 1]].get_idx
    end_idx = @atom_map[others[i]].get_idx
    @rw_mol.add_bond(begin_idx, end_idx, 1)
  end

  # Coordinate generation is best effort: a RuntimeError is swallowed.
  begin
    @rw_mol.compute_2dcoords(ref_mol)
  rescue RuntimeError
  end

  update_output_formats
end

#process ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 65

# Converts the fragment into the RDKit molecule held in @rw_mol.
#
# Steps, in order: wrap every fragment node in an Atom, add every fragment
# bond through #add_bond, run RDKit stereo detection, remove hydrogens,
# kekulize and sanitize, then expand alias nodes (#try_expand) and evaluate
# the red-colour flag (#check_red).
#
# @return [Object] result of #check_red
def process
  # One Atom per fragment node, registered under the node id.
  @fragment.node_map.each do |nid, node|
    atom = Atom.new(node, @rw_mol)
    atom.process

    @atom_map[nid] = atom
  end

  # #add_bond switches this flag on when it sets a bond direction.
  @chiral_possible = false
  @fragment.bond_map.each do |k, bond|
    bid = add_bond(bond)
    # A negative result means #add_bond did not add the bond, so it is not
    # registered in @bond_map either.
    next if bid.negative?

    @bond_map[k] = bond
  end

  @rw_mol.detect_atom_stereo_chemistry(@conf) if @chiral_possible

  @rw_mol.clear_single_bond_dir_flags
  @rw_mol.detect_bond_stereo_chemistry(@conf)

  # NOTE(review): the three flags are defined by the RDKit binding and are
  # not visible here — confirm their meaning.
  @rw_mol.remove_hs(false, false, false)

  # A RuntimeError from kekulize/sanitize is swallowed and processing continues.
  begin
    kekulize
    RDKitChem.sanitize_mol(@rw_mol)
  rescue RuntimeError
  end

  try_expand
  check_red
end

#set_fragment(fragment) ⇒ Object



449
450
451
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 449

# Replaces the fragment backing this molecule.
#
# Only @fragment is reassigned; @polygon and @boxed, which the constructor
# derives from the fragment, are not refreshed here.
#
# @param fragment [Object] new fragment
# @return [Object] the fragment that was assigned
def set_fragment(fragment)
  @fragment = fragment
end

#set_output_formats(smiles, mdl) ⇒ Object



453
454
455
456
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 453

# Stores externally supplied output formats without recomputing them.
#
# @param smiles [String] value stored in @cano_smiles
# @param mdl [String] value stored in @mdl
# @return [String] the +mdl+ argument
def set_output_formats(smiles, mdl)
  @cano_smiles = smiles
  @mdl = mdl
end

#set_rw_mol(rw_mol) ⇒ Object



445
446
447
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 445

# Replaces the underlying RDKit molecule.
#
# Only @rw_mol is reassigned; @conf, @atom_map and @bond_map are left as
# they are.
#
# @param rw_mol [RDKitChem::RWMol] new molecule
# @return [Object] the molecule that was assigned
def set_rw_mol(rw_mol)
  @rw_mol = rw_mol
end

#to_hash ⇒ Object



287
288
289
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 287

# Compact hash summary of the molecule.
#
# @return [Hash] the fragment id under :id, the cached SMILES under :smiles,
#   plus :label and :text
def to_hash
  {
    id: @fragment.id,
    smiles: @cano_smiles,
    label: @label,
    text: @text,
  }
end

#try_expand ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 167

# Expands every alias node of the fragment that still needs expanding.
#
# A node qualifies when it is an alias, is not of type 8, has a non-empty
# alias text, has no nested fragment and carries a warning. The qualifying
# nodes are passed to #try_expand_atoms and the output formats are refreshed
# afterwards.
#
# @return [Object, nil] result of #update_output_formats, or nil when no
#   node qualifies
def try_expand
  expandable = @fragment.node_map.select do |_nid, node|
    # Node_Type = 8: AnonymousAlternativeGroup
    node.is_alias && node.type != 8 && !node.alias_text.empty? &&
      !node.nested_fragment.count.positive? && node.warning
  end
  return if expandable.empty?

  requests = expandable.map do |nid, node|
    OpenStruct.new(text: node.alias_text, id: nid)
  end

  try_expand_atoms(requests)
  update_output_formats
end

#try_expand_atoms(list_expand) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 185

# Replaces alias atoms by the structure of their superatom.
#
# For every entry: "H" is delegated to #try_expand_hydrogen; otherwise the
# superatom SMILES is looked up, its molecule is inserted into @rw_mol and
# the alias atom's bonds are re-pointed at the inserted structure. The
# replaced alias atoms are removed at the end and 2D coordinates are
# regenerated.
#
# @param list_expand [Array] entries responding to +text+ (alias text) and
#   +id+ (atom id)
# @return [Object, nil] result of compute_2dcoords, or nil when it raised a
#   RuntimeError
def try_expand_atoms(list_expand)
  # Copy of the molecule before modification, handed to compute_2dcoords below.
  ref = RDKitChem::RWMol.new(@rw_mol)

  delete_ids = []
  list_expand.each do |info|
    next try_expand_hydrogen(info.id) if info.text == "H"

    # Entries without a known superatom (empty SMILES) are skipped.
    smiles = ChemScanner.get_superatom(info.text)
    next if smiles.empty?

    expand_mol = RDKitChem::RWMol.mol_from_smiles(smiles)

    atom = @atom_map[info.id]
    delete_ids.push(info.id)
    idx = atom.get_idx

    target_bonds = get_atom_bonds(info.id)
    # Atom count before insertion.
    # NOTE(review): used below as the index of the first inserted atom, which
    # presumes insert_mol appends the new atoms at the end — confirm.
    first_expand_idx = @rw_mol.get_num_atoms

    @rw_mol.insert_mol(expand_mol)

    target_bonds.each do |bond|
      other_id = bond.other_endpoint(info.id)
      other_idx = @atom_map[other_id].get_idx

      @rw_mol.remove_bond(other_idx, idx)
      # after combined, first atom should be the target to link with
      @rw_mol.add_bond(other_idx, first_expand_idx, bond.order)
    end

    # The re-pointed bonds no longer belong to a tracked atom pair.
    target_bonds.each { |b| @bond_map.delete(b.id) }
  end

  # Alias atoms are removed only after all insertions.
  # NOTE(review): assumes each atom was bookmarked in @rw_mol under its atom
  # id (not done in the code shown here) — confirm.
  delete_ids.each do |aid|
    atom = @rw_mol.get_atom_with_bookmark(aid)
    @rw_mol.remove_atom(atom)
    @atom_map.delete(aid)
  end

  # Generate added atom coords; a RuntimeError is swallowed.
  begin
    @rw_mol.compute_2dcoords(ref)
  rescue RuntimeError
    return
  end
end

#try_expand_hydrogen(atom_id) ⇒ Object



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 232

# Removes an explicit "H" alias atom together with the bond that attaches it.
#
# Nothing is done when the atom has no registered bond (there is nothing to
# detach; previously this case crashed on a nil bond) or when it has exactly
# two bonds (original behaviour, kept as is — presumably a bridging
# hydrogen, TODO confirm).
#
# @param atom_id [Object] id of the hydrogen atom in @atom_map
# @return [Object, nil] the bond removed from @bond_map, or nil when the
#   atom was left untouched
def try_expand_hydrogen(atom_id)
  target_bonds = get_atom_bonds(atom_id)
  return if target_bonds.empty? || target_bonds.count == 2

  target_bond = target_bonds.first
  other = target_bond.other_endpoint(atom_id)
  other_atom = @atom_map[other]
  atom = @atom_map[atom_id]

  @rw_mol.remove_bond(other_atom.get_idx, atom.get_idx)
  @rw_mol.remove_atom(atom.get_idx)

  @atom_map.delete(atom_id)
  @bond_map.delete(target_bond.id)
end

#update_output_formats ⇒ Object



343
344
345
346
# File 'lib/chem_scanner/interpreter/element/molecule.rb', line 343

# Recomputes the cached output formats from the current RDKit molecule.
#
# The SMILES is taken first; #get_mdl then temporarily adds the dash bonds
# while it renders the mol block.
#
# @return [String] the new mol block (also stored in @mdl)
def update_output_formats
  @cano_smiles = get_cano_smiles
  @mdl = get_mdl
end