Skip to content

Commit 61ffa80

Browse files
committed
add L to fragment with fewest Ls and R to fragment to fewest Rs where possible, try to cut fragments in near equal size if possible, else default to size_threshold=5
1 parent 6a5ebf4 commit 61ffa80

File tree

2 files changed

+87
-52
lines changed

2 files changed

+87
-52
lines changed

rmgpy/molecule/fragment.py

Lines changed: 77 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from rmgpy.molecule.molecule import Atom, Bond, Molecule
4040
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
4141
from rdkit import Chem
42-
42+
from numpy.random import randint
4343
# this variable is used to name atom IDs so that there are as few conflicts by
4444
# using the entire space of integer objects
4545
ATOM_ID_COUNTER = -(2**15)
@@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
888888
frag_list.append(res_frag)
889889
return frag_list
890890

891-
def sliceitup_arom(self, molecule, size_threshold=None):
891+
def sliceitup_arom(self, molecule, size_threshold=5):
892892
"""
893893
Several specified aromatic patterns
894894
"""
895-
# set min size for each aliphatic fragment size
896-
if size_threshold:
897-
size_threshold = size_threshold
898-
else:
899-
size_threshold = 5
900895
# if input is smiles string, output smiles
901896
if isinstance(molecule, str):
902897
molecule_smiles = molecule
@@ -950,27 +945,47 @@ def sliceitup_arom(self, molecule, size_threshold=None):
950945
# mol_set contains new set of fragments
951946
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
952947
# check all fragments' size
953-
if all(
954-
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
955-
>= size_threshold
956-
for mol in mol_set
957-
):
958-
# replace * at cutting position with cutting label
959-
for ind, rdmol in enumerate(mol_set):
960-
frag = Chem.MolToSmiles(rdmol)
961-
if len(mol_set) > 2: # means it cut into 3 fragments
948+
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
949+
if len(mol_set) == 2:
950+
frag1 = Chem.MolToSmiles(mol_set[0])
951+
frag2 = Chem.MolToSmiles(mol_set[1])
952+
953+
frag1_R = frag1.count("Na")
954+
frag1_L = frag1.count("K")
955+
frag2_R = frag2.count("Na")
956+
frag2_L = frag2.count("K")
957+
958+
if frag1_R > frag2_R and frag1_L <= frag2_L:
959+
frag1_smi = frag1.replace("*", "L")
960+
frag2_smi = frag2.replace("*", "R")
961+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
962+
frag1_smi = frag1.replace("*", "R")
963+
frag2_smi = frag2.replace("*", "L")
964+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
965+
frag1_smi = frag1.replace("*", "R")
966+
frag2_smi = frag2.replace("*", "L")
967+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
968+
frag1_smi = frag1.replace("*", "R")
969+
frag2_smi = frag2.replace("*", "L")
970+
elif randint(0,1)==1:
971+
frag1_smi = frag1.replace("*", "L")
972+
frag2_smi = frag2.replace("*", "R")
973+
else:
974+
frag1_smi = frag1.replace("*", "R")
975+
frag2_smi = frag2.replace("*", "L")
976+
977+
frag_list = [frag1_smi, frag2_smi]
978+
979+
elif len(mol_set) > 2: # means it cut into 3 fragments
980+
frag_list = []
981+
for ind, rdmol in enumerate(mol_set):
982+
frag = Chem.MolToSmiles(rdmol)
962983
if frag.count("*") > 1:
963-
# replace both with R
964984
frag_smi = frag.replace("*", "R")
965985
else:
966986
frag_smi = frag.replace("*", "L")
967-
else: # means it only cut once, generate 2 fragments
968-
if ind == 0:
969-
frag_smi = frag.replace("*", "R")
970-
else:
971-
frag_smi = frag.replace("*", "L")
972-
frag_list.append(frag_smi)
973-
break
987+
frag_list.append(frag_smi)
988+
break
974989
else:
975990
# turn to next matched_atom_map
976991
continue
@@ -1014,15 +1029,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
10141029
frag_list_new.append(res_frag)
10151030
return frag_list_new
10161031

1017-
def sliceitup_aliph(self, molecule, size_threshold=None):
1032+
def sliceitup_aliph(self, molecule, size_threshold=5):
10181033
"""
10191034
Several specified aliphatic patterns
10201035
"""
1021-
# set min size for each aliphatic fragment size
1022-
if size_threshold:
1023-
size_threshold = size_threshold
1024-
else:
1025-
size_threshold = 5
10261036
# if input is smiles string, output smiles
10271037
if isinstance(molecule, str):
10281038
molecule_smiles = molecule
@@ -1079,27 +1089,47 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
10791089
# mol_set contains new set of fragments
10801090
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
10811091
# check all fragments' size
1082-
if all(
1083-
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
1084-
>= size_threshold
1085-
for mol in mol_set
1086-
):
1087-
# replace * at cutting position with cutting label
1088-
for ind, rdmol in enumerate(mol_set):
1089-
frag = Chem.MolToSmiles(rdmol)
1090-
if len(mol_set) > 2: # means it cut into 3 fragments
1092+
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
1093+
if len(mol_set) == 2:
1094+
frag1 = Chem.MolToSmiles(mol_set[0])
1095+
frag2 = Chem.MolToSmiles(mol_set[1])
1096+
1097+
frag1_R = frag1.count("Na")
1098+
frag1_L = frag1.count("K")
1099+
frag2_R = frag2.count("Na")
1100+
frag2_L = frag2.count("K")
1101+
1102+
if frag1_R > frag2_R and frag1_L <= frag2_L:
1103+
frag1_smi = frag1.replace("*", "L")
1104+
frag2_smi = frag2.replace("*", "R")
1105+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
1106+
frag1_smi = frag1.replace("*", "R")
1107+
frag2_smi = frag2.replace("*", "L")
1108+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
1109+
frag1_smi = frag1.replace("*", "R")
1110+
frag2_smi = frag2.replace("*", "L")
1111+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
1112+
frag1_smi = frag1.replace("*", "R")
1113+
frag2_smi = frag2.replace("*", "L")
1114+
elif randint(0,1)==1:
1115+
frag1_smi = frag1.replace("*", "L")
1116+
frag2_smi = frag2.replace("*", "R")
1117+
else:
1118+
frag1_smi = frag1.replace("*", "R")
1119+
frag2_smi = frag2.replace("*", "L")
1120+
1121+
frag_list = [frag1_smi, frag2_smi]
1122+
1123+
elif len(mol_set) > 2: # means it cut into 3 fragments
1124+
frag_list = []
1125+
for ind, rdmol in enumerate(mol_set):
1126+
frag = Chem.MolToSmiles(rdmol)
10911127
if frag.count("*") > 1:
1092-
# replace both with R
1093-
frag_smi = frag.replace("*", "R")
1094-
else:
1095-
frag_smi = frag.replace("*", "L")
1096-
else: # means it only cut once, generate 2 fragments
1097-
if ind == 0:
10981128
frag_smi = frag.replace("*", "R")
10991129
else:
11001130
frag_smi = frag.replace("*", "L")
1101-
frag_list.append(frag_smi)
1102-
break
1131+
frag_list.append(frag_smi)
1132+
break
11031133
else:
11041134
# turn to next matched_atom_map
11051135
continue

rmgpy/rmg/model.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
from rmgpy.rmg.decay import decay_species
6060
from rmgpy.rmg.reactors import PhaseSystem, Phase, Interface, Reactor
6161
from rmgpy.molecule.fragment import Fragment
62-
62+
from rmgpy.molecule.molecule import Molecule
6363
################################################################################
6464

6565

@@ -303,12 +303,17 @@ def make_new_species(self, object, label="", reactive=True, check_existing=True,
303303

304304
# If we're here then we're ready to make the new species
305305
if check_cut:
306-
try:
307-
mols = molecule.cut_molecule(cut_through=False)
308-
except AttributeError:
309-
# it's Molecule object, change it to Fragment and then cut
306+
# set size_threshold to about half the molecule size
307+
size_threshold = int((molecule.smiles.count("C")+molecule.smiles.count("c"))/2)
308+
# if the molecule type is Molecule, change type to Fragment before cutting
309+
if type(molecule) == Molecule:
310310
molecule = Fragment().from_adjacency_list(molecule.to_adjacency_list())
311+
# try to cut_molecule with size threshold set above
312+
mols = molecule.cut_molecule(cut_through=False,size_threshold=size_threshold)
313+
# if cut above can't be made try with default size_threshold = 5
314+
if len(mols) == 1:
311315
mols = molecule.cut_molecule(cut_through=False)
316+
# if cut above can't be made, don't cut
312317
if len(mols) == 1:
313318
molecule = mols[0]
314319
else:

0 commit comments

Comments
 (0)