|
39 | 39 | from rmgpy.molecule.molecule import Atom, Bond, Molecule |
40 | 40 | from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType |
41 | 41 | from rdkit import Chem |
42 | | - |
| 42 | +from numpy.random import randint |
43 | 43 | # This variable is used to name atom IDs with as few conflicts as possible, by |
44 | 44 | # using the entire space of integer objects. |
45 | 45 | ATOM_ID_COUNTER = -(2**15) |
@@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non |
888 | 888 | frag_list.append(res_frag) |
889 | 889 | return frag_list |
890 | 890 |
|
891 | | - def sliceitup_arom(self, molecule, size_threshold=None): |
| 891 | + def sliceitup_arom(self, molecule, size_threshold=5): |
892 | 892 | """ |
893 | 893 | Several specified aromatic patterns |
894 | 894 | """ |
895 | | - # set min size for each aliphatic fragment size |
896 | | - if size_threshold: |
897 | | - size_threshold = size_threshold |
898 | | - else: |
899 | | - size_threshold = 5 |
900 | 895 | # if input is smiles string, output smiles |
901 | 896 | if isinstance(molecule, str): |
902 | 897 | molecule_smiles = molecule |
@@ -950,27 +945,47 @@ def sliceitup_arom(self, molecule, size_threshold=None): |
950 | 945 | # mol_set contains new set of fragments |
951 | 946 | mol_set = Chem.GetMolFrags(new_mol, asMols=True) |
952 | 947 | # check all fragments' size |
953 | | - if all( |
954 | | - sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) |
955 | | - >= size_threshold |
956 | | - for mol in mol_set |
957 | | - ): |
958 | | - # replace * at cutting position with cutting label |
959 | | - for ind, rdmol in enumerate(mol_set): |
960 | | - frag = Chem.MolToSmiles(rdmol) |
961 | | - if len(mol_set) > 2: # means it cut into 3 fragments |
| 948 | + if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set): |
| 949 | + if len(mol_set) == 2: |
| 950 | + frag1 = Chem.MolToSmiles(mol_set[0]) |
| 951 | + frag2 = Chem.MolToSmiles(mol_set[1]) |
| 952 | + |
| 953 | + frag1_R = frag1.count("Na") |
| 954 | + frag1_L = frag1.count("K") |
| 955 | + frag2_R = frag2.count("Na") |
| 956 | + frag2_L = frag2.count("K") |
| 957 | + |
| 958 | + if frag1_R > frag2_R and frag1_L <= frag2_L: |
| 959 | + frag1_smi = frag1.replace("*", "L") |
| 960 | + frag2_smi = frag2.replace("*", "R") |
| 961 | + elif frag1_L > frag2_L and frag1_R <= frag2_R: |
| 962 | + frag1_smi = frag1.replace("*", "R") |
| 963 | + frag2_smi = frag2.replace("*", "L") |
| 964 | + elif frag2_L > frag1_L and frag2_R <= frag1_R: |
| 965 | + frag1_smi = frag1.replace("*", "R") |
| 966 | + frag2_smi = frag2.replace("*", "L") |
| 967 | + elif frag2_R > frag1_R and frag2_L <= frag1_L: |
| 968 | + frag1_smi = frag1.replace("*", "R") |
| 969 | + frag2_smi = frag2.replace("*", "L") |
| 970 | + elif randint(0,1)==1: |
| 971 | + frag1_smi = frag1.replace("*", "L") |
| 972 | + frag2_smi = frag2.replace("*", "R") |
| 973 | + else: |
| 974 | + frag1_smi = frag1.replace("*", "R") |
| 975 | + frag2_smi = frag2.replace("*", "L") |
| 976 | + |
| 977 | + frag_list = [frag1_smi, frag2_smi] |
| 978 | + |
| 979 | + elif len(mol_set) > 2: # means it cut into 3 fragments |
| 980 | + frag_list = [] |
| 981 | + for ind, rdmol in enumerate(mol_set): |
| 982 | + frag = Chem.MolToSmiles(rdmol) |
962 | 983 | if frag.count("*") > 1: |
963 | | - # replace both with R |
964 | 984 | frag_smi = frag.replace("*", "R") |
965 | 985 | else: |
966 | 986 | frag_smi = frag.replace("*", "L") |
967 | | - else: # means it only cut once, generate 2 fragments |
968 | | - if ind == 0: |
969 | | - frag_smi = frag.replace("*", "R") |
970 | | - else: |
971 | | - frag_smi = frag.replace("*", "L") |
972 | | - frag_list.append(frag_smi) |
973 | | - break |
| 987 | + frag_list.append(frag_smi) |
| 988 | + break |
974 | 989 | else: |
975 | 990 | # turn to next matched_atom_map |
976 | 991 | continue |
@@ -1014,15 +1029,10 @@ def sliceitup_arom(self, molecule, size_threshold=None): |
1014 | 1029 | frag_list_new.append(res_frag) |
1015 | 1030 | return frag_list_new |
1016 | 1031 |
|
1017 | | - def sliceitup_aliph(self, molecule, size_threshold=None): |
| 1032 | + def sliceitup_aliph(self, molecule, size_threshold=5): |
1018 | 1033 | """ |
1019 | 1034 | Several specified aliphatic patterns |
1020 | 1035 | """ |
1021 | | - # set min size for each aliphatic fragment size |
1022 | | - if size_threshold: |
1023 | | - size_threshold = size_threshold |
1024 | | - else: |
1025 | | - size_threshold = 5 |
1026 | 1036 | # if input is smiles string, output smiles |
1027 | 1037 | if isinstance(molecule, str): |
1028 | 1038 | molecule_smiles = molecule |
@@ -1079,27 +1089,47 @@ def sliceitup_aliph(self, molecule, size_threshold=None): |
1079 | 1089 | # mol_set contains new set of fragments |
1080 | 1090 | mol_set = Chem.GetMolFrags(new_mol, asMols=True) |
1081 | 1091 | # check all fragments' size |
1082 | | - if all( |
1083 | | - sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) |
1084 | | - >= size_threshold |
1085 | | - for mol in mol_set |
1086 | | - ): |
1087 | | - # replace * at cutting position with cutting label |
1088 | | - for ind, rdmol in enumerate(mol_set): |
1089 | | - frag = Chem.MolToSmiles(rdmol) |
1090 | | - if len(mol_set) > 2: # means it cut into 3 fragments |
| 1092 | + if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set): |
| 1093 | + if len(mol_set) == 2: |
| 1094 | + frag1 = Chem.MolToSmiles(mol_set[0]) |
| 1095 | + frag2 = Chem.MolToSmiles(mol_set[1]) |
| 1096 | + |
| 1097 | + frag1_R = frag1.count("Na") |
| 1098 | + frag1_L = frag1.count("K") |
| 1099 | + frag2_R = frag2.count("Na") |
| 1100 | + frag2_L = frag2.count("K") |
| 1101 | + |
| 1102 | + if frag1_R > frag2_R and frag1_L <= frag2_L: |
| 1103 | + frag1_smi = frag1.replace("*", "L") |
| 1104 | + frag2_smi = frag2.replace("*", "R") |
| 1105 | + elif frag1_L > frag2_L and frag1_R <= frag2_R: |
| 1106 | + frag1_smi = frag1.replace("*", "R") |
| 1107 | + frag2_smi = frag2.replace("*", "L") |
| 1108 | + elif frag2_L > frag1_L and frag2_R <= frag1_R: |
| 1109 | + frag1_smi = frag1.replace("*", "R") |
| 1110 | + frag2_smi = frag2.replace("*", "L") |
| 1111 | + elif frag2_R > frag1_R and frag2_L <= frag1_L: |
| 1112 | + frag1_smi = frag1.replace("*", "R") |
| 1113 | + frag2_smi = frag2.replace("*", "L") |
| 1114 | + elif randint(0,1)==1: |
| 1115 | + frag1_smi = frag1.replace("*", "L") |
| 1116 | + frag2_smi = frag2.replace("*", "R") |
| 1117 | + else: |
| 1118 | + frag1_smi = frag1.replace("*", "R") |
| 1119 | + frag2_smi = frag2.replace("*", "L") |
| 1120 | + |
| 1121 | + frag_list = [frag1_smi, frag2_smi] |
| 1122 | + |
| 1123 | + elif len(mol_set) > 2: # means it cut into 3 fragments |
| 1124 | + frag_list = [] |
| 1125 | + for ind, rdmol in enumerate(mol_set): |
| 1126 | + frag = Chem.MolToSmiles(rdmol) |
1091 | 1127 | if frag.count("*") > 1: |
1092 | | - # replace both with R |
1093 | | - frag_smi = frag.replace("*", "R") |
1094 | | - else: |
1095 | | - frag_smi = frag.replace("*", "L") |
1096 | | - else: # means it only cut once, generate 2 fragments |
1097 | | - if ind == 0: |
1098 | 1128 | frag_smi = frag.replace("*", "R") |
1099 | 1129 | else: |
1100 | 1130 | frag_smi = frag.replace("*", "L") |
1101 | | - frag_list.append(frag_smi) |
1102 | | - break |
| 1131 | + frag_list.append(frag_smi) |
| 1132 | + break |
1103 | 1133 | else: |
1104 | 1134 | # turn to next matched_atom_map |
1105 | 1135 | continue |
|
0 commit comments