Skip to content

Commit 716caa2

Browse files
committed
Adding two scripts from ccdc confidential for conformer analysis. Original scripts located here: https://github.com/ccdc-confidential/cpp-apps-main/tree/main/wrapping/contributed
The scripts have been heavily edited for usability, clarity and maintainability, but the science is the same.
1 parent 9dfc79d commit 716caa2

File tree

4 files changed

+388
-0
lines changed

4 files changed

+388
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Conformer Filter Density
2+
3+
Filter conformers using a variety of metrics. See arguments in script for details.
4+
5+
CCDC Python API Licence required, minimum version: 3.0.15
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env python3
2+
#
3+
# This script can be used for any purpose without limitation subject to the
4+
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
5+
#
6+
# This permission notice and the following statement of attribution must be
7+
# included in all copies or substantial portions of this script.
8+
#
9+
# 2026-02-03: created by the Cambridge Crystallographic Data Centre
10+
11+
import argparse
12+
import csv
13+
14+
from ccdc import conformer, io
15+
16+
17+
def parse_args():
    """Parse command line arguments.

    Returns:
        argparse.Namespace with the attributes inmolfn, mode, torsion_limit,
        local_density_threshold, incl_organometallics, generalisation,
        successfn, failurefn and unusualtorsionfn.
    """
    # The script carries no module docstring, so __doc__ is None; fall back to
    # an explicit description so that --help says what the script does.
    description = __doc__ or (
        'Partition the molecules of an input file into passes and failures '
        'according to their number of unusual torsions.'
    )
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('inmolfn',
                        metavar='<input molecule file>',
                        help='Input file (single- or multi-molecule file)')

    parser.add_argument('-m', '--mode',
                        choices=['absolute', 'relative'],
                        default='absolute',
                        help='Limit mode: absolute (fixed threshold) or relative '
                             '(threshold based on molecule with fewest unusual torsions). '
                             'WARNING: Relative mode may behave unexpectedly with conformers '
                             'from multiple input molecules (default: %(default)s)')

    parser.add_argument('-l', '--limit',
                        dest='torsion_limit',
                        type=int,
                        default=0,
                        metavar='<limit>',
                        help='Maximum number of unusual torsions for a passing molecule '
                             '(default: %(default)s)')

    parser.add_argument('-d', '--local-density',
                        dest='local_density_threshold',
                        type=float,
                        default=10.0,
                        metavar='<threshold>',
                        help='Local density threshold for classifying a torsion as unusual '
                             '(default: %(default)s)')

    parser.add_argument('--incl-organometallics',
                        dest='incl_organometallics',
                        action='store_true',
                        help='Include organometallic compounds in the search '
                             '(default: organic compounds only)')

    parser.add_argument('--generalisation',
                        action='store_true',
                        help='Turn on generalisation for searches')

    parser.add_argument('--successfn',
                        default='successes.mol',
                        metavar='<file>',
                        help='Output file for molecules that pass the filter '
                             '(default: %(default)s)')

    parser.add_argument('--failurefn',
                        default='failures.mol',
                        metavar='<file>',
                        help='Output file for molecules that fail the filter '
                             '(default: %(default)s)')

    parser.add_argument('-u', '--unusual-torsions',
                        dest='unusualtorsionfn',
                        default='unusual_torsions.csv',
                        metavar='<file>',
                        help='Output CSV file for unusual torsion details '
                             '(default: %(default)s)')

    return parser.parse_args()
79+
80+
81+
def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
    """Build a GeometryAnalyser that analyses torsions only.

    Args:
        local_density_threshold: Value stored as the torsion local density
            threshold of the analyser settings
        incl_organometallics: If true the organometallic filter is set to
            'all', otherwise to 'organics_only'
        generalisation: Value stored as the generalisation setting

    Returns:
        Configured ccdc.conformer.GeometryAnalyser instance
    """
    analyser = conformer.GeometryAnalyser()
    settings = analyser.settings

    # Torsions are the only fragment type of interest; switch the others off.
    for fragment_type in ('bond', 'angle', 'ring'):
        getattr(settings, fragment_type).analyse = False

    settings.torsion.local_density_threshold = local_density_threshold
    settings.generalisation = generalisation
    if incl_organometallics:
        settings.organometallic_filter = 'all'
    else:
        settings.organometallic_filter = 'organics_only'

    return analyser
103+
104+
105+
def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
    """Split the input molecules into passes and failures by unusual torsion count.

    A torsion counts as unusual when the analyser flags it as unusual and it
    has enough hits. A molecule fails when its count exceeds the threshold:
    ``torsion_limit`` in 'absolute' mode, otherwise ``torsion_limit`` plus the
    smallest count found among all input molecules.

    Args:
        torsion_limit: Maximum number of unusual torsions allowed
        input_filename: Path to input molecule file
        mode: 'absolute' or 'relative' filtering mode
        engine: Configured GeometryAnalyser instance
        success_file: Path to output file for passing molecules
        failure_file: Path to output file for failing molecules
        unusual_torsion_file: Path to CSV file; receives one row per unusual
            torsion of each failing molecule
    """
    # Pass 1: analyse every molecule and remember its unusual torsions.
    analysed = []
    with io.MoleculeReader(input_filename) as reader:
        for mol in reader:
            mol.standardise_aromatic_bonds()
            mol.standardise_delocalised_bonds()

            result = engine.analyse_molecule(mol)
            flagged = [
                torsion for torsion in result.analysed_torsions
                if torsion.unusual and torsion.enough_hits
            ]
            mol.unusual_torsions = flagged
            mol.num_unusual_torsions = len(flagged)
            analysed.append(mol)

    # The threshold is the same for every molecule, so work it out once.
    fewest = min((mol.num_unusual_torsions for mol in analysed), default=float('inf'))
    if mode == 'absolute':
        threshold = torsion_limit
    else:
        threshold = fewest + torsion_limit

    # Pass 2: write each molecule to the matching output file.
    with io.MoleculeWriter(success_file) as passed_writer, \
            io.MoleculeWriter(failure_file) as failed_writer, \
            open(unusual_torsion_file, 'w', newline='') as csv_file:

        rows = csv.writer(csv_file)
        rows.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])

        for position, mol in enumerate(analysed):
            if mol.num_unusual_torsions <= threshold:
                passed_writer.write(mol)
                continue

            failed_writer.write(mol)
            # MoleculeIndex is the zero-based position of the molecule in the input.
            rows.writerows(
                [
                    position,
                    torsion.value,
                    torsion.z_score,
                    torsion.local_density,
                    torsion.nhits,
                    ' '.join(torsion.atom_labels),
                ]
                for torsion in mol.unusual_torsions
            )
162+
163+
164+
def run():
    """Main entry point for the script.

    Parses the command line, builds the geometry analyser and runs the
    filtering analysis.

    Raises:
        ValueError: If the requested torsion limit is negative.
    """
    args = parse_args()

    if args.torsion_limit < 0:
        raise ValueError('Torsion limit must be >= 0')

    engine = create_mogul_engine(
        args.local_density_threshold,
        args.incl_organometallics,
        args.generalisation,
    )

    analysis(
        args.torsion_limit,
        args.inmolfn,
        args.mode,
        engine,
        args.successfn,
        args.failurefn,
        # The option is declared with dest='unusualtorsionfn'; the previous
        # spelling 'unusualtorsionsfn' raised AttributeError on every run.
        args.unusualtorsionfn,
    )
186+
187+
188+
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    run()

scripts/filter_poses/ReadMe.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Filter Poses
2+
3+
This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.
4+
5+
CCDC Python API Licence required, minimum version: 3.0.15
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env python3
2+
#
3+
# This script can be used for any purpose without limitation subject to the
4+
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
5+
#
6+
# This permission notice and the following statement of attribution must be
7+
# included in all copies or substantial portions of this script.
8+
#
9+
# 2025-02-03: created by the Cambridge Crystallographic Data Centre
10+
11+
import argparse
12+
import copy
13+
import csv
14+
import math
15+
16+
from ccdc import conformer, io
17+
18+
19+
def parse_args():
    """Parse command line arguments.

    Returns:
        argparse.Namespace with the attributes conformer_file (input path)
        and write_csv (True when -csv/--write-csv was given).
    """
    # The script carries no module docstring, so __doc__ is None; fall back to
    # an explicit description so that --help says what the script does.
    description = __doc__ or (
        'Score each conformer in a multi-molecule file by its conformational '
        'probability and its unusual torsions.'
    )
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('conformer_file',
                        metavar='<input file>',
                        help='Input file (multi-molecule file)')

    parser.add_argument('-csv', '--write-csv',
                        dest='write_csv',
                        action='store_true',
                        help='Write a csv file for all the analysed conformers.')

    return parser.parse_args()
33+
34+
35+
class ProbabilityScorer:
    """Score conformers by conformational probability and by unusual torsions.

    A ConformerGenerator provides the probability scores and a
    GeometryAnalyser provides the torsion statistics.
    """

    def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True,
                 user_mogul_analysis_settings=None):
        """Create both engines, optionally starting from user-supplied settings."""
        self._generator = self._create_conformer_generator(
            user_conformer_generator_settings, skip_minimisation)
        self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings)

    def _create_analyser(self, user_mogul_analyser_settings):
        """Return a GeometryAnalyser that analyses torsions only."""
        analyser = conformer.GeometryAnalyser()

        # Script-specific defaults: no generalisation, organic subset only
        # (i.e. organometallic structures are excluded).
        analyser.settings.generalisation = False
        analyser.settings.organometallic_filter = 'Organic'

        # Settings supplied by the user replace those defaults.
        if user_mogul_analyser_settings is not None:
            analyser.settings = copy.copy(user_mogul_analyser_settings)

        # Whatever the settings' origin, torsions are the only fragments analysed.
        for fragment_type in ('bond', 'angle', 'ring'):
            getattr(analyser.settings, fragment_type).analyse = False

        return analyser

    def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True):
        """Return a ConformerGenerator that uses the input torsion distributions."""
        if user_generator_settings is None:
            generator_settings = conformer.ConformerSettings()
        else:
            generator_settings = copy.copy(user_generator_settings)

        # Not negotiable: the scoring relies on use_input_torsion_distributions mode.
        generator_settings.use_input_torsion_distributions = True

        return conformer.ConformerGenerator(generator_settings, skip_minimisation)

    @staticmethod
    def _combined_local_density(all_local_densities):
        """Return the mean of the given densities, or 100.0 when there are none."""
        if not all_local_densities:
            return 100.0
        return sum(all_local_densities) / len(all_local_densities)

    def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0,
                             bad_rmsd_score=9999.9):
        """Score the molecule from the first conformer produced by the generator.

        Returns:
            Tuple (normalised score, probability, rmsd). When no conformer is
            produced, a molecule without rotatable bonds gets a normalised
            score of 0.0 and a probability of exp(0.0); any other molecule
            gets bad_normalised_score and bad_probability_score. The rmsd is
            bad_rmsd_score whenever no conformer is produced.
        """
        # Approximation excluding rings: rigid means no rotatable bond at all.
        rigid = not any(bond.is_rotatable for bond in molecule.bonds)

        generated = self._generator.generate(molecule)

        if generated:
            # The first conformer is the most likely one, so it supplies the scores.
            best = generated[0]
            normalised_score = best.normalised_score
            rmsd = best.rmsd()
            ln_probability = best.probability
        else:
            normalised_score = rmsd = ln_probability = None

        if normalised_score is None:
            normalised_score = 0.0 if rigid else bad_normalised_score
        if ln_probability is None and rigid:
            ln_probability = 0.0
        if rmsd is None:
            rmsd = bad_rmsd_score

        if ln_probability is None:
            probability = bad_probability_score
        else:
            probability = math.exp(ln_probability)

        return normalised_score, probability, rmsd

    def unusual_torsions_analysis(self, molecule, max_hist_size=15):
        """Collect torsion statistics for the molecule.

        A torsion whose histogram total does not exceed max_hist_size is
        counted as having no data and contributes a uniform density instead
        of its own local density.

        Returns:
            Tuple (number of unusual torsions, mean local density,
            mean local density of the unusual torsions,
            number of torsions with no data).
        """
        analysed = self._mogul_analysis_engine.analyse_molecule(molecule)

        # Expected number of torsions within 10 degrees assuming even distribution of
        # observations. Number of Mogul bins would be strictly better than assuming 18.
        uniform_density = 100 / 18

        densities = []
        unusual_densities = []
        missing_data = 0
        for torsion in analysed.analysed_torsions:
            if sum(torsion.histogram()) <= max_hist_size:
                missing_data += 1
                densities.append(uniform_density)
                continue

            densities.append(torsion.local_density)
            if torsion.unusual:
                unusual_densities.append(torsion.local_density)

        return (
            len(unusual_densities),
            self._combined_local_density(densities),
            self._combined_local_density(unusual_densities),
            missing_data,
        )

    def process_molecule(self, molecule):
        """Clean up the molecule, run both analyses and return the results as a dict."""
        molecule.remove_unknown_atoms()
        molecule.assign_bond_types()

        torsion_stats = self.unusual_torsions_analysis(molecule)
        unusual_count, mean_density, mean_unusual_density, missing_data = torsion_stats
        normalised_score, probability, rmsd = self.probability_analysis(molecule)

        return {
            "identifier": molecule.identifier,
            "number of unusual torsions": unusual_count,
            "average local density": mean_density,
            "average unusual local density": mean_unusual_density,
            "number of torsions with no data": missing_data,
            "normalised probability score": normalised_score,
            "probability score": probability,
            "RMSD to input conformation": rmsd,
        }
163+
164+
165+
def run():
    """Score every molecule of the input file and optionally save the scores as CSV.

    One comma-separated line per molecule is printed to standard output,
    preceded by a header line. With -csv/--write-csv the same data is also
    written to 'filtered_poses_analysis.csv' in the current directory.
    """
    import sys

    args = parse_args()

    scorer = ProbabilityScorer()

    all_data = []

    with io.MoleculeReader(args.conformer_file) as reader:
        for molecule in reader:
            data = scorer.process_molecule(molecule)
            if not all_data:
                # First molecule: print the column names once.
                print(", ".join(data))
            print(", ".join(str(value) for value in data.values()))
            all_data.append(data)

    if not args.write_csv:
        return

    if not all_data:
        # Without any molecule there are no column names to take from
        # all_data[0]; indexing it used to raise IndexError.
        print('No molecules were read from the input file; no csv file written.',
              file=sys.stderr)
        return

    with open('filtered_poses_analysis.csv', 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, list(all_data[0]))
        dict_writer.writeheader()
        dict_writer.writerows(all_data)
186+
187+
188+
# Run the scoring only when executed as a script, not when imported.
if __name__ == "__main__":
    run()

0 commit comments

Comments
 (0)