|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# |
| 3 | +# This script can be used for any purpose without limitation subject to the |
| 4 | +# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx |
| 5 | +# |
| 6 | +# This permission notice and the following statement of attribution must be |
| 7 | +# included in all copies or substantial portions of this script. |
| 8 | +# |
| 9 | +# 2025-02-03: created by the Cambridge Crystallographic Data Centre |
| 10 | + |
| 11 | +import argparse |
| 12 | +import copy |
| 13 | +import csv |
| 14 | +import math |
| 15 | + |
| 16 | +from ccdc import conformer, io |
| 17 | + |
| 18 | + |
| 19 | +def parse_args(): |
| 20 | + """Parse command line arguments.""" |
| 21 | + parser = argparse.ArgumentParser(description=__doc__) |
| 22 | + |
| 23 | + parser.add_argument('conformer_file', |
| 24 | + metavar='<input file>', |
| 25 | + help='Input file (multi-molecule file)') |
| 26 | + |
| 27 | + parser.add_argument('-csv', '--write-csv', |
| 28 | + dest='write_csv', |
| 29 | + action='store_true', |
| 30 | + help='Write a csv file for all the analysed conformers.') |
| 31 | + |
| 32 | + return parser.parse_args() |
| 33 | + |
| 34 | + |
| 35 | +class ProbabilityScorer: |
| 36 | + """ |
| 37 | + Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational |
| 38 | + probabilities and unusual torsions. |
| 39 | + """ |
| 40 | + def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True, |
| 41 | + user_mogul_analysis_settings=None): |
| 42 | + |
| 43 | + self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation) |
| 44 | + self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings) |
| 45 | + |
| 46 | + def _create_analyser(self, user_mogul_analyser_settings): |
| 47 | + """Create a GeometryAnalyser engine to analyse the conformers.""" |
| 48 | + engine = conformer.GeometryAnalyser() |
| 49 | + |
| 50 | + # Tweak the 'default defaults' |
| 51 | + # By default, in this use case, we do not want to use generalisation. |
| 52 | + engine.settings.generalisation = False |
| 53 | + # By default, only the organic subset, i.e. exclude organometallic |
| 54 | + engine.settings.organometallic_filter = 'Organic' |
| 55 | + |
| 56 | + # Ensure user settings are kept: |
| 57 | + if user_mogul_analyser_settings is not None: |
| 58 | + engine.settings = copy.copy(user_mogul_analyser_settings) |
| 59 | + |
| 60 | + # Only analyse torsions: |
| 61 | + engine.settings.bond.analyse = False |
| 62 | + engine.settings.angle.analyse = False |
| 63 | + engine.settings.ring.analyse = False |
| 64 | + |
| 65 | + return engine |
| 66 | + |
| 67 | + def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True): |
| 68 | + """Create a ConformerGenerator engine to generate conformers for the molecules.""" |
| 69 | + settings = conformer.ConformerSettings() |
| 70 | + if user_generator_settings is not None: |
| 71 | + settings = copy.copy(user_generator_settings) |
| 72 | + |
| 73 | + # Mandatory setting here: this must work in use_input_torsion_distributions mode. |
| 74 | + settings.use_input_torsion_distributions = True |
| 75 | + |
| 76 | + engine = conformer.ConformerGenerator(settings, skip_minimisation) |
| 77 | + return engine |
| 78 | + |
| 79 | + @staticmethod |
| 80 | + def _combined_local_density(all_local_densities): |
| 81 | + if all_local_densities: |
| 82 | + return sum(all_local_densities) / len(all_local_densities) |
| 83 | + return 100.0 |
| 84 | + |
| 85 | + def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0, |
| 86 | + bad_rmsd_score=9999.9): |
| 87 | + |
| 88 | + # Approximation excluding rings: |
| 89 | + is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0 |
| 90 | + |
| 91 | + conformers = self._generator.generate(molecule) |
| 92 | + |
| 93 | + normalised_score = None |
| 94 | + rmsd = None |
| 95 | + ln_probability = None |
| 96 | + |
| 97 | + if conformers: |
| 98 | + # First conformer is the most likely, so return its score. |
| 99 | + normalised_score = conformers[0].normalised_score |
| 100 | + rmsd = conformers[0].rmsd() |
| 101 | + ln_probability = conformers[0].probability |
| 102 | + |
| 103 | + if is_rigid: |
| 104 | + if ln_probability is None: |
| 105 | + ln_probability = 0.0 |
| 106 | + if normalised_score is None: |
| 107 | + normalised_score = 0.0 |
| 108 | + |
| 109 | + if normalised_score is None: |
| 110 | + normalised_score = bad_normalised_score |
| 111 | + |
| 112 | + if rmsd is None: |
| 113 | + rmsd = bad_rmsd_score |
| 114 | + |
| 115 | + probability = bad_probability_score |
| 116 | + if ln_probability is not None: |
| 117 | + probability = math.exp(ln_probability) |
| 118 | + |
| 119 | + return normalised_score, probability, rmsd |
| 120 | + |
| 121 | + def unusual_torsions_analysis(self, molecule, max_hist_size=15): |
| 122 | + checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule) |
| 123 | + unusual_count = 0 |
| 124 | + local_densities = [] |
| 125 | + unusual_local_densities = [] |
| 126 | + no_data_torsions = 0 |
| 127 | + for tor in checked_mol.analysed_torsions: |
| 128 | + hist_size = sum(tor.histogram()) |
| 129 | + if hist_size > max_hist_size: |
| 130 | + local_densities.append(tor.local_density) |
| 131 | + |
| 132 | + if tor.unusual: |
| 133 | + unusual_local_densities.append(tor.local_density) |
| 134 | + unusual_count += 1 |
| 135 | + else: |
| 136 | + no_data_torsions += 1 |
| 137 | + |
| 138 | + # Expected number of torsions within 10 degrees assuming even distribution of observations. |
| 139 | + # Number of Mogul bins would be strictly better than assuming 18. |
| 140 | + uniform_torsion_dist = 100 / 18 |
| 141 | + local_densities.append(uniform_torsion_dist) |
| 142 | + return unusual_count, self._combined_local_density(local_densities), self._combined_local_density( |
| 143 | + unusual_local_densities), no_data_torsions |
| 144 | + |
| 145 | + def process_molecule(self, molecule): |
| 146 | + molecule.remove_unknown_atoms() |
| 147 | + molecule.assign_bond_types() |
| 148 | + |
| 149 | + unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis( |
| 150 | + molecule) |
| 151 | + normalised_score, probability, rmsd = self.probability_analysis(molecule) |
| 152 | + |
| 153 | + return { |
| 154 | + "identifier": molecule.identifier, |
| 155 | + "number of unusual torsions": unusual_count, |
| 156 | + "average local density": avg_local_density, |
| 157 | + "average unusual local density": avg_unusual_local_density, |
| 158 | + "number of torsions with no data": no_data_torsions, |
| 159 | + "normalised probability score": normalised_score, |
| 160 | + "probability score": probability, |
| 161 | + "RMSD to input conformation": rmsd, |
| 162 | + } |
| 163 | + |
| 164 | + |
| 165 | +def run(): |
| 166 | + args = parse_args() |
| 167 | + |
| 168 | + p = ProbabilityScorer() |
| 169 | + |
| 170 | + all_data = [] |
| 171 | + |
| 172 | + with io.MoleculeReader(args.conformer_file) as reader: |
| 173 | + for i, molecule in enumerate(reader): |
| 174 | + data = p.process_molecule(molecule) |
| 175 | + if i == 0: |
| 176 | + print(", ".join(data.keys())) |
| 177 | + print(", ".join(str(value) for value in data.values())) |
| 178 | + all_data.append(data) |
| 179 | + |
| 180 | + if args.write_csv: |
| 181 | + keys = all_data[0].keys() |
| 182 | + with open('filtered_poses_analysis.csv', 'w', newline='') as output_file: |
| 183 | + dict_writer = csv.DictWriter(output_file, keys) |
| 184 | + dict_writer.writeheader() |
| 185 | + dict_writer.writerows(all_data) |
| 186 | + |
| 187 | + |
| 188 | +if __name__ == "__main__": |
| 189 | + run() |
0 commit comments