# ChimeraX script to report ipTM, average pLDDT and number of residue contacts # with PAE < 5 for drugss bound to a protein predicted by AlphaFold 3. # The .cif prediction files are specified on the command line. def drug_iptm_scores(cif_paths): iptm_scores = [] for cif_path in cif_paths: sconf_path = cif_path.replace('_model.cif', '_summary_confidences.json') with open(sconf_path, 'r') as f: import json sconf = json.load(f) cp_iptm = sconf['chain_pair_iptm'] d_iptm = cp_iptm[-1][:-1] # ipTM to each protein chain from os.path import basename dname = basename(cif_path).replace('_model.cif', '') iptm_scores.append((dname, d_iptm)) return iptm_scores def drug_plddt_and_pae_scores(session, cif_paths): scores = [] from chimerax.core.commands import run run(session, 'close') for cif_path in cif_paths: from os.path import basename cif_name = basename(cif_path) drug_name = cif_name.replace('_model.cif', '') session.logger.status(f'Opening {drug_name}') model = run(session, f'open {cif_path}')[0] drug_atoms = model.atoms[model.atoms.chain_ids == 'D'] ave_plddt = drug_atoms.bfactors.mean() pae_file = cif_path.replace('_model.cif', '_confidences.json') run(session, f'alphafold pae #1 file {pae_file} plot false') pbonds = run(session, 'alphafold contacts ~/D to /D distance 4 maxPae 3') protein_atoms, lig_atoms = pbonds.atoms nligatoms = len(lig_atoms.unique()) nres = len(protein_atoms.residues.unique()) npair = len(pbonds) run(session, 'close') scores.append((drug_name, (ave_plddt, nligatoms, nres, npair))) return scores def report_scores(cif_paths, sort_by_iptm = True, average_iptm = True): iptm_scores = dict(drug_iptm_scores(cif_paths)) plddt_and_pae_scores = dict(drug_plddt_and_pae_scores(session, cif_paths)) if average_iptm: from numpy import mean ave_iptm = {dname: mean(iptm) for dname, iptm in iptm_scores.items()} if sort_by_iptm: from numpy import mean order = [(mean(iptm), drug_name) for drug_name, iptm in iptm_scores.items()] order.sort(reverse = True) drug_names = [drug_name for iptm, drug_name in order] else: drug_names = list(iptm_scores.keys()) print('Name ipTM pLDDT PAE<3A/dist<4A/natom,nres,npair') for dname in drug_names: d_iptm = iptm_scores[dname] if average_iptm: iptm = '%.2f' % ave_iptm[dname] else: iptm = '/'.join('%.2f' % iptm for iptm in d_iptm) ave_plddt, nligatoms, nres, nlowpae = plddt_and_pae_scores[dname] plddt = '%.0f' % ave_plddt print(f'{dname} {iptm} {plddt} {nligatoms} {nres} {nlowpae}') import sys if len(sys.argv) == 1: from os import listdir cif_paths = [path for path in listdir() if path.endswith('_model.cif')] else: cif_paths = [] import glob for path in sys.argv[1:]: cif_paths.extend(glob.glob(path)) if len(cif_paths) == 0: from chimerax.core.errors import UserError raise UserError('Must specify *_model.cif file paths') cif_paths.sort() report_scores(cif_paths)