# Rewrite an AlphaFold 3 input json file putting the unpairedMsa, pairedMsa and mmcif attributes in separate files
# and using the unpairedMsaPath, pairedMsaPath, mmcifPath attributes to refer to those separate files.

import json
import sys
from os.path import basename, dirname, join

_JSON_SUFFIX = '.json'
_MSA_KEYS = ('unpairedMsa', 'pairedMsa')


def _write_text(path, text):
    """Write *text* to *path*, replacing any existing file."""
    with open(path, 'w', encoding='utf-8') as out:
        out.write(text)


def af3_extract_msa(input_json_path, new_json_path):
    """Move inline MSAs and template mmCIFs of an AlphaFold 3 job into files.

    Reads the job description at *input_json_path*.  For every ``protein``
    entry in ``sequences``, each inline ``unpairedMsa`` / ``pairedMsa`` string
    is written to an ``.a3m`` file and replaced by an ``unpairedMsaPath`` /
    ``pairedMsaPath`` attribute; each template's inline ``mmcif`` string is
    written to a ``.cif`` file and replaced by ``mmcifPath``.  The job's
    ``version`` is set to 2 and the result is written to *new_json_path*.

    The extracted files are created in the directory of *new_json_path* and
    are named after the input file:
    ``<stem>_unpaired_msa_<id>.a3m``, ``<stem>_paired_msa_<id>.a3m`` and
    ``<stem>_template_<id>_<n>.cif`` (``n`` counts from 1), where ``<id>`` is
    the protein id (list ids are concatenated).

    The path attributes hold just the file name, i.e. a path relative to the
    new JSON file.  NOTE(review): this relies on AlphaFold 3 resolving
    relative paths against the directory of the input JSON, as described in
    its input format documentation.

    Attributes whose value is missing or JSON ``null`` are left untouched, so
    a job that has already been processed can be passed through again.

    Args:
        input_json_path: Path of the AlphaFold 3 JSON file to read.
        new_json_path: Path of the rewritten JSON file to create.

    Raises:
        OSError: If a file cannot be read or written (for example when the
            output directory does not exist).
        KeyError: If the job has no ``sequences`` entry or a protein has no
            ``id``.
    """
    with open(input_json_path, 'r', encoding='utf-8') as src:
        job = json.load(src)

    output_dir = dirname(new_json_path)

    # Strip only a trailing ".json".  A blanket str.replace() would leave the
    # name unchanged when there is no ".json" in it, making every extracted
    # file share one path (possibly the input file itself).
    stem = basename(input_json_path)
    if stem.endswith(_JSON_SUFFIX):
        stem = stem[:-len(_JSON_SUFFIX)]

    job['version'] = 2

    for entry in job['sequences']:
        if 'protein' not in entry:
            continue
        protein = entry['protein']

        chain_id = protein['id']
        if isinstance(chain_id, list):
            chain_id = ''.join(chain_id)

        for msa_key in _MSA_KEYS:
            msa_text = protein.get(msa_key)
            if msa_text is None:
                # Absent or null: there is nothing inline to extract.
                continue
            # msa_key[:-3] drops the "Msa" suffix: "unpaired" / "paired".
            file_name = f'{stem}_{msa_key[:-3]}_msa_{chain_id}.a3m'
            _write_text(join(output_dir, file_name), msa_text)
            del protein[msa_key]
            protein[msa_key + 'Path'] = file_name

        # "templates" may be absent, null or an empty list.
        templates = protein.get('templates') or []
        for number, template in enumerate(templates, start=1):
            mmcif_text = template.get('mmcif')
            if mmcif_text is None:
                # Template already refers to an external file (or is empty).
                continue
            file_name = f'{stem}_template_{chain_id}_{number}.cif'
            _write_text(join(output_dir, file_name), mmcif_text)
            del template['mmcif']
            template['mmcifPath'] = file_name

    with open(new_json_path, 'w', encoding='utf-8') as dst:
        json.dump(job, dst)


# Only act as a command-line tool when executed directly, so the module can
# be imported without side effects.
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Syntax: python3 af3_extract_msa.py <input.json> <output.json>')
    else:
        af3_extract_msa(sys.argv[1], sys.argv[2])