#!/usr/bin/python3
#
#$ -S /usr/bin/python3
#$ -q gpu.q
#$ -N alphafold
#$ -cwd
###$ -l h_rt=24:00:00
#$ -l h_rt=8:00:00
#$ -l mem_free=60G
#$ -l scratch=50G
#$ -l compute_cap=80,gpu_mem=40G
###$ -pe smp 2
#
# Compute cap for A100 GPU is 8.0 (40 or 80 GB), for A40 GPU is 8.6 (48 GB).
#
# The "-pe smp 4" means allocate 4 slots.  On a CPU queue that means 4 cores,
# but on the gpu.q it means 4 GPUs.
#
# mem_free is per-slot.
#
# Adapted from the alphafold/docker/run_alphafold.py script.
# The original version runs AlphaFold using a Docker image.
# This adapted version uses a Singularity image with defaults
# set for the UCSF Wynton cluster.
#
"""Singularity launch script for AlphaFold 3."""
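# Example usage (a sketch only; the script file name and all paths below are
# illustrative, not part of this script):
#
#   qsub run_alphafold3_wynton.py --json_path fold_input.json --output_dir af3_output
#
# The "#$" directives above are read by qsub, and any options given after the
# script name are passed through to parse_args() below.  The same options work
# when running interactively on a GPU node:
#
#   python3 run_alphafold3_wynton.py --json_path fold_input.json --output_dir af3_output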
def parse_args():
    import argparse
    parser = argparse.ArgumentParser(description='Run AlphaFold structure prediction using a singularity image.')

    # Must specify either json_path or input_dir.
    parser.add_argument(
        '--json_path',
        help='Path to the input JSON file')
    parser.add_argument(
        '--input_dir',
        help='Path to the directory containing input JSON files')
    parser.add_argument(
        '--output_dir', default = '.',
        help='Path to a directory where the results will be saved')

    from os.path import expanduser, isdir
    weights_dir = expanduser('~/af3_weights')
    if not isdir(weights_dir):
        weights_dir = None
    parser.add_argument(
        '--model_dir', default = weights_dir,
        help='Path to the model to use for inference.')

    parser.add_argument(
        '--flash_attention_implementation',
        default='triton',
        choices=['triton', 'cudnn', 'xla'],
        help=(
            "Flash attention implementation to use. 'triton' and 'cudnn' use a"
            ' Triton and a cuDNN flash attention implementation, respectively. The'
            ' Triton kernel is fastest and has been tested more thoroughly. The'
            " Triton and cuDNN kernels require Ampere GPUs or later. 'xla' uses an"
            ' XLA attention implementation (no flash attention) and is portable'
            ' across GPU devices.'
        ),
    )

    # Control which stages to run.
    parser.add_argument(
        '--run_data_pipeline',
        default=True,
        type=str_to_bool,
        help='Whether to run the data pipeline on the fold inputs.',
    )
    parser.add_argument(
        '--run_inference',
        default=True,
        type=str_to_bool,
        help='Whether to run inference on the fold inputs.',
    )

    parser.add_argument(
        '--db_dir',
        default = '/wynton/group/ferrin/nobackup/goddard/af3_databases',
        # default = '/wynton/group/databases/alphafold3',
        help = 'Path to the directory containing the databases.',
    )

    # Number of CPUs to use for MSA tools.
    import multiprocessing
    parser.add_argument(
        '--jackhmmer_n_cpu',
        default = min(multiprocessing.cpu_count(), 8),
        help = 'Number of CPUs to use for Jackhmmer. Defaults to min(cpu_count, 8). Going'
        ' beyond 8 CPUs provides very little additional speedup.',
    )
    parser.add_argument(
        '--nhmmer_n_cpu',
        default = min(multiprocessing.cpu_count(), 8),
        help = 'Number of CPUs to use for Nhmmer. Defaults to min(cpu_count, 8). Going'
        ' beyond 8 CPUs provides very little additional speedup.',
    )

    # Compilation cache
    parser.add_argument(
        '--jax_compilation_cache_dir',
        default = None,
        help = 'Path to a directory for the JAX compilation cache.',
    )

    import os
    parser.add_argument(
        '--gpu_devices', default = os.environ.get('SGE_GPU', '0'),
        help='Comma-separated list of GPU identifiers used to set the environment variable CUDA_VISIBLE_DEVICES.')

    parser.add_argument(
        '--singularity_image_path',
        help='Path to the AlphaFold singularity image.')
    parser.add_argument(
        '--use_a100_80gb_settings', type=str_to_bool,
        help='Use AlphaFold 3 settings for A100 80 GB GPUs. If not set, use A100 40 GB settings.')

    args = parser.parse_args()
    return args

def str_to_bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        import argparse
        raise argparse.ArgumentTypeError('Boolean value expected.')

def main():
    args = parse_args()

    if args.model_dir is None:
        raise RuntimeError("If you do not have the AlphaFold 3 neural net weights (also called model parameters) in directory ~/af3_weights then you must provide the --model_dir option to specify the location of the weights. To obtain the weights you must submit a request to Google https://forms.gle/svvpY4u2jsHEwWYS6 as described on the AlphaFold 3 Github page https://github.com/google-deepmind/alphafold3.")

    # Pass these options through to run_alphafold.py inside the container.
    command_args = []
    run_args = ['json_path', 'input_dir', 'db_dir', 'output_dir', 'model_dir',
                'flash_attention_implementation', 'run_data_pipeline', 'run_inference',
                'jackhmmer_n_cpu', 'nhmmer_n_cpu', 'jax_compilation_cache_dir']
    for arg_name in run_args:
        if getattr(args, arg_name) is not None:
            command_args.append(f'--{arg_name}={getattr(args, arg_name)}')

    if args.json_path is None and args.input_dir is None:
        raise ValueError('Exactly one of --json_path or --input_dir must be specified.')

    env_vars = {
        'CUDA_VISIBLE_DEVICES': args.gpu_devices,
        'NVIDIA_VISIBLE_DEVICES': args.gpu_devices,
    }
    env_vals = ','.join('%s=%s' % (key, value) for key, value in env_vars.items())

    # AlphaFold uses the Python tempfile module, which uses the TMPDIR environment
    # variable, set to /scratch/job-id-string on Wynton.  Otherwise Python will use /tmp,
    # which is only 4-8 GB on Wynton and will cause write errors on large sequences.
    import os
    tempdir = os.environ.get('TMPDIR', '/scratch')

    # Mount the AlphaFold databases, model weights directory, current directory, and scratch directory.
    bind_directories = [args.db_dir, args.model_dir, os.getcwd(), tempdir]

    # Bind parent directories of the input JSON locations and the output directory.
    from os.path import isabs, dirname
    if args.json_path and isabs(args.json_path) and dirname(args.json_path):
        bind_directories.append(dirname(args.json_path))
    if args.input_dir and isabs(args.input_dir):
        bind_directories.append(args.input_dir)
    if args.output_dir and isabs(args.output_dir):
        bind_directories.append(dirname(args.output_dir))
    if args.jax_compilation_cache_dir and isabs(args.jax_compilation_cache_dir):
        bind_directories.append(args.jax_compilation_cache_dir)

    if args.singularity_image_path:
        singularity_image_path = args.singularity_image_path
    elif args.use_a100_80gb_settings:
        singularity_image_path = '/wynton/home/ferrin/goddard/alphafold_singularity/alphafold3_80gb_dec_4_2024.sif'
    else:
        singularity_image_path = '/wynton/home/ferrin/goddard/alphafold_singularity/alphafold3_40gb_dec_4_2024.sif'

    singularity_args = ['singularity',
                        'exec',
                        '--nv',    # Use the Nvidia container library to use CUDA.
                        '-B "%s"' % ','.join(bind_directories),
                        '--env %s' % env_vals,
                        singularity_image_path,
                        'python',
                        '/app/alphafold/run_alphafold.py',
                        ] + command_args

    cmd = ' '.join(singularity_args)
    print(cmd)

    from subprocess import run
    import sys
    run('module load cuda/12.2 ; %s' % cmd,
        stdout = sys.stdout, stderr = sys.stderr,
        shell = True,    # The module command is a csh alias on Wynton.
        executable = '/bin/csh',
        check = True)

if __name__ == '__main__':
    main()
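# For reference, a minimal AlphaFold 3 fold input JSON of the kind expected by
# --json_path might look like the sketch below.  The job name, chain ID, and the
# placeholder sequence are illustrative; see the AlphaFold 3 documentation at
# https://github.com/google-deepmind/alphafold3 for the full input format.
#
# {
#   "name": "example_job",
#   "modelSeeds": [1],
#   "sequences": [
#     {"protein": {"id": ["A"], "sequence": "MVLSPADKTNVKAAW"}}
#   ],
#   "dialect": "alphafold3",
#   "version": 1
# }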