#!/usr/bin/python3
#
#$ -S /usr/bin/python3
#$ -q gpu.q
#$ -N alphafold 
#$ -cwd
###$ -l h_rt=24:00:00
#$ -l h_rt=8:00:00
#$ -l mem_free=60G
#$ -l scratch=50G
#$ -l compute_cap=80,gpu_mem=40G
###$ -pe smp 2
#
# Compute cap for A100 GPU is 8.0 (40 or 80 GB), for A40 GPU is 8.6 (48 GB).
#
# The "-pe smp 4" means allocate 4 slots.  On a CPU queue that means 4 cores,
# but on the gpu.q it means 4 GPUs.
#
# mem_free is per-slot.
#
# Adapted from alphafold/docker/run_alphafold.py script.
# Original version runs AlphaFold using a docker image.
# This adapted version uses a singularity image with defaults
# set for the UCSF Wynton cluster.
#

"""Run non-containerized Alphafold 3."""

def parse_args(argv=None):
  """Parse the command-line options of this AlphaFold 3 launcher.

  Args:
    argv: Optional list of argument strings to parse.  When None (the
      default) argparse reads the options from sys.argv[1:].

  Returns:
    The argparse.Namespace holding one attribute per option.  Options that
    were not given keep their defaults; --model_dir defaults to
    ~/af3_weights when that directory exists and to None otherwise.
  """
  import argparse
  import multiprocessing
  from os.path import expanduser, isdir

  parser = argparse.ArgumentParser(description='Run AlphaFold structure prediction using singularity image.')

  # Must specify either json_path or input_dir
  parser.add_argument(
    '--json_path',
    help='Paths to the input JSON file')

  parser.add_argument(
    '--input_dir',
    help='Paths to the directory containing input JSON files')

  parser.add_argument(
    '--output_dir',
    default = '.',
    help='Paths to a directory where the results will be saved')

  # Only offer the per-user weights directory as a default if it really
  # exists, so the caller can detect "no weights available" via None.
  weights_dir = expanduser('~/af3_weights')
  if not isdir(weights_dir):
    weights_dir = None

  parser.add_argument(
    '--model_dir',
    default = weights_dir,
    help='Path to the model to use for inference.')

  parser.add_argument(
    '--flash_attention_implementation',
    default='triton',
    choices=['triton', 'cudnn', 'xla'],
    help=(
        "Flash attention implementation to use. 'triton' and 'cudnn' uses a"
        ' Triton and cuDNN flash attention implementation, respectively. The'
        ' Triton kernel is fastest and has been tested more thoroughly. The'
        " Triton and cuDNN kernels require Ampere GPUs or later. 'xla' uses an"
        ' XLA attention implementation (no flash attention) and is portable'
        ' across GPU devices.'
    ),
  )

  # Control which stages to run.
  parser.add_argument(
    '--run_data_pipeline',
    default=True, type=str_to_bool,
    help='Whether to run the data pipeline on the fold inputs.',
  )

  parser.add_argument(
    '--run_inference',
    default=True, type=str_to_bool,
    help='Whether to run inference on the fold inputs.',
  )

  parser.add_argument(
    '--db_dir',
    default = '/home/goddard/af3_databases',
    help = 'Path to the directory containing the databases.',
  )

  # Numeric options are parsed as int so that a malformed value is rejected
  # here, with a usage message, instead of failing later inside the job.
  parser.add_argument(
    '--num_diffusion_samples',
    default = 5, type = int,
    help = 'Number of diffusion samples to generate.',
  )

  # Number of CPUs to use for MSA tools.
  default_n_cpu = min(multiprocessing.cpu_count(), 8)
  parser.add_argument(
    '--jackhmmer_n_cpu',
    default = default_n_cpu, type = int,
    help = 'Number of CPUs to use for Jackhmmer. Default to min(cpu_count, 8). Going'
    ' beyond 8 CPUs provides very little additional speedup.',
  )

  parser.add_argument(
    '--nhmmer_n_cpu',
    default = default_n_cpu, type = int,
    help = 'Number of CPUs to use for Nhmmer. Default to min(cpu_count, 8). Going'
           ' beyond 8 CPUs provides very little additional speedup.',
  )

  # Compilation cache
  parser.add_argument(
    '--jax_compilation_cache_dir',
    default = None,
    help ='Path to a directory for the JAX compilation cache.',
  )

  return parser.parse_args(argv)

def str_to_bool(v):
  """Convert a command-line boolean spelling to a bool.

  A value that is already a bool is returned unchanged.  Otherwise the
  value is lower-cased and matched against the accepted spellings:
  yes/true/t/y/1 give True and no/false/f/n/0 give False.

  Raises:
    argparse.ArgumentTypeError: if the text is not one of the spellings.
  """
  if isinstance(v, bool):
    return v

  spellings = {
    'yes': True, 'true': True, 't': True, 'y': True, '1': True,
    'no': False, 'false': False, 'f': False, 'n': False, '0': False,
  }
  parsed = spellings.get(v.lower())
  if parsed is not None:
    return parsed

  # Imported lazily: only needed on the error path.
  import argparse
  raise argparse.ArgumentTypeError('Boolean value expected.')

def main():
  """Parse the options and run AlphaFold 3's run_alphafold.py with them.

  Every option that has a value is forwarded as --name=value to
  /home/goddard/alphafold3/run_alphafold.py, which is started with python3
  and inherits this process's stdout and stderr.

  Raises:
    RuntimeError: if no model weights directory is available.
    ValueError: unless exactly one of --json_path / --input_dir is given.
    subprocess.CalledProcessError: if the launched command exits non-zero.
  """
  import shlex
  import sys
  from subprocess import run

  options = parse_args()

  if options.model_dir is None:
    raise RuntimeError("If you do not have the AlphaFold 3 neural net weights (also called model parameters) in directory ~/af3_weights then you must provide the --model_dir option to specify the location of the weights.  To obtain the weights you must submit a request to Google https://forms.gle/svvpY4u2jsHEwWYS6 as described on the AlphaFold 3 Github page https://github.com/google-deepmind/alphafold3.")

  # Reject both "neither given" and "both given", as the message promises.
  if (options.json_path is None) == (options.input_dir is None):
    raise ValueError('Exactly one of --json_path or --input_dir must be specified.')

  run_args = ['json_path', 'input_dir', 'db_dir', 'output_dir', 'model_dir', 'flash_attention_implementation',
              'run_data_pipeline', 'run_inference', 'num_diffusion_samples', 'jackhmmer_n_cpu', 'nhmmer_n_cpu', 'jax_compilation_cache_dir']
  command_args = []
  for arg_name in run_args:
    value = getattr(options, arg_name)
    if value is not None:
      command_args.append(f'--{arg_name}={value}')

  cmd = ['python3', '/home/goddard/alphafold3/run_alphafold.py'] + command_args

  # Echo the command in a copy-pasteable form.  Flush so that it lands in
  # the job log before any output written by the child process.
  print(' '.join(shlex.quote(word) for word in cmd), flush = True)

  # Pass the argument list directly (no shell) so that paths containing
  # spaces or shell metacharacters reach run_alphafold.py unmodified.
  run(cmd,
      stdout = sys.stdout, stderr = sys.stderr,
      check = True)

# Run the launcher only when this file is executed as a script (for example
# as a submitted job), not when it is imported as a module.
if __name__ == '__main__':
  main()