#!/bin/bash
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

#
# Convenience script to run MPI applications with UCX
#
# Usage: run_mpi.sh [OPTIONS] EXECUTABLE [ARGUMENTS] [-- EXTRA_MPI_ARGUMENTS]
#
# Everything before the first non-option word is parsed as a script option,
# that word is the executable, the following words (up to an optional "--")
# are passed to the executable, and the words after "--" are passed to mpirun.
#
# NOTE: the script relies on bash features ([[ ]], arrays), so it must be
# interpreted by bash and not by a plain POSIX sh.
#

#
# Print the arguments on stdout, but only when verbose mode is enabled.
#
verbose() {
    if [ "${VERBOSE:-0}" -ne 0 ]; then
        echo "$@"
    fi
}

#
# Pick up defaults (HOSTS, NNODES, PPN) from a SLURM allocation, if any.
# Uses $SLURM_JOBID when set, otherwise the first job squeue reports for the
# current user. Returns silently, leaving the defaults untouched, when no job
# can be found.
#
check_slurm_env() {
    local nodelist total_cpus

    if [ -z "$SLURM_JOBID" ] && command -v squeue >/dev/null 2>&1; then
        # Search for jobs of the current user
        SLURM_JOBID=$(squeue -h -u "$USER" -o "%i" | head -n 1)
    fi

    if [ -z "$SLURM_JOBID" ]; then
        # Skip slurm
        return
    fi

    # Nodes to run on
    nodelist=$(squeue -j "${SLURM_JOBID}" -h -o "%N")
    HOSTS=$(hostlist -e "$nodelist")
    export HOSTS

    SLURM_NNODES=$(squeue -j "${SLURM_JOBID}" -h -o "%D")
    if [ -n "$SLURM_NNODES" ]; then
        NNODES=$SLURM_NNODES
    fi

    if [ -n "$SLURM_JOB_CPUS_PER_NODE" ]; then
        # Keep only the part before the first '(' (e.g. "16(x2)" -> "16")
        export PPN=${SLURM_JOB_CPUS_PER_NODE%%\(*}
    else
        total_cpus=$(squeue -j "${SLURM_JOBID}" -h -o "%C")
        # Only divide when both values are usable numbers; otherwise keep the
        # default PPN instead of failing with an arithmetic error.
        if [[ "$total_cpus" =~ ^[0-9]+$ && "$SLURM_NNODES" =~ ^[1-9][0-9]*$ ]]; then
            export PPN=$((total_cpus / SLURM_NNODES))
        fi
    fi
}

#
# Print the help message on stdout. The values in parentheses are the
# defaults currently in effect.
#
usage() {
    echo "Usage: run_mpi.sh [OPTIONS] EXECUTABLE [ARGUMENTS] [-- EXTRA_MPI_ARGUMENTS]"
    echo
    echo "  -h|--help                    Show this help message"
    echo "  -v|--verbose                 Turn on verbosity"
    echo "  -c|--config NAME=VALUE       Set UCX configuration"
    echo "  -N|--nnodes COUNT            Number of nodes to run on ($NNODES)"
    echo "  --ppn COUNT                  Number of processes per node ($PPN)"
    echo "  --mpi-log-level LEVEL        Log level for MPI UCX component ($MPI_LOG_LEVEL)"
    echo "  --valgrind                   Run with valgrind"
    echo "  --valgrind-args \"ARGS\"       Extra arguments to valgrind"
    echo
}

#
# Set the built-in defaults for every setting used by the script.
#
initialize() {
    export MPIRUN=@MPIRUN@
    export LIBUCS=@abs_top_builddir@/src/ucs/.libs/libucs.so
    export LIBUCT=@abs_top_builddir@/src/uct/.libs/libuct.so
    export LIBUCP=@abs_top_builddir@/src/ucp/.libs/libucp.so
    export VERBOSE=0
    export EXE=""
    # EXE_ARGS is an array and must start out empty: assigning "" to it would
    # create one empty element, which would then be handed to the application
    # as a bogus first argument.
    EXE_ARGS=()
    export EXTRA_MPI_ARGS=""
    export NNODES=1
    export PPN=1
    export CONFIG=""
    export MPI_LOG_LEVEL=0
    export VALGRIND=0
    export VALGRIND_ARGS=""
}

#
# Parse the command line into the global settings.
# Arguments: the script's command line.
# Sets:      VERBOSE, CONFIG, NNODES, PPN, MPI_LOG_LEVEL, VALGRIND,
#            VALGRIND_ARGS, EXE, EXE_ARGS (array), EXTRA_MPI_ARGS.
# Exits:     0 after --help, 254 on an unknown option or a missing value.
#
parse_args() {
    local key

    # Phase 1: script options, up to and including the executable name
    while [[ $# -gt 0 ]]; do
        key="$1"

        # Options which take a value must be followed by one
        case $key in
        -c|--config|-N|--nnodes|--ppn|--mpi-log-level|--valgrind-args)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option $key" >&2
                usage >&2
                exit 254
            fi
            ;;
        esac

        case $key in
        -h|--help)
            usage
            exit 0
            ;;
        -v|--verbose)
            export VERBOSE=1
            ;;
        -c|--config)
            export CONFIG="$CONFIG $2"
            shift
            ;;
        -N|--nnodes)
            export NNODES=$2
            shift
            ;;
        --ppn)
            export PPN=$2
            shift
            ;;
        --mpi-log-level)
            export MPI_LOG_LEVEL=$2
            shift
            ;;
        --valgrind)
            export VALGRIND=1
            ;;
        --valgrind-args)
            export VALGRIND_ARGS="$2"
            shift
            ;;
        [^-]*)
            # First word which is not an option: the executable
            export EXE=$key
            shift
            break
            ;;
        *)
            usage >&2
            # Same status bash reports for the historical "exit -2"
            exit 254
            ;;
        esac
        shift
    done

    # Phase 2: executable arguments, then extra mpirun arguments after "--"
    while [[ $# -gt 0 ]]; do
        key="$1"
        case $key in
        --)
            shift
            export EXTRA_MPI_ARGS="$*"
            break
            ;;
        *)
            EXE_ARGS+=("$key")
            ;;
        esac
        shift
    done
}

#
# Derive the total process count (NP) and the comma-separated list of the
# first NNODES hosts (HOSTLIST) from the parsed settings.
# Exits with 254 when NNODES or PPN is not a positive integer.
#
adjust_run_params() {
    local name

    for name in NNODES PPN; do
        if ! [[ "${!name}" =~ ^[1-9][0-9]*$ ]]; then
            echo "Invalid value for $name: '${!name}'" >&2
            exit 254
        fi
    done

    export NP=$((NNODES * PPN))

    # $HOSTS is intentionally left unquoted: word splitting turns whatever
    # whitespace separates the host names into single spaces for cut.
    # shellcheck disable=SC2086
    HOSTLIST=$(echo $HOSTS | cut -d' ' -f "1-${NNODES}" | tr ' ' ',')
    export HOSTLIST
}

#
# Build the Open MPI command line and run it.
# The function's status is the exit status of mpirun.
#
run_open_mpi() {
    local -a ompi_args=() launch=() cmd=()
    local preload lib mpi_home c

    # Do not create an empty list entry when LD_PRELOAD is not set
    preload="${LD_PRELOAD:+${LD_PRELOAD}:}${LIBUCP}"
    if [ "$VALGRIND" -ne 0 ]; then
        # Preload valgrind-enabled libraries
        for lib in /usr/lib64/mlnx_ofed/valgrind/*.so; do
            if [ -f "$lib" ]; then
                preload="${preload}:${lib}"
            fi
        done
    fi

    ompi_args+=(-mca pml ucx)
    ompi_args+=(-mca pml_ucx_verbose "$MPI_LOG_LEVEL")
    ompi_args+=(-mca spml ucx)
    ompi_args+=(-mca spml_ucx_verbose "$MPI_LOG_LEVEL")
    if [ -n "$HOSTLIST" ]; then
        # Without a host list "-H" would swallow the next option as its value
        ompi_args+=(-H "$HOSTLIST")
    fi
    ompi_args+=(-n "$NP")
    ompi_args+=(--map-by node)
    ompi_args+=(-mca ess_base_stream_buffering 0)
    ompi_args+=(-mca mpi_abort_delay -1)
    ompi_args+=(-x "LD_PRELOAD=${preload}")
    ompi_args+=(-x UCX_HANDLE_ERRORS=freeze)

    # CONFIG is a space-separated list of NAME=VALUE items; split on purpose
    for c in $CONFIG; do
        ompi_args+=(-x "$c")
    done

    launch=("$EXE")
    if [ "$VALGRIND" -ne 0 ]; then
        mpi_home=$(cd "$(dirname "${MPIRUN}")/.." && pwd)
        # VALGRIND_ARGS is a string of options; split on purpose
        # shellcheck disable=SC2206
        launch=(valgrind
                --fair-sched=try
                --track-origins=yes
                --leak-check=yes
                "--suppressions=${mpi_home}/share/openmpi/openmpi-valgrind.supp"
                "--suppressions=@abs_srcdir@/ompi.supp"
                $VALGRIND_ARGS
                "$EXE")
        # An empty entry in LD_LIBRARY_PATH makes the dynamic linker search
        # the current directory, so only add the separator when needed.
        LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}@VALGRIND_LIBPATH@"
    fi

    ompi_args+=(-x LD_LIBRARY_PATH)
    export LD_LIBRARY_PATH

    # EXTRA_MPI_ARGS is a string of mpirun options; split on purpose
    # shellcheck disable=SC2206
    cmd=("$MPIRUN" "${ompi_args[@]}" $EXTRA_MPI_ARGS "${launch[@]}" "${EXE_ARGS[@]}")

    verbose "${cmd[@]}"
    "${cmd[@]}"
}

#
# Entry point: set defaults, apply SLURM and command line settings, then
# launch through the detected MPI flavor.
# Exits with 254 when no executable was given and with 253 when mpirun is not
# recognized as Open MPI.
#
main() {
    initialize
    check_slurm_env
    parse_args "$@"

    if [ -z "$EXE" ]; then
        echo "No executable specified" >&2
        usage >&2
        exit 254
    fi

    adjust_run_params

    if strings "$MPIRUN" | grep -qi orte && "$MPIRUN" -h | grep -q "Open MPI"; then
        run_open_mpi
    else
        echo "Unrecognized MPI flavor ($MPIRUN)" >&2
        # Same status bash reports for the historical "exit -3"
        exit 253
    fi
}

main "$@"