# Azure Pipelines step template: runs io_demo (via run_io_demo.sh) while
# az-network-corrupter.sh is running in the background against the RoCE
# interface, then analyzes the io_demo logs.
#
# Step order:
#   1. Restore the port state before the test.
#   2. Start the network corrupter in the background and record its PID.
#   3. Launch io_demo through run_io_demo.sh.
#   4. Analyze the logs written by io_demo.
#   5. Kill the corrupter and print its log (always runs).
#   6. Restore the port state again (always runs).
#
# The following $(...) macros are referenced but not defined here, so the
# including pipeline has to provide them: workspace, cycles, downtime,
# uptime, agent_hosts, io_demo_exe and (for the default of the roce_iface
# parameter) roce_iface.

# Defaults for any parameters that aren't specified by the caller.
parameters:
  # Test name: used as the log sub-directory under $(workspace) and shown in
  # the step display names.
  - name: name
    default: 'test'
  # Extra arguments appended to the end of the io_demo command line.
  - name: iodemo_args
    default: ''
  # Value exported as UCX_TLS for the io_demo run.
  - name: iodemo_tls
    default: 'rc_x'
  # Passed as --duration to both run_io_demo.sh and the log analyzer.
  - name: duration
    default: 60
  # Network interface handed to the corrupter, used to pick UCX_NET_DEVICES
  # and passed to run_io_demo.sh with -i.
  - name: roce_iface
    default: '$(roce_iface)'
  # Passed to the corrupter as initial_delay=<value>.
  - name: initial_delay
    default: 20

steps:
  # Reset the interface before the test starts.
  - bash: |
      set -eEx
      $(workspace)/buildlib/az-network-corrupter.sh reset=yes interface=${{ parameters.roce_iface }}
    displayName: Restore port state
    condition: always()
    timeoutInMinutes: 2

  # Launch the corrupter as a background job. Its stdout and stderr are piped
  # through add_timestamp into corrupter.log. add_timestamp and
  # azure_set_variable are expected to come from az-helpers.sh (sourced
  # below); the PID stored here is read back as $(corrupter_pid) by the
  # "Kill corrupter" step.
  - bash: |
      set -eEx
      source $(workspace)/buildlib/az-helpers.sh
      $(workspace)/buildlib/az-network-corrupter.sh \
        initial_delay=${{ parameters.initial_delay }} \
        cycles=$(cycles) \
        downtime=$(downtime) \
        uptime=$(uptime) \
        interface=${{ parameters.roce_iface }} \
        |& add_timestamp &>corrupter.log &
      # Poll until the corrupter process is visible, capturing its PID in the
      # same pgrep call so the value cannot go stale (or the step abort under
      # 'set -e') between separate lookups.
      until corrupter_pid=$(pgrep -u "$USER" -f 'network-corrupter')
      do
        sleep 1
      done
      echo "corrupter_pid=$corrupter_pid"
      azure_set_variable "corrupter_pid" "$corrupter_pid"
    displayName: Start network corrupter
    timeoutInMinutes: 2

  # Run io_demo. Options listed before $(io_demo_exe) are given to
  # run_io_demo.sh; the ones after it follow the io_demo executable on the
  # command line (presumably forwarded to io_demo itself - note that -i
  # appears in both groups with different values).
  - bash: |
      set -eEx
      sudo /hpc/local/bin/lshca
      mkdir -p $(workspace)/${{ parameters.name }}
      # set UCX environment variables
      export UCX_NET_DEVICES=$(ibdev2netdev | sed -ne 's/\(\w*\) port \([0-9]\) ==> '${{parameters.roce_iface}}' .*/\1:\2/p')
      export UCX_TLS=${{ parameters.iodemo_tls }}
      export UCX_RNDV_THRESH=4k
      export LD_LIBRARY_PATH=$(workspace)/install/lib:$LD_LIBRARY_PATH
      $(workspace)/test/apps/iodemo/run_io_demo.sh \
        -H $(agent_hosts) \
        --tasks-per-node 1 \
        --duration ${{ parameters.duration }} \
        -v \
        --num-clients 1 \
        --num-servers 1 \
        --map-by slot \
        --log-dir $(workspace)/${{ parameters.name }} \
        -i ${{ parameters.roce_iface }} \
        $(io_demo_exe) \
          -d 512:524288 \
          -P 2 \
          -o read,write \
          -i 0 \
          -w 16 \
          -t 60 \
          ${{ parameters.iodemo_args }}
    displayName: Launch with run_io_demo.sh ( ${{ parameters.name }} )
    timeoutInMinutes: 15

  # Analyze the logs from the io_demo log directory. The whole invocation is
  # a single command; explicit continuations keep every option attached to it.
  - bash: |
      set -eEx
      python /hpc/noarch/git_projects/hpc-mtt-conf/scripts/iodemo_analyzer.py \
        --allow_list $(System.PullRequest.TargetBranch) \
        -d $(workspace)/${{ parameters.name }} \
        --duration ${{ parameters.duration }} \
        -t 3
    displayName: Analyze for ${{ parameters.name }}
    timeoutInMinutes: 1

  # Stop the corrupter started above. If kill fails, a warning is logged and
  # the task is marked as failed; corrupter.log is printed in either case.
  - bash: |
      set -eEx
      pid=$(corrupter_pid)
      echo "Stopping corrupter, PID=${pid}"
      if ! kill ${pid}; then
        echo "##vso[task.logissue type=warning]Can't stop corrupter: process doesn't exist"
        echo "##vso[task.complete result=Failed;]"
      else
        echo "Corrupter stopped successfully"
      fi
      cat corrupter.log
    displayName: Kill corrupter
    condition: always()
    timeoutInMinutes: 2

  # Reset the interface again once the test is over, whatever its outcome.
  - bash: |
      set -eEx
      $(workspace)/buildlib/az-network-corrupter.sh reset=yes interface=${{ parameters.roce_iface }}
    displayName: Restore port state
    condition: always()
    timeoutInMinutes: 2