#! /usr/bin/env bash

# Copyright (c) 2018-2022 NVIDIA Corporation
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Released under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.

################################################################################
# Thrust and CUB build script for gpuCI
################################################################################

set -e # Stop on errors.

# append variable value
# Appends ${value} to the variable named ${variable}, adding a space before
# ${value} if ${variable} is not empty. The target is written with `printf -v`
# so that no `eval` is needed and no temporary global is left behind.
function append {
  printf -v "${1}" '%s' "${!1:+${!1} }${2}"
}

# log args...
# Prints out ${args[*]} with a gpuCI log prefix and a newline before and after.
function log() {
  printf "\n>>>> %s\n\n" "${*}"
}

# print_with_trailing_blank_line args...
# Prints ${args[*]} followed by exactly one blank line. Newlines within
# ${args[*]} are preserved.
function print_with_trailing_blank_line {
  printf "%s\n\n" "${*}"
}

# echo_and_run name args...
# Echo "${name}: ${args[*]}", then execute ${args[@]}. The arguments are passed
# through exactly as received (no second round of word splitting or globbing).
# Returns the exit status of the executed command.
function echo_and_run {
  echo "${1}: ${*:2}"
  "${@:2}"
}

# echo_and_run_timed name args...
# Echo ${args[*]}, then execute ${args[@]} and report how long it took,
# including ${name} in the output of the time. The timing report is written to
# stderr by the `time` keyword. Returns the exit status of the executed command.
function echo_and_run_timed {
  # Local so the custom format does not leak into the caller's shell.
  local TIMEFORMAT=$'\n'"${1} Time: %lR"
  echo "${*:2}"
  time "${@:2}"
}

# join_delimit delimiter [value [value [...]]]
# Combine all values into a single string, separating each by the first
# character of ${delimiter}. Eg:
#   foo=(bar baz kramble)
#   joined_foo=$(join_delimit "|" "${foo[@]}")
#   echo "${joined_foo}" # "bar|baz|kramble"
function join_delimit {
  local IFS="${1}"
  shift
  # printf instead of echo so values such as "-n" are printed verbatim.
  printf '%s\n' "${*}"
}

################################################################################
# VARIABLES - Set up bash and environmental variables.
################################################################################ # Get the variables the Docker container set up for us: ${CXX}, ${CUDACXX}, etc. set +e # Don't stop on errors from /etc/cccl.bashrc. source /etc/cccl.bashrc set -e # Stop on errors. # Set path. export PATH=/usr/local/cuda/bin:${PATH} # Set home to the job's workspace. export HOME=${WORKSPACE} # Per-process memory util logs: MEMMON_LOG=${WORKSPACE}/build/memmon_log # Switch to the build directory. cd ${WORKSPACE} mkdir -p build cd build # Remove any old .ninja_log file so the PrintNinjaBuildTimes step is accurate: rm -f .ninja_log if [[ -z "${CMAKE_BUILD_TYPE}" ]]; then CMAKE_BUILD_TYPE="Release" fi CMAKE_BUILD_FLAGS="--" # The Docker image sets up `${CXX}` and `${CUDACXX}`. append CMAKE_FLAGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER='${CUDACXX}'" if [[ "${CXX_TYPE}" == "nvcxx" ]]; then # NVC++ isn't properly detected by CMake, so we have to tell CMake to ignore # detection and explicit provide the compiler ID. Ninja currently isn't # supported, so we just use makefiles. append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER_FORCED=ON" append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER_ID=NVCXX" # We use NVC++ "slim" image which only contain a single CUDA toolkit version. # When using NVC++ in an environment without GPUs (like our CPU-only # builders) it unfortunately defaults to the oldest CUDA toolkit version it # supports, even if that version is not in the image. So, we have to # explicitly tell NVC++ it which CUDA toolkit version to use. CUDA_VER=$(echo ${SDK_VER} | sed 's/.*\(cuda[0-9]\+\.[0-9]\+\)/\1/') append CMAKE_FLAGS "-DCMAKE_CUDA_FLAGS=-gpu=${CUDA_VER}" # Don't stop on build failures. append CMAKE_BUILD_FLAGS "-k" else if [[ "${CXX_TYPE}" == "icc" ]]; then # Only the latest version of the Intel C++ compiler, which NVCC doesn't # officially support yet, is freely available. 
append CMAKE_FLAGS "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" fi # We're using NVCC so we need to set the host compiler. append CMAKE_FLAGS "-DCMAKE_CXX_COMPILER='${CXX}'" append CMAKE_FLAGS "-G Ninja" # Don't stop on build failures. append CMAKE_BUILD_FLAGS "-k0" fi DETERMINE_PARALLELISM_FLAGS="" # Used to limit the number of default build threads. Any build/link # steps that exceed this limit will cause this script to report a # failure. Tune this using the memmon logs printed after each run. # # Build steps that take more memory than this limit should # be split into multiple steps/translation units. Any temporary # increases to this threshold should be reverted ASAP. The goal # to do decrease this as much as possible and not increase it. if [[ -z "${MIN_MEMORY_PER_THREAD}" ]]; then if [[ "${CXX_TYPE}" == "nvcxx" ]]; then MIN_MEMORY_PER_THREAD=3.0 # GiB elif [[ "${CXX_TYPE}" == "icc" ]]; then MIN_MEMORY_PER_THREAD=2.5 # GiB else MIN_MEMORY_PER_THREAD=2.0 # GiB fi fi append DETERMINE_PARALLELISM_FLAGS "--min-memory-per-thread ${MIN_MEMORY_PER_THREAD}" if [[ -n "${PARALLEL_LEVEL}" ]]; then append DETERMINE_PARALLELISM_FLAGS "-j ${PARALLEL_LEVEL}" fi # COVERAGE_PLAN options: # * Exhaustive # * Thorough # * Minimal if [[ -z "${COVERAGE_PLAN}" ]]; then # `ci/local/build.bash` always sets a coverage plan, so we can assume we're # in gpuCI if one was not set. if [[ "${CXX_TYPE}" == "nvcxx" ]]; then # Today, NVC++ builds take too long to do anything more than Minimal. COVERAGE_PLAN="Minimal" elif [[ "${BUILD_TYPE}" == "cpu" ]] && [[ "${BUILD_MODE}" == "branch" ]]; then # Post-commit CPU CI builds. COVERAGE_PLAN="Exhaustive" elif [[ "${BUILD_TYPE}" == "cpu" ]]; then # Pre-commit CPU CI builds. COVERAGE_PLAN="Thorough" elif [[ "${BUILD_TYPE}" == "gpu" ]]; then # Pre- and post-commit GPU CI builds. 
COVERAGE_PLAN="Minimal" fi fi case "${COVERAGE_PLAN}" in Exhaustive) append CMAKE_FLAGS "-DTHRUST_ENABLE_MULTICONFIG=ON" append CMAKE_FLAGS "-DTHRUST_IGNORE_DEPRECATED_CPP_11=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_ALL=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CPP=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_TBB=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_OMP=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CUDA=ON" append CMAKE_FLAGS "-DTHRUST_INCLUDE_CUB_CMAKE=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_WORKLOAD=LARGE" ;; Thorough) # Build the legacy bench.cu. We'll probably want to remove this when we # switch to the new, heavier thrust_benchmarks project. append CMAKE_FLAGS "-DTHRUST_ENABLE_BENCHMARKS=ON" append CMAKE_FLAGS "-DTHRUST_ENABLE_MULTICONFIG=ON" append CMAKE_FLAGS "-DTHRUST_IGNORE_DEPRECATED_CPP_11=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_ALL=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CPP=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_TBB=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_OMP=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CUDA=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_WORKLOAD=SMALL" append CMAKE_FLAGS "-DTHRUST_INCLUDE_CUB_CMAKE=ON" append CMAKE_FLAGS "-DTHRUST_AUTO_DETECT_COMPUTE_ARCHS=ON" if [[ "${CXX_TYPE}" != "nvcxx" ]]; then # NVC++ can currently only target one compute architecture at a time. 
append CMAKE_FLAGS "-DTHRUST_ENABLE_COMPUTE_50=ON" append CMAKE_FLAGS "-DTHRUST_ENABLE_COMPUTE_60=ON" append CMAKE_FLAGS "-DTHRUST_ENABLE_COMPUTE_70=ON" fi append CMAKE_FLAGS "-DTHRUST_ENABLE_COMPUTE_80=ON" ;; Minimal) append CMAKE_FLAGS "-DTHRUST_ENABLE_MULTICONFIG=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_LATEST=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CPP=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_TBB=OFF" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_OMP=OFF" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_ENABLE_SYSTEM_CUDA=ON" append CMAKE_FLAGS "-DTHRUST_MULTICONFIG_WORKLOAD=SMALL" append CMAKE_FLAGS "-DTHRUST_INCLUDE_CUB_CMAKE=ON" append CMAKE_FLAGS "-DTHRUST_AUTO_DETECT_COMPUTE_ARCHS=ON" if [[ "${BUILD_TYPE}" == "cpu" ]] && [[ "${CXX_TYPE}" == "nvcxx" ]]; then # If no GPU is automatically detected, NVC++ insists that you explicitly # provide an architecture. # TODO: This logic should really be moved into CMake, but it will be # tricky to do that until CMake officially supports NVC++. append CMAKE_FLAGS "-DTHRUST_ENABLE_COMPUTE_80=ON" fi ;; esac if [[ -n "${@}" ]]; then append CMAKE_BUILD_FLAGS "${@}" fi append CTEST_FLAGS "--output-on-failure" CTEST_EXCLUSION_REGEXES=() if [[ "${BUILD_TYPE}" == "cpu" ]]; then CTEST_EXCLUSION_REGEXES+=("^cub" "^thrust.*cuda") fi if [[ -n "${CTEST_EXCLUSION_REGEXES[@]}" ]]; then CTEST_EXCLUSION_REGEX=$(join_delimit "|" "${CTEST_EXCLUSION_REGEXES[@]}") append CTEST_FLAGS "-E ${CTEST_EXCLUSION_REGEX}" fi if [[ -n "${@}" ]]; then CTEST_INCLUSION_REGEX=$(join_delimit "|" "${@}") append CTEST_FLAGS "-R ^${CTEST_INCLUSION_REGEX[@]}$" fi # Export variables so they'll show up in the logs when we report the environment. export COVERAGE_PLAN export CMAKE_FLAGS export CMAKE_BUILD_FLAGS export CTEST_FLAGS ################################################################################ # ENVIRONMENT - Configure and print out information about the environment. 
################################################################################

log "Determine system topology..."

# Set `${PARALLEL_LEVEL}` if it is unset; otherwise, this just reports the
# system topology.
# ${DETERMINE_PARALLELISM_FLAGS} is deliberately unquoted: it is a
# space-separated flag string that must be split into separate arguments.
source "${WORKSPACE}/ci/common/determine_build_parallelism.bash" \
  ${DETERMINE_PARALLELISM_FLAGS}

log "Get environment..."

env | sort

log "Check versions..."

# We use sed and echo below to ensure there is always one and only one trailing
# blank line following the output from each tool.

${CXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'

echo

${CUDACXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'

echo

cmake --version 2>&1 | sed -Ez '$ s/\n*$/\n/'

echo

if [[ "${BUILD_TYPE}" == "gpu" ]]; then
  nvidia-smi 2>&1 | sed -Ez '$ s/\n*$/\n/'
fi

################################################################################
# BUILD - Build Thrust and CUB examples and tests.
################################################################################

log "Configure Thrust and CUB..."

# ${CMAKE_FLAGS} is deliberately unquoted so each flag becomes its own argument.
# NOTE(review): errexit appears to still be active here (unless the sourced
# parallelism script changed it), so a failing configure aborts the script on
# the next line and configure_status is only ever assigned 0 - confirm whether
# that fail-fast behaviour is intended.
echo_and_run_timed "Configure" cmake .. --log-level=VERBOSE ${CMAKE_FLAGS}
configure_status=$?

log "Build Thrust and CUB..."

# ${PARALLEL_LEVEL} needs to be passed after we run
# determine_build_parallelism.bash, so it can't be part of ${CMAKE_BUILD_FLAGS}.

set +e # Don't stop on build failures.

# Monitor memory usage. Thresholds in GiB:
python3 "${WORKSPACE}/ci/common/memmon.py" \
  --log-threshold 0.0 \
  --fail-threshold "${MIN_MEMORY_PER_THREAD}" \
  --log-file "${MEMMON_LOG}" &
memmon_pid=$!

echo_and_run_timed "Build" cmake --build . ${CMAKE_BUILD_FLAGS} -j ${PARALLEL_LEVEL}
build_status=$?

# Stop memmon:
kill -s SIGINT "${memmon_pid}"

# Re-enable exit on failure:
set -e

################################################################################
# TEST - Run Thrust and CUB examples and tests.
################################################################################

log "Test Thrust and CUB..."
# Errexit is active at this point, so the subshell's status has to be captured
# with `||`; otherwise a failing test run would abort the script right here and
# the timing reports, the memory check and the summary would never be printed.
test_status=0
(
  # Make sure test_status captures ctest, not tee:
  # https://stackoverflow.com/a/999259/11130318
  set -o pipefail
  echo_and_run_timed "Test" ctest ${CTEST_FLAGS} | tee ctest_log
) || test_status=$?

################################################################################
# COMPILE TIME INFO: Print the 20 longest running build steps (ninja only)
################################################################################

if [[ -f ".ninja_log" ]]; then
  log "Checking slowest build steps:"
  echo_and_run "CompileTimeInfo" cmake -P ../cmake/PrintNinjaBuildTimes.cmake | head -n 23
fi

################################################################################
# RUNTIME INFO: Print the 20 longest running test steps
################################################################################

if [[ -f "ctest_log" ]]; then
  log "Checking slowest test steps:"
  echo_and_run "TestTimeInfo" cmake -DLOGFILE=ctest_log -P ../cmake/PrintCTestRunTimes.cmake | head -n 20
fi

################################################################################
# MEMORY_USAGE
################################################################################

# memmon_status stays 0 unless the memmon log contains at least one line
# starting with FAIL.
memmon_status=0
if [[ -f "${MEMMON_LOG}" ]]; then
  log "Checking memmon logfile: ${MEMMON_LOG}"

  if grep -qE "^FAIL" "${MEMMON_LOG}"; then
    log "error: Some build steps exceeded MIN_MEMORY_PER_THREAD (${MIN_MEMORY_PER_THREAD} GiB):"
    grep -E "^FAIL" "${MEMMON_LOG}"
    memmon_status=1
  else
    log "Top memory usage per build step (all less than limit of ${MIN_MEMORY_PER_THREAD} GiB):"
    if [[ -s "${MEMMON_LOG}" ]]; then
      # Not empty:
      head -n5 "${MEMMON_LOG}"
    else
      echo "None detected above logging threshold."
    fi
  fi
fi

################################################################################
# SUMMARY - Print status of each step and exit with failure if needed.
################################################################################

log "Summary:"

# One line per stage, in the order the stages ran.
printf '%s\n' \
  "- Configure Error Code: ${configure_status}" \
  "- Build Error Code: ${build_status}" \
  "- Build Memory Check: ${memmon_status}" \
  "- Test Error Code: ${test_status}"

# The run only counts as a success when every stage reported "0"; anything
# else (including an unset status) makes the script exit with failure.
[[ "${configure_status}" == "0" &&
   "${build_status}" == "0" &&
   "${memmon_status}" == "0" &&
   "${test_status}" == "0" ]] || exit 1