Commit b8724383 by Felix Abecassis

hooks: change the semantics of NVIDIA_VISIBLE_DEVICES=""

With LXC, you can override the value of an environment variable with an empty string (null), but you cannot unset an existing variable. The NVIDIA hook was previously activated when NVIDIA_VISIBLE_DEVICES was set to the empty string. As a result, it was not possible to disable the hook by overriding the environment variable in the configuration. The hook can now be disabled by setting NVIDIA_VISIBLE_DEVICES to the empty string or to the new special value "void". Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
parent b046bbab
...@@ -4,11 +4,32 @@ ...@@ -4,11 +4,32 @@
set -eu set -eu
if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then # NVIDIA_VISIBLE_DEVICES="" *or* NVIDIA_VISIBLE_DEVICES="void"
# Not a GPU container, nothing to do, exit early. # GPU support was explicitly disabled, exit early.
if [ -z "${NVIDIA_VISIBLE_DEVICES-x}" ] || [ "${NVIDIA_VISIBLE_DEVICES:-}" = "void" ]; then
exit 0 exit 0
fi fi
# https://github.com/nvidia/nvidia-container-runtime#cuda_version
if [ -n "${CUDA_VERSION:-}" ] && [ -z "${NVIDIA_REQUIRE_CUDA:-}" ]; then
# Legacy CUDA image: default to all devices and all driver capabilities.
if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
NVIDIA_VISIBLE_DEVICES="all"
fi
if [ -z "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
NVIDIA_DRIVER_CAPABILITIES="all"
fi
if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
NVIDIA_REQUIRE_CUDA="cuda>=${BASH_REMATCH[0]}"
fi
else
# NVIDIA_VISIBLE_DEVICES unset and it's not a legacy CUDA image.
# This is not a GPU image, exit early.
if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
exit 0
fi
fi
export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
if ! which nvidia-container-cli >/dev/null; then if ! which nvidia-container-cli >/dev/null; then
echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2 echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
...@@ -128,7 +149,7 @@ if [ "${USERNS}" = "yes" ]; then ...@@ -128,7 +149,7 @@ if [ "${USERNS}" = "yes" ]; then
fi fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require # https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ]; then if [ -n "${NVIDIA_DISABLE_REQUIRE:-}" ]; then
if [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then if [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
CLI_DISABLE_REQUIRE="true" CLI_DISABLE_REQUIRE="true"
fi fi
...@@ -152,15 +173,12 @@ if [ -z "${CLI_LDCONFIG}" ]; then ...@@ -152,15 +173,12 @@ if [ -z "${CLI_LDCONFIG}" ]; then
fi fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices # https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
CLI_DEVICES= CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
if [ -n "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities # https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
CLI_CAPABILITIES= CLI_CAPABILITIES=
if [ -n "${NVIDIA_DRIVER_CAPABILITIES+x}" ]; then if [ -n "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }" CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
fi fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_ # https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
...@@ -169,23 +187,6 @@ for req in $(compgen -e "NVIDIA_REQUIRE_"); do ...@@ -169,23 +187,6 @@ for req in $(compgen -e "NVIDIA_REQUIRE_"); do
CLI_REQUIREMENTS="${CLI_REQUIREMENTS} ${!req}" CLI_REQUIREMENTS="${CLI_REQUIREMENTS} ${!req}"
done done
# https://github.com/nvidia/nvidia-container-runtime#cuda_version
if [ -n "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_REQUIRE_CUDA+x}" ]; then
# Legacy CUDA image detected, default to all devices and all driver capabilities.
if [ -z "${CLI_DEVICES}" ]; then
CLI_DEVICES="all"
fi
if [ -z "${CLI_CAPABILITIES}" ]; then
CLI_CAPABILITIES="all"
fi
# Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
CLI_REQUIREMENTS="${CLI_REQUIREMENTS} cuda>=${BASH_REMATCH[0]}"
fi
fi
if [ "${CLI_CAPABILITIES}" = "all" ]; then if [ "${CLI_CAPABILITIES}" = "all" ]; then
CLI_CAPABILITIES="compute compat32 graphics utility video" CLI_CAPABILITIES="compute compat32 graphics utility video"
fi fi
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment