diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml index 08644b2e..7f3b2658 100644 --- a/.ci_support/linux_64_blas_implgenericc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml +++ b/.ci_support/linux_64_blas_implgenericc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-64 +use_magma: +- 'false' +- 'true' zip_keys: - - c_compiler_version - cxx_compiler_version diff --git a/.ci_support/linux_64_blas_implgenericc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml b/.ci_support/linux_64_blas_implgenericc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml index 7ffe87a0..69fc01db 100644 --- a/.ci_support/linux_64_blas_implgenericc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml +++ b/.ci_support/linux_64_blas_implgenericc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-64 +use_magma: +- 'false' +- 'true' zip_keys: - - c_compiler_version - cxx_compiler_version diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml index 83ae6560..e0e75cc7 100644 --- a/.ci_support/linux_64_blas_implmklc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml +++ b/.ci_support/linux_64_blas_implmklc_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-64 +use_magma: +- 'false' +- 'true' zip_keys: - - c_compiler_version - cxx_compiler_version diff --git a/.ci_support/linux_64_blas_implmklc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml b/.ci_support/linux_64_blas_implmklc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml index facbc7ca..c9b53e55 100644 --- a/.ci_support/linux_64_blas_implmklc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml +++ b/.ci_support/linux_64_blas_implmklc_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-64 +use_magma: +- 'false' +- 'true' zip_keys: - - c_compiler_version - cxx_compiler_version diff --git a/.ci_support/linux_aarch64_c_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml b/.ci_support/linux_aarch64_c_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml index 3910b6e5..f0c8756e 100644 --- a/.ci_support/linux_aarch64_c_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml +++ b/.ci_support/linux_aarch64_c_compiler_version12cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version12.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-aarch64 +use_magma: +- 'false' +- 'true' zip_keys: - - 
c_compiler_version - cxx_compiler_version diff --git a/.ci_support/linux_aarch64_c_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml b/.ci_support/linux_aarch64_c_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml index 4f899c39..0337c1b4 100644 --- a/.ci_support/linux_aarch64_c_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml +++ b/.ci_support/linux_aarch64_c_compiler_version13cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13.yaml @@ -64,6 +64,9 @@ pytorch: - '2.4' target_platform: - linux-aarch64 +use_magma: +- 'false' +- 'true' zip_keys: - - c_compiler_version - cxx_compiler_version diff --git a/README.md b/README.md index 10844c99..85644a73 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,7 @@ Current release info | Name | Downloads | Version | Platforms | | --- | --- | --- | --- | | [![Conda Recipe](https://img.shields.io/badge/recipe-libtorch-green.svg)](https://anaconda.org/conda-forge/libtorch) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/libtorch.svg)](https://anaconda.org/conda-forge/libtorch) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/libtorch.svg)](https://anaconda.org/conda-forge/libtorch) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/libtorch.svg)](https://anaconda.org/conda-forge/libtorch) | +| [![Conda Recipe](https://img.shields.io/badge/recipe-libtorch--cuda--linalg-green.svg)](https://anaconda.org/conda-forge/libtorch-cuda-linalg) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/libtorch-cuda-linalg.svg)](https://anaconda.org/conda-forge/libtorch-cuda-linalg) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/libtorch-cuda-linalg.svg)](https://anaconda.org/conda-forge/libtorch-cuda-linalg) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/libtorch-cuda-linalg.svg)](https://anaconda.org/conda-forge/libtorch-cuda-linalg) | | [![Conda Recipe](https://img.shields.io/badge/recipe-pytorch-green.svg)](https://anaconda.org/conda-forge/pytorch) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pytorch.svg)](https://anaconda.org/conda-forge/pytorch) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pytorch.svg)](https://anaconda.org/conda-forge/pytorch) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pytorch.svg)](https://anaconda.org/conda-forge/pytorch) | | [![Conda Recipe](https://img.shields.io/badge/recipe-pytorch--cpu-green.svg)](https://anaconda.org/conda-forge/pytorch-cpu) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pytorch-cpu.svg)](https://anaconda.org/conda-forge/pytorch-cpu) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pytorch-cpu.svg)](https://anaconda.org/conda-forge/pytorch-cpu) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pytorch-cpu.svg)](https://anaconda.org/conda-forge/pytorch-cpu) | | [![Conda Recipe](https://img.shields.io/badge/recipe-pytorch--gpu-green.svg)](https://anaconda.org/conda-forge/pytorch-gpu) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pytorch-gpu.svg)](https://anaconda.org/conda-forge/pytorch-gpu) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pytorch-gpu.svg)](https://anaconda.org/conda-forge/pytorch-gpu) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pytorch-gpu.svg)](https://anaconda.org/conda-forge/pytorch-gpu) | @@ -203,16 +204,16 @@ conda 
config --add channels conda-forge conda config --set channel_priority strict ``` -Once the `conda-forge` channel has been enabled, `libtorch, pytorch, pytorch-cpu, pytorch-gpu` can be installed with `conda`: +Once the `conda-forge` channel has been enabled, `libtorch, libtorch-cuda-linalg, pytorch, pytorch-cpu, pytorch-gpu` can be installed with `conda`: ``` -conda install libtorch pytorch pytorch-cpu pytorch-gpu +conda install libtorch libtorch-cuda-linalg pytorch pytorch-cpu pytorch-gpu ``` or with `mamba`: ``` -mamba install libtorch pytorch pytorch-cpu pytorch-gpu +mamba install libtorch libtorch-cuda-linalg pytorch pytorch-cpu pytorch-gpu ``` It is possible to list all of the versions of `libtorch` available on your platform with `conda`: diff --git a/recipe/README.md b/recipe/README.md index 1a953a8d..cae82de3 100644 --- a/recipe/README.md +++ b/recipe/README.md @@ -83,7 +83,7 @@ of 2024-11-28: | magma | 2.6.1 | | 2.8.0 | `.ci/docker/common/install_magma.sh` | | libabseil | indirect? | | 20240722.0 | | | libuv | | | 1.49.2 | (not pinned) | -| mkl | 2024.2.0 | | 2024.2.2 | `.ci/docker/common/install_mkl.sh` | +| mkl | 2024.2.0 | <2024 | 2023.2.0 | `.ci/docker/common/install_mkl.sh` | | nccl | 2.21.5+ | | 2.23.4.1 | `third_party/nccl/nccl` | | protobuf | 3.7.0rc2+ | | 5.28.2 | `third_party/protobuf` | | sleef | 3.6+ | | 3.7 | `third_party/sleef` | @@ -98,3 +98,92 @@ of 2024-11-28: | sympy | ==1.13.1 | >=1.13.1, !=1.13.2 | 1.13.3 | (wheel metadata) | | typing-extensions | >=4.8.0 | | 4.12.2 | (wheel metadata) | | triton | 3.1.0 | none | 3.1.0 | (wheel metadata) | + + +Maintenance notes +================= + +Packages built by the recipe +---------------------------- +The recipe currently builds four packages: + +1. `libtorch` that installs the common libraries, executables and data files + that are independent of the selected Python version and are therefore shared + by all Python versions. + +2. `libtorch-cuda-linalg` that provides the shared `libtorch_cuda_linalg.so` + library, in variants linked against `magma` or not, and is built only for + GPU-enabled variants. + +3. `pytorch` that installs the library and other files for a specific Python + version. + +4. `pytorch-cpu` or `pytorch-gpu`, a backwards-compatibility metapackage. + +These packages can be built in the following variants: + +- `cpu` variant that does not use CUDA, or `cuda` variant built using + a specific CUDA version (`libtorch-cuda-linalg` is built only in `cuda` + variants). + +- `mkl` variant that uses MKL to provide BLAS/LAPACK, as well as a set + of additional functions, and `generic` variant that can use any BLAS/LAPACK + provider (implemented by patching the upstream OpenBLAS support). + +Additionally, `libtorch-cuda-linalg` can be built in a `magma` or `nomagma` +variant. The former links against libmagma, while the latter avoids this +significant dependency. Both variants support the built-in cuSOLVER backend, +and the `magma` variant normally uses a heuristic to choose between MAGMA +and cuSOLVER, in order to achieve the best performance for a given operation. + +Some of the platforms support only a subset of these variants. + +The recipe supports a `megabuild` mode that is currently used for Linux +configurations. In this mode, PyTorch is built for all Python versions +in a single run. As a result, the shared bits (`libtorch*`) are only built once. + +As the `megabuild` mode imposes high disk space requirements on the CI builders, +it is currently not used on other platforms. 
For this reason, there are separate +configurations for every Python version on those platforms. + + +The build process +----------------- +The upstream build system consists of a heavily customized `setup.py` script, +based on the setuptools build system, that performs some preparations related +to building C++ code and then calls into CMake to build it (i.e. it is not +possible to use CMake directly). The build process can be customized using +environment variables, some of them processed directly by the setup script, +others converted into `-D` options for CMake. When looking for available +options, `setup.py` and `tools/setup_helpers/cmake.py` are the two primary +files to look at. + +Normally, the setup code only runs the `cmake` generate step if `CMakeCache.txt` +does not exist yet. Therefore, on subsequent calls environment variables do not +affect the CMake build. It is technically possible to force rerunning it by +appending the `--cmake` option, but that usually causes the build system to consider +all targets out of date, and therefore rebuild everything from scratch. Instead, +we edit `CMakeCache.txt` directly, which triggers the build step +to detect the changes and regenerate (see the sketch below). + +To facilitate split package builds, we perform the build in the following steps: + +1. For the top-level rule (`libtorch-split`), we perform the base environment + setup and run `setup.py build` to build the libraries and collect the data + files without actually installing them. Then we move the files we need + into temporary directories for repackaging. + + a. If `megabuild` is enabled, we build against a fixed Python version. + Otherwise, we build using the final Python version. + + b. If CUDA support is enabled, we build with `magma` disabled first. + Then we copy the resulting library, and rebuild with `magma` enabled. + This way, we obtain the two versions of the library to repackage. + +2. For the `libtorch` and `libtorch-cuda-linalg` packages, we manually install + files that were prepared earlier. + +3. For the final `pytorch` package(s), we invoke `pip install` to build + and install the complete package. Importantly, this reuses previously built + targets, so only Python-related bits are rebuilt. In `megabuild` mode, + we patch `CMakeCache.txt` to set the correct Python version. 
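A minimal sketch of the `CMakeCache.txt` editing technique described above, assuming an already-configured `build/` tree; the `sed` expression mirrors the `USE_MAGMA` edit performed by `build_common.sh` further down in this diff, while the surrounding invocations are illustrative:

```bash
# First invocation: setup.py runs the cmake generate step and
# creates build/CMakeCache.txt.
python setup.py build

# Flip a cached option in place instead of rerunning the generate step;
# the next build detects the cache change and regenerates incrementally.
sed -i -e "/USE_MAGMA/s:=.*:=1:" build/CMakeCache.txt
python setup.py build
```
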
diff --git a/recipe/bld.bat b/recipe/bld.bat index 30cc5d4f..a00ffaff 100644 --- a/recipe/bld.bat +++ b/recipe/bld.bat @@ -1,47 +1,3 @@ @echo On -set TH_BINARY_BUILD=1 -set PYTORCH_BUILD_VERSION=%PKG_VERSION% -set PYTORCH_BUILD_NUMBER=%PKG_BUILDNUM% - -if "%pytorch_variant%" == "gpu" ( - set build_with_cuda=1 - set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% -) else ( - set build_with_cuda= - set USE_CUDA=0 -) - -if "%build_with_cuda%" == "" goto cuda_flags_end - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX -set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - -:cuda_flags_end - -set DISTUTILS_USE_SDK=1 - -set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include -set LIB=%LIBRARY_PREFIX%\lib;%LIB% - -IF "%build_with_cuda%" == "" goto cuda_end - -set MAGMA_HOME=%LIBRARY_PREFIX% - -set "PATH=%CUDA_BIN_PATH%;%PATH%" - -set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include - -:cuda_end - -set CMAKE_GENERATOR=Ninja -set "CMAKE_GENERATOR_PLATFORM=" -set "CMAKE_PREFIX_PATH=%LIBRARY_PREFIX%" -set "libuv_ROOT=%LIBRARY_PREFIX%" -set "USE_SYSTEM_SLEEF=OFF" -set "BUILD_CUSTOM_PROTOBUF=OFF" - -%PYTHON% -m pip install . --no-deps -vv -if errorlevel 1 exit /b 1 +call %RECIPE_DIR%\build_common.bat diff --git a/recipe/build.sh b/recipe/build.sh index 9a12aeb1..8a9603bf 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -1,229 +1,3 @@ -#!/bin/bash - -set -ex - -# This is used to detect if it's in the process of building pytorch -export IN_PYTORCH_BUILD=1 - -# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/243 -# https://github.com/pytorch/pytorch/blob/v2.3.1/setup.py#L341 -export PACKAGE_TYPE=conda - -# remove pyproject.toml to avoid installing deps from pip -rm -rf pyproject.toml - -# uncomment to debug cmake build -# export CMAKE_VERBOSE_MAKEFILE=1 - -export USE_CUFILE=0 -export USE_NUMA=0 -export USE_ITT=0 -export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')" -export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')" -export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')" -export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')" -export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')" -if [[ "$c_compiler" == "clang" ]]; then - export CXXFLAGS="$CXXFLAGS -Wno-deprecated-declarations -Wno-unknown-warning-option -Wno-error=unused-command-line-argument -Wno-error=vla-cxx-extension" - export CFLAGS="$CFLAGS -Wno-deprecated-declarations -Wno-unknown-warning-option -Wno-error=unused-command-line-argument -Wno-error=vla-cxx-extension" -else - export CXXFLAGS="$CXXFLAGS -Wno-deprecated-declarations -Wno-error=maybe-uninitialized" - export CFLAGS="$CFLAGS -Wno-deprecated-declarations -Wno-error=maybe-uninitialized" -fi - -# This is not correctly found for linux-aarch64 since pytorch 2.0.0 for some reason -export _GLIBCXX_USE_CXX11_ABI=1 - -# KINETO seems to require CUPTI and will look quite hard for it. -# CUPTI seems to cause trouble when users install a version of -# cudatoolkit different than the one specified at compile time. 
-# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/135 -export USE_KINETO=OFF - -if [[ "$target_platform" == "osx-64" ]]; then - export CXXFLAGS="$CXXFLAGS -DTARGET_OS_OSX=1" - export CFLAGS="$CFLAGS -DTARGET_OS_OSX=1" -fi - -# Dynamic libraries need to be lazily loaded so that torch -# can be imported on system without a GPU -LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}" - -export CMAKE_GENERATOR=Ninja -export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH -export CMAKE_PREFIX_PATH=$PREFIX -export CMAKE_BUILD_TYPE=Release - -for ARG in $CMAKE_ARGS; do - if [[ "$ARG" == "-DCMAKE_"* ]]; then - cmake_arg=$(echo $ARG | cut -d= -f1) - cmake_arg=$(echo $cmake_arg| cut -dD -f2-) - cmake_val=$(echo $ARG | cut -d= -f2-) - printf -v $cmake_arg "$cmake_val" - export ${cmake_arg} - fi -done -CMAKE_FIND_ROOT_PATH+=";$SRC_DIR" -unset CMAKE_INSTALL_PREFIX -export TH_BINARY_BUILD=1 -export PYTORCH_BUILD_VERSION=$PKG_VERSION -export PYTORCH_BUILD_NUMBER=$PKG_BUILDNUM - -export INSTALL_TEST=0 -export BUILD_TEST=0 - -export USE_SYSTEM_SLEEF=1 -# use our protobuf -export BUILD_CUSTOM_PROTOBUF=OFF -rm -rf $PREFIX/bin/protoc - -# prevent six from being downloaded -> third_party/NNPACK/cmake/DownloadSix.cmake - -if [[ "${target_platform}" != "${build_platform}" ]]; then - # It helps cross compiled builds without emulation support to complete - # Use BUILD PREFIX protoc instead of the one that is from the host platform - sed -i.bak \ - "s,IMPORTED_LOCATION_RELEASE .*/bin/protoc,IMPORTED_LOCATION_RELEASE \"${BUILD_PREFIX}/bin/protoc," \ - ${PREFIX}/lib/cmake/protobuf/protobuf-targets-release.cmake -fi - -# I don't know where this folder comes from, but it's interfering with the build in osx-64 -rm -rf $PREFIX/git - -if [[ "$CONDA_BUILD_CROSS_COMPILATION" == 1 ]]; then - export COMPILER_WORKS_EXITCODE=0 - export COMPILER_WORKS_EXITCODE__TRYRUN_OUTPUT="" -fi - -if [[ "${CI}" == "github_actions" ]]; then - # h-vetinari/hmaarrfk -- May 2024 - # reduce parallelism to avoid getting OOM-killed on - # cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs - export MAX_JOBS=4 -else - export MAX_JOBS=${CPU_COUNT} -fi - -if [[ "$blas_impl" == "generic" ]]; then - # Fake openblas - export BLAS=OpenBLAS - sed -i.bak "s#FIND_LIBRARY.*#set(OpenBLAS_LIB ${PREFIX}/lib/liblapack${SHLIB_EXT} ${PREFIX}/lib/libcblas${SHLIB_EXT} ${PREFIX}/lib/libblas${SHLIB_EXT})#g" cmake/Modules/FindOpenBLAS.cmake -else - export BLAS=MKL -fi - -if [[ "$PKG_NAME" == "pytorch" ]]; then - PIP_ACTION=install - # Trick Cmake into thinking python hasn't changed - sed "s/3\.12/$PY_VER/g" build/CMakeCache.txt.orig > build/CMakeCache.txt - sed -i.bak "s/3;12/${PY_VER%.*};${PY_VER#*.}/g" build/CMakeCache.txt - sed -i.bak "s/cpython-312/cpython-${PY_VER%.*}${PY_VER#*.}/g" build/CMakeCache.txt -else - # For the main script we just build a wheel for so that the C++/CUDA - # parts are built. Then they are reused in each python version. - PIP_ACTION=wheel -fi - -# MacOS build is simple, and will not be for CUDA -if [[ "$OSTYPE" == "darwin"* ]]; then - # Produce macOS builds with torch.distributed support. - # This is enabled by default on Linux, but disabled by default on macOS, - # because it requires an non-bundled compile-time dependency (libuv - # through gloo). This dependency is made available through meta.yaml, so - # we can override the default and set USE_DISTRIBUTED=1. 
- export USE_DISTRIBUTED=1 - - if [[ "$target_platform" == "osx-arm64" ]]; then - # MKLDNN did not support on Apple M1 at the time support Apple M1 - # was added. Revisit later - export USE_MKLDNN=0 - fi -elif [[ ${cuda_compiler_version} != "None" ]]; then - if [[ "$target_platform" == "linux-aarch64" ]]; then - # https://github.com/pytorch/pytorch/pull/121975 - # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/264 - export USE_PRIORITIZED_TEXT_FOR_LD=1 - fi - # Even though cudnn is used for CUDA builds, it's good to enable - # for MKLDNN for CUDA builds when CUDA builds are used on a machine - # with no NVIDIA GPUs. - export USE_MKLDNN=1 - export USE_CUDA=1 - export USE_CUFILE=1 - # PyTorch has multiple different bits of logic finding CUDA, override - # all of them. - export CUDAToolkit_BIN_DIR=${BUILD_PREFIX}/bin - export CUDAToolkit_ROOT_DIR=${PREFIX} - if [[ "${target_platform}" != "${build_platform}" ]]; then - export CUDA_TOOLKIT_ROOT=${PREFIX} - fi - case ${target_platform} in - linux-64) - export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/x86_64-linux - ;; - linux-aarch64) - export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/sbsa-linux - ;; - *) - echo "unknown CUDA arch, edit build.sh" - exit 1 - esac - case ${cuda_compiler_version} in - 12.6) - export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX" - ;; - *) - echo "unsupported cuda version. edit build.sh" - exit 1 - esac - export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" - export NCCL_ROOT_DIR=$PREFIX - export NCCL_INCLUDE_DIR=$PREFIX/include - export USE_SYSTEM_NCCL=1 - export USE_STATIC_NCCL=0 - export USE_STATIC_CUDNN=0 - export MAGMA_HOME="${PREFIX}" -else - if [[ "$target_platform" != *-64 ]]; then - # Breakpad seems to not work on aarch64 or ppc64le - # https://github.com/pytorch/pytorch/issues/67083 - export USE_BREAKPAD=0 - fi - # MKLDNN is an Apache-2.0 licensed library for DNNs and is used - # for CPU builds. Not to be confused with MKL. - export USE_MKLDNN=1 - export USE_CUDA=0 -fi - -echo '${CXX}'=${CXX} -echo '${PREFIX}'=${PREFIX} -$PREFIX/bin/python -m pip $PIP_ACTION . --no-deps -vvv --no-clean \ - | sed "s,${CXX},\$\{CXX\},g" \ - | sed "s,${PREFIX},\$\{PREFIX\},g" - -if [[ "$PKG_NAME" == "libtorch" ]]; then - mkdir -p $SRC_DIR/dist - pushd $SRC_DIR/dist - wheel unpack ../torch-*.whl - pushd torch-* - mv torch/bin/* ${PREFIX}/bin - mv torch/lib/* ${PREFIX}/lib - mv torch/share/* ${PREFIX}/share - for f in ATen caffe2 tensorpipe torch c10; do - mv torch/include/$f ${PREFIX}/include/$f - done - rm ${PREFIX}/lib/libtorch_python.* - popd - popd - - # Keep the original backed up to sed later - cp build/CMakeCache.txt build/CMakeCache.txt.orig -else - # Keep this in ${PREFIX}/lib so that the library can be found by - # TorchConfig.cmake. - # With upstream non-split build, `libtorch_python.so` - # and TorchConfig.cmake are both in ${SP_DIR}/torch/lib and therefore - # this is not needed. 
- mv ${SP_DIR}/torch/lib/libtorch_python${SHLIB_EXT} ${PREFIX}/lib fi +# we are using a separate file here to avoid conda-build thinking that +# magma is used in the top-level package build +source $RECIPE_DIR/build_common.sh diff --git a/recipe/build_common.bat b/recipe/build_common.bat new file mode 100644 index 00000000..30cc5d4f --- /dev/null +++ b/recipe/build_common.bat @@ -0,0 +1,47 @@ +@echo On + +set TH_BINARY_BUILD=1 +set PYTORCH_BUILD_VERSION=%PKG_VERSION% +set PYTORCH_BUILD_NUMBER=%PKG_BUILDNUM% + +if "%pytorch_variant%" == "gpu" ( + set build_with_cuda=1 + set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% +) else ( + set build_with_cuda= + set USE_CUDA=0 +) + +if "%build_with_cuda%" == "" goto cuda_flags_end + +set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% +set CUDA_BIN_PATH=%CUDA_PATH%\bin +set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX +set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + +:cuda_flags_end + +set DISTUTILS_USE_SDK=1 + +set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include +set LIB=%LIBRARY_PREFIX%\lib;%LIB% + +IF "%build_with_cuda%" == "" goto cuda_end + +set MAGMA_HOME=%LIBRARY_PREFIX% + +set "PATH=%CUDA_BIN_PATH%;%PATH%" + +set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include + +:cuda_end + +set CMAKE_GENERATOR=Ninja +set "CMAKE_GENERATOR_PLATFORM=" +set "CMAKE_PREFIX_PATH=%LIBRARY_PREFIX%" +set "libuv_ROOT=%LIBRARY_PREFIX%" +set "USE_SYSTEM_SLEEF=OFF" +set "BUILD_CUSTOM_PROTOBUF=OFF" + +%PYTHON% -m pip install . --no-deps -vv +if errorlevel 1 exit /b 1 diff --git a/recipe/build_common.sh b/recipe/build_common.sh new file mode 100644 index 00000000..ef72c019 --- /dev/null +++ b/recipe/build_common.sh @@ -0,0 +1,268 @@ +#!/bin/bash + +echo "=== Building ${PKG_NAME} (magma: ${use_magma}; py: ${PY_VER}) ===" + +set -ex + +# This is used to detect if it's in the process of building pytorch +export IN_PYTORCH_BUILD=1 + +# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/243 +# https://github.com/pytorch/pytorch/blob/v2.3.1/setup.py#L341 +export PACKAGE_TYPE=conda + +# remove pyproject.toml to avoid installing deps from pip +rm -rf pyproject.toml + +# uncomment to debug cmake build +# export CMAKE_VERBOSE_MAKEFILE=1 + +export USE_CUFILE=0 +export USE_NUMA=0 +export USE_ITT=0 +export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')" +export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')" +export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')" +export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')" +export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')" +if [[ "$c_compiler" == "clang" ]]; then + export CXXFLAGS="$CXXFLAGS -Wno-deprecated-declarations -Wno-unknown-warning-option -Wno-error=unused-command-line-argument -Wno-error=vla-cxx-extension" + export CFLAGS="$CFLAGS -Wno-deprecated-declarations -Wno-unknown-warning-option -Wno-error=unused-command-line-argument -Wno-error=vla-cxx-extension" +else + export CXXFLAGS="$CXXFLAGS -Wno-deprecated-declarations -Wno-error=maybe-uninitialized" + export CFLAGS="$CFLAGS -Wno-deprecated-declarations -Wno-error=maybe-uninitialized" +fi + +# This is not correctly found for linux-aarch64 since pytorch 2.0.0 for some reason +export _GLIBCXX_USE_CXX11_ABI=1 + +# KINETO seems to require CUPTI and will look quite hard for it. +# CUPTI seems to cause trouble when users install a version of +# cudatoolkit different than the one specified at compile time. 
+# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/135 +export USE_KINETO=OFF + +if [[ "$target_platform" == "osx-64" ]]; then + export CXXFLAGS="$CXXFLAGS -DTARGET_OS_OSX=1" + export CFLAGS="$CFLAGS -DTARGET_OS_OSX=1" +fi + +# Dynamic libraries need to be lazily loaded so that torch +# can be imported on systems without a GPU +LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}" + +export CMAKE_GENERATOR=Ninja +export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH +export CMAKE_PREFIX_PATH=$PREFIX +export CMAKE_BUILD_TYPE=Release + +for ARG in $CMAKE_ARGS; do + if [[ "$ARG" == "-DCMAKE_"* ]]; then + cmake_arg=$(echo $ARG | cut -d= -f1) + cmake_arg=$(echo $cmake_arg| cut -dD -f2-) + cmake_val=$(echo $ARG | cut -d= -f2-) + printf -v $cmake_arg "$cmake_val" + export ${cmake_arg} + fi +done +CMAKE_FIND_ROOT_PATH+=";$SRC_DIR" +unset CMAKE_INSTALL_PREFIX +export TH_BINARY_BUILD=1 +export PYTORCH_BUILD_VERSION=$PKG_VERSION +export PYTORCH_BUILD_NUMBER=$PKG_BUILDNUM + +export INSTALL_TEST=0 +export BUILD_TEST=0 + +export USE_SYSTEM_SLEEF=1 +# use our protobuf +export BUILD_CUSTOM_PROTOBUF=OFF +rm -rf $PREFIX/bin/protoc + +# prevent six from being downloaded +> third_party/NNPACK/cmake/DownloadSix.cmake + +if [[ "${target_platform}" != "${build_platform}" ]]; then + # It helps cross compiled builds without emulation support to complete + # Use BUILD PREFIX protoc instead of the one that is from the host platform + sed -i.bak \ + "s,IMPORTED_LOCATION_RELEASE .*/bin/protoc,IMPORTED_LOCATION_RELEASE \"${BUILD_PREFIX}/bin/protoc," \ + ${PREFIX}/lib/cmake/protobuf/protobuf-targets-release.cmake +fi + +# I don't know where this folder comes from, but it's interfering with the build in osx-64 +rm -rf $PREFIX/git + +if [[ "$CONDA_BUILD_CROSS_COMPILATION" == 1 ]]; then + export COMPILER_WORKS_EXITCODE=0 + export COMPILER_WORKS_EXITCODE__TRYRUN_OUTPUT="" +fi + +if [[ "${CI}" == "github_actions" ]]; then + # h-vetinari/hmaarrfk -- May 2024 + # reduce parallelism to avoid getting OOM-killed on + # cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs + export MAX_JOBS=4 +else + export MAX_JOBS=${CPU_COUNT} +fi + +if [[ "$blas_impl" == "generic" ]]; then + # Fake openblas + export BLAS=OpenBLAS + sed -i.bak "s#FIND_LIBRARY.*#set(OpenBLAS_LIB ${PREFIX}/lib/liblapack${SHLIB_EXT} ${PREFIX}/lib/libcblas${SHLIB_EXT} ${PREFIX}/lib/libblas${SHLIB_EXT})#g" cmake/Modules/FindOpenBLAS.cmake +else + export BLAS=MKL +fi + +if [[ "$PKG_NAME" == "pytorch" ]]; then + # Trick CMake into thinking python hasn't changed + sed "s/3\.12/$PY_VER/g" build/CMakeCache.txt.orig > build/CMakeCache.txt + sed -i.bak "s/3;12/${PY_VER%.*};${PY_VER#*.}/g" build/CMakeCache.txt + sed -i.bak "s/cpython-312/cpython-${PY_VER%.*}${PY_VER#*.}/g" build/CMakeCache.txt +fi + +# The macOS build is simple, and will never be a CUDA build +if [[ "$OSTYPE" == "darwin"* ]]; then + # Produce macOS builds with torch.distributed support. + # This is enabled by default on Linux, but disabled by default on macOS, + # because it requires a non-bundled compile-time dependency (libuv + # through gloo). This dependency is made available through meta.yaml, so + # we can override the default and set USE_DISTRIBUTED=1. + export USE_DISTRIBUTED=1 + + if [[ "$target_platform" == "osx-arm64" ]]; then + # MKLDNN was not supported on Apple M1 at the time Apple M1 + # support was added. Revisit later + export USE_MKLDNN=0 + fi +elif [[ ${cuda_compiler_version} != "None" ]]; then + if [[ "$target_platform" == "linux-aarch64" ]]; then + # https://github.com/pytorch/pytorch/pull/121975 + # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/264 + export USE_PRIORITIZED_TEXT_FOR_LD=1 + fi + # Even though cudnn is used for CUDA builds, it's good to enable + # MKLDNN for CUDA builds, since CUDA builds may be used on a machine + # with no NVIDIA GPUs. + export USE_MKLDNN=1 + export USE_CUDA=1 + export USE_CUFILE=1 + # PyTorch has multiple different bits of logic finding CUDA, override + # all of them. + export CUDAToolkit_BIN_DIR=${BUILD_PREFIX}/bin + export CUDAToolkit_ROOT_DIR=${PREFIX} + if [[ "${target_platform}" != "${build_platform}" ]]; then + export CUDA_TOOLKIT_ROOT=${PREFIX} + fi + case ${target_platform} in + linux-64) + export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/x86_64-linux + ;; + linux-aarch64) + export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/sbsa-linux + ;; + *) + echo "unknown CUDA arch, edit build_common.sh" + exit 1 + esac + case ${cuda_compiler_version} in + 12.6) + export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX" + ;; + *) + echo "unsupported cuda version. edit build_common.sh" + exit 1 + esac + export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" + export NCCL_ROOT_DIR=$PREFIX + export NCCL_INCLUDE_DIR=$PREFIX/include + export USE_SYSTEM_NCCL=1 + export USE_STATIC_NCCL=0 + export USE_STATIC_CUDNN=0 + export MAGMA_HOME="${PREFIX}" + # Perform the initial build without magma enabled; we'll enable + # it for the remaining builds (particularly, to have it enabled + # for pytorch). + export USE_MAGMA=0 +else + if [[ "$target_platform" != *-64 ]]; then + # Breakpad seems to not work on aarch64 or ppc64le + # https://github.com/pytorch/pytorch/issues/67083 + export USE_BREAKPAD=0 + fi + # MKLDNN is an Apache-2.0 licensed library for DNNs and is used + # for CPU builds. Not to be confused with MKL. + export USE_MKLDNN=1 + export USE_CUDA=0 +fi + +echo '${CXX}'=${CXX} +echo '${PREFIX}'=${PREFIX} + +case ${PKG_NAME} in + libtorch-split) + # Call setup.py directly to avoid spending time on unnecessarily + # packing and unpacking the wheel. + $PREFIX/bin/python setup.py build + + mkdir -p dist-libtorch/include dist-libtorch-cuda-linalg-{magma,nomagma}/lib + mv build/lib.*/torch/{bin,lib,share} dist-libtorch/ + mv build/lib.*/torch/include/{ATen,caffe2,tensorpipe,torch,c10} dist-libtorch/include/ + rm dist-libtorch/lib/libtorch_python.* + if [[ ${cuda_compiler_version} != "None" ]]; then + mv dist-libtorch/lib/libtorch_cuda_linalg.* dist-libtorch-cuda-linalg-nomagma/lib/ + + # Now rebuild with magma enabled. + sed -i -e "/USE_MAGMA/s:=.*:=1:" build/CMakeCache.txt + $PREFIX/bin/python setup.py build + mv build/lib.*/torch/lib/libtorch_cuda_linalg.* dist-libtorch-cuda-linalg-magma/lib/ + fi + + # Keep the original backed up to sed later + cp build/CMakeCache.txt build/CMakeCache.txt.orig + ;; + libtorch) + mv dist-libtorch/bin/* ${PREFIX}/bin/ + mv dist-libtorch/lib/* ${PREFIX}/lib/ + mv dist-libtorch/share/* ${PREFIX}/share/ + mv dist-libtorch/include/* ${PREFIX}/include/ + ;; + libtorch-cuda-linalg) + if [[ ${use_magma} == true ]]; then + mv dist-libtorch-cuda-linalg-magma/lib/* ${PREFIX}/lib/ + else + mv dist-libtorch-cuda-linalg-nomagma/lib/* ${PREFIX}/lib/ + fi + ;; + pytorch) + $PREFIX/bin/python -m pip install . 
--no-deps -vvv --no-clean \ + | sed "s,${CXX},\$\{CXX\},g" \ + | sed "s,${PREFIX},\$\{PREFIX\},g" + # Keep this in ${PREFIX}/lib so that the library can be found by + # TorchConfig.cmake. + # With upstream non-split build, `libtorch_python.so` + # and TorchConfig.cmake are both in ${SP_DIR}/torch/lib and therefore + # this is not needed. + # + # NB: we are using cp rather than mv, so that the loop below symlinks it + # back. + cp ${SP_DIR}/torch/lib/libtorch_python${SHLIB_EXT} ${PREFIX}/lib + + pushd $SP_DIR/torch + # Make symlinks for libraries and headers from libtorch into $SP_DIR/torch + # Also remove the vendored libraries they seem to include + # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/243 + # https://github.com/pytorch/pytorch/blob/v2.3.1/setup.py#L341 + for f in bin/* lib/* share/* include/*; do + if [[ -e "$PREFIX/$f" ]]; then + rm -rf $f + ln -sf $PREFIX/$f $PWD/$f + fi + done + popd + ;; + *) + echo "Unknown package name, edit build_common.sh" + exit 1 +esac diff --git a/recipe/build_pytorch.sh b/recipe/build_pytorch.sh index 9f2eab82..6e5fa09b 100644 --- a/recipe/build_pytorch.sh +++ b/recipe/build_pytorch.sh @@ -1,17 +1 @@ -set -x source $RECIPE_DIR/build.sh - -# if $SP_DIR/torch doesn't exist here, the installation -# of pytorch (see build_libtorch.sh call above) failed -pushd $SP_DIR/torch - -# Make symlinks for libraries and headers from libtorch into $SP_DIR/torch -# Also remove the vendorered libraries they seem to include -# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/243 -# https://github.com/pytorch/pytorch/blob/v2.3.1/setup.py#L341 -for f in bin/* lib/* share/* include/*; do - if [[ -e "$PREFIX/$f" ]]; then - rm -rf $f - ln -sf $PREFIX/$f $PWD/$f - fi -done diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml index 86840e97..89891379 100644 --- a/recipe/conda_build_config.yaml +++ b/recipe/conda_build_config.yaml @@ -22,3 +22,7 @@ github_actions_labels: # [linux] megabuild: - true # [linux] - false # [osx] + +use_magma: +- true +- false diff --git a/recipe/meta.yaml b/recipe/meta.yaml index bea64c3a..c48cb00d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -9,8 +9,15 @@ {% set build = build + 100 %} {% endif %} +{% set mkl = "<2024" %} + +# a reasonably safe subset of tests that should run in under 15 minutes +# disable hypothesis because it randomly yields health check errors +{% set tests = "test/test_autograd.py test/test_autograd_fallback.py test/test_custom_ops.py test/test_linalg.py test/test_mkldnn.py test/test_modules.py test/test_nn.py test/test_torch.py test/test_xnnpack_integration.py --deselect test/test_torch.py::TestTorch::test_print -m 'not hypothesis'" %} +{% set tests_3_13_deselects = "--deselect test/test_custom_ops.py::TestCustomOp::test_data_dependent_compile --deselect test/test_custom_ops.py::TestCustomOp::test_functionalize_error --deselect test/test_custom_ops.py::TestCustomOpAPI::test_compile --deselect test/test_custom_ops.py::TestCustomOpAPI::test_fake -k 'not test_compile_int4_mm and not test_compile_int8_mm'" %} + package: - name: libtorch + name: libtorch-split version: {{ version }} source: @@ -35,21 +42,14 @@ source: - patches/0009-Allow-libcufile-for-conda-builds.patch # conda-specific patch, lets us override CUDA paths - patches/0010-Allow-overriding-CUDA-related-paths.patch + # NumPy 2 fixes: + # https://github.com/pytorch/pytorch/pull/136800 + - patches/0011-Fix-test-test_linalg.py-for-NumPy-2-136800.patch + # https://github.com/pytorch/pytorch/pull/137740 + - 
patches/0012-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch build: number: {{ build }} - string: cuda{{ cuda_compiler_version | replace('.', '') }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] - string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] - detect_binary_files_with_prefix: false - run_exports: - - {{ pin_subpackage('libtorch', max_pin='x.x') }} - ignore_run_exports_from: - - python * # [megabuild] - - numpy * # [megabuild] - - cross-python_{{ target_platform }} # [megabuild and build_platform != target_platform] - ignore_run_exports: - - python * # [megabuild] - - numpy * # [megabuild] skip: true # [win] # cuda 11.8 was dropped due to maintenance effort, see discussion in #177 skip: true # [cuda_compiler_version == "11.8"] @@ -125,54 +125,221 @@ requirements: - libuv - pkg-config # [unix] - typing_extensions - run: - # GPU requirements without run_exports - - {{ pin_compatible('cudnn') }} # [cuda_compiler_version != "None"] - run_constrained: - # These constraints ensure conflict between pytorch and - # pytorch-cpu 1.1 which we built before conda-forge had GPU infrastructure - # built into place. - # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/65 - - pytorch-cpu =={{ version }} # [cuda_compiler_version == "None"] - - pytorch-gpu ==99999999 # [cuda_compiler_version == "None"] - - pytorch-gpu =={{ version }} # [cuda_compiler_version != "None"] - - pytorch-cpu ==99999999 # [cuda_compiler_version != "None"] - - pytorch {{ version }} cuda{{ cuda_compiler_version | replace('.', '') }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] - - pytorch {{ version }} cpu_{{ blas_impl }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] - # See following link for sysroot consraint addition - # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/293#issuecomment-2503611320 - # 2024/12 hmaarrfk's summary: - # The medium term solution is to add such a constraint to libcufile - # The long term solution is to add such a constraint to all packages - # that depend on a specific sysroot at building. 
- - sysroot_{{ target_platform }} >={{ c_stdlib_version }} - -# these tests are for the libtorch output below, but due to -# a particularity of conda-build, that output is defined in -# the global build stage, including tests -test: - commands: - # libraries - {% for each_lib in [ 'libc10', 'libshm', 'libtorch', 'libtorch_cpu', 'libtorch_global_deps'] %} - - test -f $PREFIX/lib/{{ each_lib }}.so # [linux] - - test -f $PREFIX/lib/{{ each_lib }}.dylib # [osx] - {% endfor %} - {% for each_lib in ['libc10_cuda', 'libcaffe2_nvrtc', 'libtorch_cuda', 'libtorch_cuda_linalg'] %} - - test -f $PREFIX/lib/{{ each_lib }}.so # [linux and cuda_compiler_version != "None"] - {% endfor %} outputs: + - name: libtorch-cuda-linalg + build: + string: cuda{{ cuda_compiler_version | replace('.', '') }}_magma_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [use_magma] + string: cuda{{ cuda_compiler_version | replace('.', '') }}_nomagma_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [not use_magma] + skip: true # [win or cuda_compiler_version == "None"] + ignore_run_exports_from: + - python * # [megabuild] + - numpy * # [megabuild] + - cross-python_{{ target_platform }} # [megabuild and build_platform != target_platform] + ignore_run_exports: + - python * # [megabuild] + - numpy * # [megabuild] + track_features: + - nomagma # [not use_magma] + script: build_common.sh # [unix] + script: build_common.bat # [win] + requirements: + build: + # When you change 3.12 here, change it in build_common.sh as well + - python 3.12 # [megabuild and build_platform != target_platform] + - python # [not megabuild and build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - numpy * # [megabuild and build_platform != target_platform] + - numpy # [not megabuild and build_platform != target_platform] + - {{ stdlib('c') }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('cuda') }} + - patch + - git + - libgomp + - cmake + - ninja + # Keep libprotobuf here so that a compatible version + # of protobuf is installed between build and host + - libprotobuf + - protobuf + - make + host: + # GPU requirements + - cudnn + - nccl + - cuda-version {{ cuda_compiler_version }} + - nvtx-c + - cuda-driver-dev + - cuda-cudart-dev + - cuda-nvrtc-dev + - cuda-nvtx-dev + - cuda-nvml-dev + - cuda-profiler-api + - libcublas-dev + - libcufile-dev + - libcufft-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + - magma # [use_magma] + # other requirements + - python 3.12 # [megabuild] + - python # [not megabuild] + - numpy * # [megabuild] + - numpy # [not megabuild] + - pip + - setuptools + - pyyaml + - requests + - six + - mkl-devel {{ mkl }} # [blas_impl == "mkl"] + - libcblas * *_mkl # [blas_impl == "mkl"] + - libcblas # [blas_impl != "mkl"] + - liblapack # [blas_impl != "mkl"] + - libgomp + - libabseil + - libprotobuf + - sleef + - libuv + - pkg-config + - typing_extensions + run: + # GPU requirements without run_exports + - {{ pin_compatible('cudnn') }} + run_constrained: + - magma <0.0a0 # [not use_magma] + - name: libtorch + build: + string: cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] + string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] + detect_binary_files_with_prefix: false + run_exports: + - {{ pin_subpackage('libtorch', max_pin='x.x') }} + ignore_run_exports_from: + - magma * + - python * # [megabuild] + - numpy * # 
[megabuild] + - cross-python_{{ target_platform }} # [megabuild and build_platform != target_platform] + ignore_run_exports: + - python * # [megabuild] + - numpy * # [megabuild] + script: build.sh # [unix] + script: bld.bat # [win] + requirements: + build: + # When you change 3.12 here, change it in build_common.sh as well + - python 3.12 # [megabuild and build_platform != target_platform] + - python # [not megabuild and build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - numpy * # [megabuild and build_platform != target_platform] + - numpy # [not megabuild and build_platform != target_platform] + - {{ stdlib('c') }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler('cuda') }} # [cuda_compiler_version != "None"] + # Dec 2020: it seems that git is broken on windows, so we use m2-git + - m2-patch # [win] + - m2-git # [win] + - patch # [not win] + - git # [not win] + - libgomp # [linux] + - llvm-openmp # [osx] + - cmake + - ninja + # Keep libprotobuf here so that a compatible version + # of protobuf is installed between build and host + - libprotobuf + - protobuf + - make # [linux] + host: + # GPU requirements + - cudnn # [cuda_compiler_version != "None"] + - nccl # [cuda_compiler_version != "None"] + - cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] + - nvtx-c # [cuda_compiler_version != "None"] + {% if cuda_compiler_version != "None" %} + - cuda-driver-dev + - cuda-cudart-dev + - cuda-nvrtc-dev + - cuda-nvtx-dev + - cuda-nvml-dev + - cuda-profiler-api + - libcublas-dev + - libcufile-dev + - libcufft-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + {% endif %} + # other requirements + - python 3.12 # [megabuild] + - python # [not megabuild] + - numpy * # [megabuild] + - numpy # [not megabuild] + - pip + - setuptools + - pyyaml + - requests + - six + - mkl-devel {{ mkl }} # [blas_impl == "mkl"] + - libcblas * *_mkl # [blas_impl == "mkl"] + - libcblas # [blas_impl != "mkl"] + - liblapack # [blas_impl != "mkl"] + - libgomp # [linux] + - llvm-openmp # [osx] + - libabseil + - libprotobuf + - sleef + - libuv + - pkg-config # [unix] + - typing_extensions + - libtorch-cuda-linalg {{ version }} # [cuda_compiler_version != "None"] + run: + - libtorch-cuda-linalg {{ version }} # [cuda_compiler_version != "None"] + # GPU requirements without run_exports + - {{ pin_compatible('cudnn') }} # [cuda_compiler_version != "None"] + run_constrained: + # These constraints ensure conflict between pytorch and + # pytorch-cpu 1.1 which we built before conda-forge had GPU infrastructure + # built into place. 
+ # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/65 + - pytorch-cpu =={{ version }} # [cuda_compiler_version == "None"] + - pytorch-gpu ==99999999 # [cuda_compiler_version == "None"] + - pytorch-gpu =={{ version }} # [cuda_compiler_version != "None"] + - pytorch-cpu ==99999999 # [cuda_compiler_version != "None"] + - pytorch {{ version }} cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] + - pytorch {{ version }} cpu_{{ blas_impl }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] + # See the following link for the sysroot constraint addition + # https://github.com/conda-forge/pytorch-cpu-feedstock/pull/293#issuecomment-2503611320 + # 2024/12 hmaarrfk's summary: + # The medium term solution is to add such a constraint to libcufile + # The long term solution is to add such a constraint to all packages + # that depend on a specific sysroot at building. + - sysroot_{{ target_platform }} >={{ c_stdlib_version|default(0) }} + test: + commands: + # libraries + {% for each_lib in [ 'libc10', 'libshm', 'libtorch', 'libtorch_cpu', 'libtorch_global_deps'] %} + - test -f $PREFIX/lib/{{ each_lib }}.so # [linux] + - test -f $PREFIX/lib/{{ each_lib }}.dylib # [osx] + {% endfor %} + {% for each_lib in ['libc10_cuda', 'libcaffe2_nvrtc', 'libtorch_cuda'] %} + - test -f $PREFIX/lib/{{ each_lib }}.so # [linux and cuda_compiler_version != "None"] + {% endfor %} + - name: pytorch build: - string: cuda{{ cuda_compiler_version | replace('.', '') }}_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] - string: cpu_{{ blas_impl }}_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] + string: cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_py{{ CONDA_PY }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"] + string: cpu_{{ blas_impl }}_py{{ CONDA_PY }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"] detect_binary_files_with_prefix: false + ignore_run_exports_from: + - magma run_exports: - {{ pin_subpackage('pytorch', max_pin='x.x') }} - {{ pin_subpackage('libtorch', max_pin='x.x') }} - skip: true # [win] - skip: true # [cuda_compiler_version != "None" and linux64 and blas_impl != "mkl"] script: build_pytorch.sh # [unix] script: build_pytorch.bat # [win] @@ -203,9 +370,9 @@ outputs: # GPU requirements - cudnn # [cuda_compiler_version != "None"] - nccl # [cuda_compiler_version != "None"] - - magma # [cuda_compiler_version != "None"] - cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] - nvtx-c # [cuda_compiler_version != "None"] + - magma # [cuda_compiler_version != "None"] {% if cuda_compiler_version != "None" %} - cuda-driver-dev - cuda-cudart-dev @@ -274,15 +441,18 @@ requires: - {{ compiler('c') }} - {{ compiler('cxx') }} + - ninja - boto3 - hypothesis - pytest - tabulate - pydot - - mock # [linux] - pip - expecttest - xmlrunner + - pytest-flakefinder + - pytest-rerunfailures + - pytest-xdist imports: - torch # [not (aarch64 and cuda_compiler_version != "None")] source_files: @@ -291,8 +461,13 @@ # as of pytorch=2.0.0, there is a bug when trying to run tests without the tools - tools commands: - - OMP_NUM_THREADS=4 python ./test/run_test.py || true # [not win and not (aarch64 and cuda_compiler_version != "None")] - - python ./test/run_test.py # [win] + # the whole test suite takes forever, but we should get good enough coverage + # for potential 
packaging problems by running a fixed subset + - OMP_NUM_THREADS=4 python -m pytest -n auto {{ tests }} # [not win and not (aarch64 and cuda_compiler_version != "None") and py != 313] + - python -m pytest -n auto {{ tests }} # [win and py != 313] + # dynamo does not support python 3.13 + - OMP_NUM_THREADS=4 python -m pytest -n auto {{ tests_3_13_deselects }} {{ tests }} # [not win and not (aarch64 and cuda_compiler_version != "None") and py == 313] + - python -m pytest -n auto {{ tests_3_13_deselects }} {{ tests }} # [win and py == 313] # Run pip check so as to ensure that all pytorch packages are installed # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24 - pip check @@ -314,18 +489,17 @@ {% set pytorch_cpu_gpu = "pytorch-gpu" %} # [cuda_compiler_version != "None"] - name: {{ pytorch_cpu_gpu }} build: - string: cuda{{ cuda_compiler_version | replace('.', '') }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version != "None"] - string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version == "None"] - string: cuda{{ cuda_compiler_version | replace('.', '') }}py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [not megabuild and cuda_compiler_version != "None"] - string: cpu_{{ blas_impl }}_py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [not megabuild and cuda_compiler_version == "None"] + string: cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version != "None"] + string: cpu_{{ blas_impl }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version == "None"] + string: cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_py{{ CONDA_PY }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [not megabuild and cuda_compiler_version != "None"] + string: cpu_{{ blas_impl }}_py{{ CONDA_PY }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [not megabuild and cuda_compiler_version == "None"] detect_binary_files_with_prefix: false - skip: true # [cuda_compiler_version != "None" and linux64 and blas_impl != "mkl"] # weigh down cpu implementation and give cuda preference track_features: - pytorch-cpu # [cuda_compiler_version == "None"] requirements: run: - - pytorch {{ version }}=cuda*{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version != "None"] + - pytorch {{ version }}=cuda*_{{ blas_impl }}_*{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version != "None"] - pytorch {{ version }}=cpu_{{ blas_impl }}*{{ PKG_BUILDNUM }} # [megabuild and cuda_compiler_version == "None"] - {{ pin_subpackage("pytorch", exact=True) }} # [not megabuild] test: diff --git a/recipe/patches/0011-Fix-test-test_linalg.py-for-NumPy-2-136800.patch b/recipe/patches/0011-Fix-test-test_linalg.py-for-NumPy-2-136800.patch new file mode 100644 index 00000000..1247841b --- /dev/null +++ b/recipe/patches/0011-Fix-test-test_linalg.py-for-NumPy-2-136800.patch @@ -0,0 +1,80 @@ +From f5c485918fac838d989c5aa1d4dcc6651273eacd Mon Sep 17 00:00:00 2001 +From: Haifeng Jin +Date: Tue, 1 Oct 2024 07:53:24 +0000 +Subject: [PATCH 4/5] Fix test/test_linalg.py for NumPy 2 (#136800) + +Related to #107302. + +When built and tested with NumPy 2 the following unit tests failed. 
+ +``` +=========================================================== short test summary info ============================================================ +FAILED [0.0026s] test/test_linalg.py::TestLinalgCPU::test_householder_product_cpu_complex128 - TypeError: expected np.ndarray (got Tensor) +FAILED [0.0024s] test/test_linalg.py::TestLinalgCPU::test_householder_product_cpu_complex64 - TypeError: expected np.ndarray (got Tensor) +FAILED [0.0025s] test/test_linalg.py::TestLinalgCPU::test_householder_product_cpu_float32 - TypeError: expected np.ndarray (got Tensor) +FAILED [0.0024s] test/test_linalg.py::TestLinalgCPU::test_householder_product_cpu_float64 - TypeError: expected np.ndarray (got Tensor) +FAILED [0.0016s] test/test_linalg.py::TestLinalgCPU::test_nuclear_norm_axes_small_brute_force_old_cpu - ValueError: Unable to avoid copy while creating an array as requested. +FAILED [0.0054s] test/test_linalg.py::TestLinalgCPU::test_solve_cpu_complex128 - AssertionError: The values for attribute 'shape' do not match: torch.Size([0, 0]) != torch.Size([0, 0, 0]). +FAILED [0.0055s] test/test_linalg.py::TestLinalgCPU::test_solve_cpu_complex64 - AssertionError: The values for attribute 'shape' do not match: torch.Size([0, 0]) != torch.Size([0, 0, 0]). +FAILED [0.0048s] test/test_linalg.py::TestLinalgCPU::test_solve_cpu_float32 - AssertionError: The values for attribute 'shape' do not match: torch.Size([0, 0]) != torch.Size([0, 0, 0]). +FAILED [0.0054s] test/test_linalg.py::TestLinalgCPU::test_solve_cpu_float64 - AssertionError: The values for attribute 'shape' do not match: torch.Size([0, 0]) != torch.Size([0, 0, 0]). +=========================================== 9 failed, 1051 passed, 118 skipped in 152.51s (0:02:32) ============================================ +``` + +This PR fixes them. The test is now compatible with both NumPy 1 & 2. + +Some more details: + +1. The `np.linalg.solve` has changed its behavior. So I added an adapt function in the unit test to keep its behavior the same no matter it is NumPy 1 or Numpy 2. +2. The cause of the failure is when passing a `torch.Tensor` to `np.linalg.qr`, the return type in NumPy 1 is `(np.ndarray, np.ndarray)`, while it is `(torch.Tensor, torch.Tensor)` in NumPy 2. +3. NumPy 2 does not allow `np.array(obj, copy=False)`, but recommended to use `np.asarray(obj)` instead. + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/136800 +Approved by: https://github.com/lezcano +--- + test/test_linalg.py | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/test/test_linalg.py b/test/test_linalg.py +index e9ec874d6..060bccef2 100644 +--- a/test/test_linalg.py ++++ b/test/test_linalg.py +@@ -2351,7 +2351,7 @@ class TestLinalg(TestCase): + if self.device_type != 'cpu' and randrange(100) < 95: + return # too many cpu <==> device copies + +- a = np.array(x.cpu(), copy=False) ++ a = np.asarray(x.cpu()) + expected = np.linalg.norm(a, "nuc", axis=axes) + + ans = torch.norm(x, "nuc", dim=axes) +@@ -3082,7 +3082,14 @@ class TestLinalg(TestCase): + self.assertEqual(b.expand_as(Ax), Ax) + + # Check against NumPy +- expected = np.linalg.solve(A.cpu().numpy(), b.expand_as(x).cpu().numpy()) ++ if rhs == (): ++ # In NumPy 2, "b" can no longer be a vector (i.e. rhs == ()) if has batch dimensions. ++ # So, reshape it to a matrix and back. 
Related documentation: ++ # https://numpy.org/doc/1.26/reference/generated/numpy.linalg.solve.html ++ # https://numpy.org/doc/2.0/reference/generated/numpy.linalg.solve.html ++ expected = np.linalg.solve(A.cpu().numpy(), b.cpu().numpy().reshape(*b.shape, 1)).reshape(b.shape) ++ else: ++ expected = np.linalg.solve(A.cpu().numpy(), b.cpu().numpy()) + self.assertEqual(x, expected) + + batches = [(), (0, ), (3, ), (2, 3)] +@@ -5234,7 +5241,9 @@ class TestLinalg(TestCase): + tau_shape = [*A_cpu.shape[:-2], A_cpu.shape[-1]] + tau = torch.empty(tau_shape, dtype=dtype).view(-1, A_cpu.shape[-1]) + for A_i, reflectors_i, tau_i in zip(A_cpu.contiguous().view(*flattened_batch_shape), reflectors, tau): +- reflectors_tmp, tau_i[:] = map(torch.from_numpy, np.linalg.qr(A_i, mode='raw')) ++ reflectors_tmp, tau_i[:] = ( ++ torch.from_numpy(x) if isinstance(x, np.ndarray) else x for x in np.linalg.qr(A_i, mode='raw') ++ ) + reflectors_i[:] = reflectors_tmp.T + reflectors = reflectors.view(*A_cpu.shape) + tau = tau.view(tau_shape) +-- +2.47.1 + diff --git a/recipe/patches/0012-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch b/recipe/patches/0012-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch new file mode 100644 index 00000000..3ecdfec0 --- /dev/null +++ b/recipe/patches/0012-Fixes-NumPy-2-test-failures-in-test_torch.py-137740.patch @@ -0,0 +1,63 @@ +From e9d0fadc19ca7677a4598f8fbbf03721667a64bf Mon Sep 17 00:00:00 2001 +From: Haifeng Jin +Date: Sat, 12 Oct 2024 02:40:17 +0000 +Subject: [PATCH 5/5] Fixes NumPy 2 test failures in test_torch.py (#137740) + +Related to #107302 + +The breakages are caused by backward incompatibility between NumPy 1 and NumPy 2. +This PR fixes all the corresponding test failures in `test_torch.py`. + +1. The dtype of the return value `np.percentile` when passed a `torch.float32` tensor. +NumPy 1: Return value of `np.float64`. +NumPy 2: Return value of `np.float32`. +Solution: Enforce it with `.astype(np.float64)`. + +2. The type of `np.gradient()` when returning multiple arrays. +NumPy1: A list of arrays. +NumPy2: A tuple of arrays. +Solution: Cast the tuple to a list. +Pull Request resolved: https://github.com/pytorch/pytorch/pull/137740 +Approved by: https://github.com/ezyang +--- + test/test_torch.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/test/test_torch.py b/test/test_torch.py +index be4d61808..c6fd6ac9f 100644 +--- a/test/test_torch.py ++++ b/test/test_torch.py +@@ -2891,7 +2891,7 @@ else: + + # if the given input arg is not a list, it returns a list of single element: [arg] + def _wrap_to_list(self, input_array): +- return input_array if isinstance(input_array, list) else [input_array] ++ return list(input_array) if isinstance(input_array, (list, tuple)) else [input_array] + + # To ensure inf, -inf, and nan values do not cause divergence between Numpy and PyTorch. + # There are two types of possible divergence: +@@ -3029,7 +3029,7 @@ else: + # Result is given just as real number and all the imaginary parts to be equal to zero. 
+ self.assertEqual(expected[i].imag, torch.zeros(actual[i].shape), exact_dtype=False) + else: +- actual, expected = self._inf_nan_preprocess(list(actual), expected) ++ actual, expected = self._inf_nan_preprocess(list(actual), list(expected)) + self.assertEqual(actual, expected, equal_nan=True, exact_dtype=False) + + @onlyNativeDeviceTypes +@@ -7549,10 +7549,10 @@ class TestTorch(TestCase): + torch.mean(sample, dim=0), torch.full((d,), 0.5), atol=2, rtol=2 + ) + torch.testing.assert_close( +- np.percentile(sample, 25, axis=0), np.repeat(0.25, d), atol=2, rtol=2 ++ np.percentile(sample, 25, axis=0).astype(np.float64), np.repeat(0.25, d), atol=2, rtol=2 + ) + torch.testing.assert_close( +- np.percentile(sample, 75, axis=0), np.repeat(0.75, d), atol=2, rtol=2 ++ np.percentile(sample, 75, axis=0).astype(np.float64), np.repeat(0.75, d), atol=2, rtol=2 + ) + + @skipIfTorchDynamo("np.float64 restored as float32 after graph break.") +-- +2.47.1 +
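As a closing note, the pinned test subset that `meta.yaml` introduces above can be reproduced locally when debugging packaging problems; a minimal sketch, assuming a built checkout with `pytest-xdist` installed (the file list abbreviates the full `{{ tests }}` definition):

```bash
# Mirrors the `python -m pytest -n auto {{ tests }}` command from the
# recipe's test section, on a representative subset of the test files.
OMP_NUM_THREADS=4 python -m pytest -n auto \
    test/test_autograd.py test/test_linalg.py test/test_torch.py \
    --deselect test/test_torch.py::TestTorch::test_print \
    -m 'not hypothesis'
```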