Enable Clang compilation in OSS for fbgemm_gpu (CPU) (#2330)

Summary: Pull Request resolved: #2330
Reviewed By: spcyppt
Differential Revision: D53777574
Pulled By: q10
fbshipit-source-id: f6228cd941c5ece168540abc0699883c0c97570f
pytorch · Feb 15, 2024 · d00b5d5 · d00b5d5
1 parent 5ca556a
commit d00b5d5
Show file tree

Hide file tree

Showing 11 changed files with 185 additions and 156 deletions.
diff --git a/.github/scripts/fbgemm_build.bash b/.github/scripts/fbgemm_build.bash
@@ -13,51 +13,34 @@
 # FBGEMM Build Auxiliary Functions
 ################################################################################
 
-__configure_fbgemm_build_gcc () {
-  # shellcheck disable=SC2155
-  local env_prefix=$(env_name_or_prefix "${env_name}")
-
-  # shellcheck disable=SC2155,SC2086
-  local python_path=$(conda run ${env_prefix} which python)
-
-  # shellcheck disable=SC2206
-  build_args=(
-    -DUSE_SANITIZER=address
-    -DFBGEMM_LIBRARY_TYPE=${fbgemm_library_type}
-    -DPYTHON_EXECUTABLE=${python_path}
-  )
-}
-
-__configure_fbgemm_build_clang () {
+__configure_fbgemm_build () {
   # shellcheck disable=SC2155
   local env_prefix=$(env_name_or_prefix "${env_name}")
 
   # shellcheck disable=SC2155,SC2086
   local python_path=$(conda run ${env_prefix} which python)
-  # shellcheck disable=SC2155,SC2086
-  local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
 
   # shellcheck disable=SC2206
   build_args=(
     -DUSE_SANITIZER=address
     -DFBGEMM_LIBRARY_TYPE=${fbgemm_library_type}
     -DPYTHON_EXECUTABLE=${python_path}
-    -DOpenMP_C_LIB_NAMES=libomp
-    -DOpenMP_C_FLAGS=\"-fopenmp=libomp -I ${conda_prefix}/include\"
-    -DOpenMP_CXX_LIB_NAMES=libomp
-    -DOpenMP_CXX_FLAGS=\"-fopenmp=libomp -I ${conda_prefix}/include\"
-    -DOpenMP_libomp_LIBRARY=${conda_prefix}/lib/libomp.so
   )
-}
 
-__configure_fbgemm_build () {
   if [ "$fbgemm_compiler" == "clang" ]; then
-    echo "[BUILD] Configuring for building using Clang ..."
-    __configure_fbgemm_build_clang
-
-  else
-    echo "[BUILD] Configuring for building using GCC ..."
-    __configure_fbgemm_build_gcc
+    echo "[BUILD] Host compiler is Clang; adding extra compiler flags ..."
+
+    # shellcheck disable=SC2155,SC2086
+    local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
+
+    # shellcheck disable=SC2206
+    build_args+=(
+      -DOpenMP_C_LIB_NAMES=libomp
+      -DOpenMP_C_FLAGS=\"-fopenmp=libomp -I ${conda_prefix}/include\"
+      -DOpenMP_CXX_LIB_NAMES=libomp
+      -DOpenMP_CXX_FLAGS=\"-fopenmp=libomp -I ${conda_prefix}/include\"
+      -DOpenMP_libomp_LIBRARY=${conda_prefix}/lib/libomp.so
+    )
   fi
 
   # shellcheck disable=SC2145

diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash
@@ -55,6 +55,32 @@ prepare_fbgemm_gpu_build () {
   echo "[BUILD] Successfully ran git submodules update"
 }
 
+__configure_compiler_flags () {  # Adds Clang-specific settings when the env's c++ identifies itself as Clang; otherwise does nothing
+  # shellcheck disable=SC2155
+  local env_prefix=$(env_name_or_prefix "${env_name}")  # env_name is not set here; it is read from the calling scope
+
+  if print_exec "conda run ${env_prefix} c++ --version | grep -i clang"; then  # whole pipeline passed as ONE string; presumably print_exec evaluates it -- confirm
+    echo "[BUILD] Host compiler is Clang; adding extra compiler flags ..."
+
+    # shellcheck disable=SC2155,SC2086
+    local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)  # value of CONDA_PREFIX as seen inside the env
+    # shellcheck disable=SC2155,SC2086
+    local cpp_path=$(conda run ${env_prefix} which c++)  # path of the c++ found on the env's PATH
+
+    echo "[BUILD] Setting Clang (should already be symlinked as c++) as the host compiler: ${cpp_path}"
+    # NOTE: There appears to be no ROCm equivalent for NVCC_PREPEND_FLAGS:
+    #   https://github.com/ROCm/HIP/issues/931
+    # shellcheck disable=SC2086
+    print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS="-ccbin ${cpp_path}"  # NOTE(review): value contains a space; confirm print_exec does not re-split it
+
+    echo "[BUILD] Appending OpenMP flags for Clang ..."
+    # shellcheck disable=SC2206
+    build_args+=(  # build_args is not declared in this function; the append targets the caller's/global array
+      --openmp libomp ${conda_prefix}  # three separate elements (conda_prefix is intentionally unquoted, see SC2206 above)
+    )
+  fi
+}
+
 __configure_fbgemm_gpu_build_cpu () {
   # Update the package name and build args depending on if CUDA is specified
   echo "[BUILD] Setting CPU-only build args ..."
@@ -162,7 +188,7 @@ __configure_fbgemm_gpu_build () {
   if [ "$fbgemm_variant" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} FBGEMM_VARIANT"
     echo "Example(s):"
-    echo "    ${FUNCNAME[0]} cpu                          # CPU-only variant"
+    echo "    ${FUNCNAME[0]} cpu                          # CPU-only variant using Clang"
     echo "    ${FUNCNAME[0]} cuda                         # CUDA variant for default target(s)"
     echo "    ${FUNCNAME[0]} cuda '7.0;8.0'               # CUDA variant for custom target(s)"
     echo "    ${FUNCNAME[0]} rocm                         # ROCm variant for default target(s)"
@@ -190,6 +216,9 @@ __configure_fbgemm_gpu_build () {
     __configure_fbgemm_gpu_build_cuda "${fbgemm_variant_targets}"
   fi
 
+  # Set other compiler flags as needed
+  __configure_compiler_flags
+
   # shellcheck disable=SC2145
   echo "[BUILD] FBGEMM_GPU build arguments have been set:  ${build_args[@]}"
 }
@@ -375,11 +404,11 @@ build_fbgemm_gpu_package () {
   if [ "$fbgemm_variant" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME RELEASE_TYPE VARIANT [VARIANT_TARGETS]"
     echo "Example(s):"
-    echo "    ${FUNCNAME[0]} build_env nightly cpu                           # Nightly CPU-only variant"
-    echo "    ${FUNCNAME[0]} build_env nightly cuda                          # Nightly CUDA variant for default target(s)"
-    echo "    ${FUNCNAME[0]} build_env nightly cuda '7.0;8.0'                # Nightly CUDA variant for custom target(s)"
-    echo "    ${FUNCNAME[0]} build_env release rocm                          # Release ROCm variant for default target(s)"
-    echo "    ${FUNCNAME[0]} build_env release rocm 'gfx906;gfx908;gfx90a'   # Release ROCm variant for custom target(s)"
+    echo "    ${FUNCNAME[0]} build_env cpu                          # CPU-only variant"
+    echo "    ${FUNCNAME[0]} build_env cuda                         # CUDA variant for default target(s)"
+    echo "    ${FUNCNAME[0]} build_env cuda '7.0;8.0'               # CUDA variant for custom target(s)"
+    echo "    ${FUNCNAME[0]} build_env rocm                         # ROCm variant for default target(s)"
+    echo "    ${FUNCNAME[0]} build_env rocm 'gfx906;gfx908;gfx90a'  # ROCm variant for custom target(s)"
     return 1
   fi
 

diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash
@@ -32,7 +32,7 @@ run_python_test () {
   local env_prefix=$(env_name_or_prefix "${env_name}")
 
   # shellcheck disable=SC2086
-  if print_exec conda run --no-capture-output ${env_prefix} python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning --cache-clear "${python_test_file}"; then
+  if print_exec conda run --no-capture-output ${env_prefix} python -m pytest "${pytest_args[@]}" --cache-clear  "${python_test_file}"; then
     echo "[TEST] Python test suite PASSED: ${python_test_file}"
     echo ""
     echo ""
@@ -50,13 +50,13 @@ run_python_test () {
   # enabled by using the pytest cache and the --lf flag.
 
   # shellcheck disable=SC2086
-  if exec_with_retries 2 conda run --no-capture-output ${env_prefix} python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning --lf --last-failed-no-failures none "${python_test_file}"; then
+  if exec_with_retries 2 conda run --no-capture-output ${env_prefix} python -m pytest "${pytest_args[@]}" --lf --last-failed-no-failures none "${python_test_file}"; then
     echo "[TEST] Python test suite PASSED with retries: ${python_test_file}"
     echo ""
     echo ""
     echo ""
   else
-    echo "[TEST] Python test suite FAILED for some or all tests despite retries: ${python_test_file}"
+    echo "[TEST] Python test suite FAILED for some or all tests despite multiple retries: ${python_test_file}"
     echo ""
     echo ""
     echo ""
@@ -104,13 +104,63 @@ __configure_fbgemm_gpu_test_rocm () {
   )
 }
 
+__setup_fbgemm_gpu_test () {  # Pre-test setup: per-variant config, PyTest install, import checks, then fills pytest_args
+  # shellcheck disable=SC2155
+  local env_prefix=$(env_name_or_prefix "${env_name}")  # env_name is not set here; it is read from the calling scope
+
+  # Configure the environment for ignored test suites for each FBGEMM_GPU
+  # variant
+  if [ "$fbgemm_variant" == "cpu" ]; then  # fbgemm_variant is likewise read from the calling scope
+    echo "[TEST] Configuring for CPU-based testing ..."
+    __configure_fbgemm_gpu_test_cpu
+
+  elif [ "$fbgemm_variant" == "rocm" ]; then
+    echo "[TEST] Configuring for ROCm-based testing ..."
+    __configure_fbgemm_gpu_test_rocm
+
+  else  # any value other than "cpu" or "rocm" (including empty) selects the CUDA configuration
+    echo "[TEST] Configuring for CUDA-based testing ..."
+    __configure_fbgemm_gpu_test_cuda
+  fi
+
+  if [[ $MACHINE_NAME == 'aarch64' ]]; then  # MACHINE_NAME is not set in this function; presumably exported by the prelude -- confirm
+    # NOTE: Setting KMP_DUPLICATE_LIB_OK silences the error about multiple
+    # OpenMP being linked when FBGEMM_GPU is compiled under Clang on aarch64
+    # machines:
+    #   https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial
+    echo "[TEST] Platform is aarch64; will set KMP_DUPLICATE_LIB_OK ..."
+    # shellcheck disable=SC2086
+    print_exec conda env config vars set ${env_prefix} KMP_DUPLICATE_LIB_OK=1  # stored as a conda env config var, not exported in this shell
+  fi
+
+  echo "[TEST] Installing PyTest ..."
+  # shellcheck disable=SC2086
+  (exec_with_retries 3 conda install ${env_prefix} -y pytest expecttest) || return 1  # runs in a subshell; a failure makes this function return 1
+
+  echo "[TEST] Checking imports ..."
+  (test_python_import_package "${env_name}" fbgemm_gpu) || return 1  # return 1 if the check fails
+  (test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
+
+  # Configure the PyTest args
+  pytest_args=(  # not declared local here, so the array outlives this function; run_python_test expands "${pytest_args[@]}"
+    -v
+    -rsx
+    -s
+    -W ignore::pytest.PytestCollectionWarning  # unquoted, so this contributes two elements: "-W" and the warning filter
+  )
+
+  # shellcheck disable=SC2145
+  echo "[TEST] PyTest args:  ${pytest_args[@]}"
+}
+
+
 ################################################################################
 # FBGEMM_GPU Test Functions
 ################################################################################
 
 run_fbgemm_gpu_tests () {
-  local env_name="$1"
-  local fbgemm_variant="$2"
+  env_name="$1"
+  fbgemm_variant="$2"
   if [ "$fbgemm_variant" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME [FBGEMM_VARIANT]"
     echo "Example(s):"
@@ -129,27 +179,7 @@ run_fbgemm_gpu_tests () {
 
   # shellcheck disable=SC2155
   local env_prefix=$(env_name_or_prefix "${env_name}")
-
-  if [ "$fbgemm_variant" == "cpu" ]; then
-    echo "Configuring for CPU-based testing ..."
-    __configure_fbgemm_gpu_test_cpu
-
-  elif [ "$fbgemm_variant" == "rocm" ]; then
-    echo "Configuring for ROCm-based testing ..."
-    __configure_fbgemm_gpu_test_rocm
-
-  else
-    echo "Configuring for CUDA-based testing ..."
-    __configure_fbgemm_gpu_test_cuda
-  fi
-
-  echo "[TEST] Installing pytest ..."
-  # shellcheck disable=SC2086
-  (exec_with_retries 3 conda install ${env_prefix} -y pytest expecttest) || return 1
-
-  echo "[TEST] Checking imports ..."
-  (test_python_import_package "${env_name}" fbgemm_gpu) || return 1
-  (test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
+  __setup_fbgemm_gpu_test
 
   echo "[TEST] Enumerating ALL test files ..."
   # shellcheck disable=SC2155

diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -67,6 +67,7 @@ jobs:
           { arch: arm, instance: "linux.arm64.2xlarge" },
         ]
         python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
+        compiler: [ "gcc", "clang" ]
 
     steps:
     - name: Setup Build Container
@@ -90,7 +91,7 @@ jobs:
       run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
 
     - name: Install C/C++ Compilers
-      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
 
     - name: Install Build Tools
       run: . $PRELUDE; install_build_tools $BUILD_ENV
@@ -111,7 +112,7 @@ jobs:
     - name: Upload Built Wheel as GHA Artifact
       uses: actions/upload-artifact@v3
       with:
-        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
+        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_${{ matrix.python-version }}.whl
         path: fbgemm_gpu/dist/fbgemm_gpu_nightly_cpu-*.whl
 
 
@@ -135,6 +136,7 @@ jobs:
           { arch: arm, instance: "linux.arm64.2xlarge" },
         ]
         python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
+        compiler: [ "gcc", "clang" ]
     needs: build_artifact
 
     steps:
@@ -149,7 +151,7 @@ jobs:
     - name: Download Wheel Artifact from GHA
       uses: actions/download-artifact@v3
       with:
-        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
+        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_${{ matrix.python-version }}.whl
 
     - name: Display System Info
       run: . $PRELUDE; print_system_info; print_ec2_info
@@ -165,7 +167,7 @@ jobs:
 
     - name: Install C/C++ Compilers
       # CXX compiler is needed for inductor used by torchrec.
-      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
 
     - name: Install PyTorch-CPU Nightly
       run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu
@@ -188,73 +190,7 @@ jobs:
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
 
     - name: Push Wheel to PyPI
-      if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true') }}
+      if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
       run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu_nightly_cpu-*.whl "$PYPI_TOKEN"
-
-
-  build_and_test_ubuntu:
-    runs-on: ${{ matrix.host-machine.instance }}
-    container:
-      image: ${{ matrix.container-image }}
-      options: --user root
-    defaults:
-      run:
-        shell: bash
-    env:
-      PRELUDE: .github/scripts/setup_env.bash
-      BUILD_ENV: build_binary
-    strategy:
-      fail-fast: false
-      matrix:
-        host-machine: [
-          { arch: x86, instance: "linux.4xlarge" },
-          { arch: arm, instance: "linux.arm64.2xlarge" },
-        ]
-        container-image: [ "ubuntu:20.04", "ubuntu:22.04" ]
-        python-version: [ "3.11" ]
-
-    steps:
-    - name: Setup Build Container
-      run: |
-        apt update -y
-        apt install -y binutils build-essential git pciutils sudo wget
-        git config --global --add safe.directory '*'
-
-    - name: Checkout the Repository
-      uses: actions/checkout@v4
-      with:
-        submodules: true
-
-    - name: Display System Info
-      run: . $PRELUDE; print_system_info
-
-    - name: Display GPU Info
-      run: . $PRELUDE; print_gpu_info
-
-    - name: Setup Miniconda
-      run: . $PRELUDE; setup_miniconda $HOME/miniconda
-
-    - name: Create Conda Environment
-      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
-
-    - name: Install Build Tools
-      run: . $PRELUDE; install_build_tools $BUILD_ENV
-
-    - name: Install PyTorch
-      run:  . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu
-
-    - name: Collect PyTorch Environment Info
-      if: ${{ success() || failure() }}
-      run:  . $PRELUDE; collect_pytorch_env_info $BUILD_ENV
-
-    - name: Prepare FBGEMM_GPU Build
-      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
-
-    - name: Build + Install FBGEMM_GPU (CPU version)
-      run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpu
-
-    - name: Test FBGEMM_GPU-CPU Nightly Installation
-      timeout-minutes: 15
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu