huggingface · mfuntowicz · Jan 22, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 21, 2025
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -1,51 +1,79 @@
 name: Release
 
 on:
+  workflow_dispatch:
   push:
     tags:
       - "v*"
 
 jobs:
-  pypi:
-    name: Publish release artifact on PyPi repository
+#  pypi:
+#    name: Publish release artifact on PyPi repository
+#    runs-on: ubuntu-latest
+#    steps:
+#      - name: Set up Python
+#        uses: actions/setup-python@v4
+#        with:
+#          python-version: '3.10'  # quoted: a bare 3.10 is parsed by YAML as the float 3.1
+#
+#      - name: Check out the repo
+#        uses: actions/checkout@v4
+#
+#      - name: Install dependencies
+#        run: |
+#          pip install --upgrade pip
+#          pip install setuptools wheel
+#
+#      - run: |
+#          python setup.py sdist bdist_wheel
+#      - run: |
+#          pip install twine
+#
+#      - name: Upload to PyPi
+#        env:
+#          OPTIMUM_NVIDIA_PYPI_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_PYPI_TOKEN }}
+#        run: |
+#          twine upload dist/* -u __token__ -p "$OPTIMUM_NVIDIA_PYPI_TOKEN"
+
+  docker:
+    name: Build and push containers to registries
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      attestations: write
+      id-token: write
     strategy:
       matrix:
         python-version: ['3.10', '3.12']
 
     steps:
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
+      - name: Extract inlined Python version
+        run: echo "INLINE_PYTHON_VERSION=$(echo '${{ matrix.python-version }}' | sed 's/\.//')" >> "$GITHUB_ENV"  # e.g. 3.10 -> 310, used as the image tag suffix
 
       - name: Check out the repo
         uses: actions/checkout@v4
 
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install setuptools wheel
+      - name: Retrieve TensorRT-LLM dependency
+        run: echo "TENSORRT_LLM_VERSION=$(grep -ioP 'tensorrt-llm\s*==\s*\K[0-9][0-9A-Za-z.]*' setup.py | head -n1)" >> "$GITHUB_ENV"  # match the pinned version itself instead of slicing fixed character columns
 
-      - run: |
-          python setup.py sdist bdist_wheel
-      - run: |
-          pip install twine
+      - name: Print out targeted TensorRT-LLM version
+        run: echo "Building against TensorRT-LLM ${{ env.TENSORRT_LLM_VERSION }}"
 
-      - name: Upload to PyPi
+      - name: Check out TensorRT-LLM repo
+        uses: actions/checkout@v4
+        with:
+          path: tensorrt-llm
+          repository: nvidia/tensorrt-llm
+          submodules: true
+          fetch-depth: 1
+          ref: v${{ env.TENSORRT_LLM_VERSION }}
+
+      - name: Build TensorRT-LLM base image
+        id: build-tensorrt-llm-base
         env:
-          OPTIMUM_NVIDIA_PYPI_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_PYPI_TOKEN }}
-        run: |
-          twine upload dist/* -u __token__ -p "$OPTIMUM_NVIDIA_PYPI_TOKEN"
+          TARGET_CUDA_ARCHS: "75-real;80-real;86-real;89-real;90-real"
+        run: make -C docker release_build PYTHON_VERSION="${{ matrix.python-version }}" CUDA_ARCHS="$TARGET_CUDA_ARCHS"  # quoted: the arch list contains ';', which the shell would otherwise treat as a command separator
 
-  docker:
-    name: Push Docker container to Docker Hub and Github Registry
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      attestations: write
-      id-token: write
-    steps:
       - name: Check out the repo
         uses: actions/checkout@v4
 
@@ -61,9 +89,8 @@ jobs:
         with:
           images: huggingface/optimum-nvidia
           tags: |
-            type=sha
-            type=pep440,pattern={{raw}},suffix=-py310
-            type=semver,pattern={{raw}},suffix=-py310
+            type=pep440,pattern={{raw}},suffix=-py${{ env.INLINE_PYTHON_VERSION }}
+            type=semver,pattern={{raw}},suffix=-py${{ env.INLINE_PYTHON_VERSION }}
 
       - name: Build and push Docker image
         id: push

diff --git a/setup.py b/setup.py
@@ -86,6 +86,7 @@
         "Intended Audience :: Science/Research",
         "Operating System :: OS Independent",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.12",
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
     keywords="transformers, neural-network, fine-tuning, inference, nvidia, tensorrt, ampere, hopper",