Fix support for CUDA 12.6 and finding CUDA tools #28

Merged · 3 commits · Nov 26, 2024
9 changes: 7 additions & 2 deletions recipe/meta.yaml
@@ -31,9 +31,12 @@ source:
   # https://github.com/triton-lang/triton/commit/757b6a61e7df814ba806f498f8bb3160f84b120c
     - patches/0007-Update-config.enableRegionSimplification-for-LLVM-19.patch
     - patches/0008-Do-not-link-directly-to-LLVM-static-libraries.patch
+    # https://github.com/triton-lang/triton/commit/f48dbc1b106c93144c198fbf3c4f30b2aab9d242
+    - patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
+    - patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch
 
 build:
-  number: 1
+  number: 2
   # TODO: windows support should be available from next version;
   # CPU-only support still under development
   skip: true  # [win or cuda_compiler_version == "None"]
@@ -82,7 +85,9 @@ test:
     - pip check
     # test suite essentially depends on availability of a physical GPU,
     # see https://github.com/openai/triton/issues/466;
-    # - pytest -v python/test
+    # run a test that does not require a GPU but checks
+    # if triton.compile() works
+    - pytest -v python/test/unit/tools/test_aot.py::test_ttgir_to_ptx
 
 about:
   home: https://github.com/openai/triton
37 changes: 37 additions & 0 deletions recipe/patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
@@ -0,0 +1,37 @@
From 2b7c9075640d7909d8e07c0e8fd1cef867df1af5 Mon Sep 17 00:00:00 2001
From: Keren Zhou <[email protected]>
Date: Tue, 27 Aug 2024 11:36:38 -0400
Subject: [PATCH 9/9] [CODEGEN] Support CUDA 12.6 (#4588)

According to the
[table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history),
both CUDA 12.5 and 12.6 use PTX ISA 8.5
---
third_party/nvidia/backend/compiler.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py
index 6d7994923..807775f93 100644
--- a/third_party/nvidia/backend/compiler.py
+++ b/third_party/nvidia/backend/compiler.py
@@ -44,12 +44,15 @@ def ptx_get_version(cuda_version) -> int:
     assert isinstance(cuda_version, str)
     major, minor = map(int, cuda_version.split('.'))
     if major == 12:
-        return 80 + minor
+        if minor < 6:
+            return 80 + minor
+        elif minor == 6:
+            return 85
     if major == 11:
         return 70 + minor
     if major == 10:
         return 63 + minor
-    raise RuntimeError("Triton only support CUDA 10.0 or higher")
+    raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version)
 
 
 @functools.lru_cache(None)
--
2.47.1
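The version mapping implemented by this patch can be exercised standalone. The sketch below reproduces `ptx_get_version` as patched (the function name and branching logic come from the diff above; the surrounding script is illustrative): CUDA 12.5 and 12.6 both target PTX ISA 8.5, so 12.6 must not fall through to the old `80 + minor` formula, which would have produced the nonexistent ISA version 86.

```python
def ptx_get_version(cuda_version: str) -> int:
    """Map a CUDA toolkit version string to a PTX ISA version number
    (e.g. 85 meaning PTX ISA 8.5), mirroring the patched function in
    third_party/nvidia/backend/compiler.py."""
    assert isinstance(cuda_version, str)
    major, minor = map(int, cuda_version.split('.'))
    if major == 12:
        if minor < 6:
            return 80 + minor
        elif minor == 6:
            # CUDA 12.5 and 12.6 both use PTX ISA 8.5
            return 85
    if major == 11:
        return 70 + minor
    if major == 10:
        return 63 + minor
    # CUDA versions newer than 12.6 also fall through to this error,
    # so an unknown toolkit fails loudly instead of producing a bogus ISA.
    raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version)

print(ptx_get_version("12.6"))  # 85
print(ptx_get_version("12.5"))  # 85
print(ptx_get_version("11.8"))  # 78
```

Note that the patch intentionally has no branch for 12.x with minor > 6: such versions fall through to the `RuntimeError`, which is why the error message was extended to include the offending version string.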

52 changes: 52 additions & 0 deletions recipe/patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch
@@ -0,0 +1,52 @@
From e7f10ca2067ed0c960d03e95f0a149d2f5d8dc00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <[email protected]>
Date: Tue, 26 Nov 2024 11:58:06 +0100
Subject: [PATCH 10/10] Use system PATH to find tools (in CONDA_PREFIX)

---
python/triton/backends/compiler.py | 2 ++
third_party/nvidia/backend/compiler.py | 2 ++
2 files changed, 4 insertions(+)

diff --git a/python/triton/backends/compiler.py b/python/triton/backends/compiler.py
index 990690045..08d7750ba 100644
--- a/python/triton/backends/compiler.py
+++ b/python/triton/backends/compiler.py
@@ -1,5 +1,6 @@
 import os
 import re
+import shutil
 import subprocess
 
 from abc import ABCMeta, abstractmethod, abstractclassmethod
@@ -28,6 +29,7 @@ class BaseBackend(metaclass=ABCMeta):
         paths = [
             os.environ.get(f"TRITON_{binary.upper()}_PATH", ""),
             os.path.join(base_dir, "third_party", "cuda", "bin", binary),
+            shutil.which(binary),
         ]
         for p in paths:
             bin = p.split(" ")[0]
diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py
index 807775f93..cd8b6adae 100644
--- a/third_party/nvidia/backend/compiler.py
+++ b/third_party/nvidia/backend/compiler.py
@@ -11,6 +11,7 @@ import signal
 import os
 import subprocess
 from pathlib import Path
+import shutil
 
 
 @functools.lru_cache()
@@ -18,6 +19,7 @@ def _path_to_binary(binary: str):
     paths = [
         os.environ.get(f"TRITON_{binary.upper()}_PATH", ""),
         os.path.join(os.path.dirname(__file__), "bin", binary),
+        shutil.which(binary),
     ]
 
     for bin in paths:
--
2.47.1
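The effect of patch 0010 is to append one more candidate to the existing tool search order: after the `TRITON_<TOOL>_PATH` environment override and the copy bundled with the package, the binary is now also looked up on `PATH` via `shutil.which`, which is what lets a conda build find `ptxas` and friends in `$CONDA_PREFIX/bin`. A minimal standalone sketch of that order (the function name `find_tool` and the `bundled_dir` parameter are hypothetical; the override variable and the `shutil.which` fallback come from the diff):

```python
import os
import shutil

def find_tool(binary: str, bundled_dir: str):
    """Sketch of the patched lookup order: an explicit
    TRITON_<TOOL>_PATH override wins, then a copy bundled with the
    package, and finally -- the new fallback -- whatever is on PATH
    (e.g. in $CONDA_PREFIX/bin when running inside a conda env)."""
    candidates = [
        os.environ.get(f"TRITON_{binary.upper()}_PATH", ""),
        os.path.join(bundled_dir, binary),
        shutil.which(binary) or "",
    ]
    for path in candidates:
        # Skip empty entries and anything that is not an executable file.
        if path and os.path.isfile(path) and os.access(path, os.X_OK):
            return path
    return None
```

Because `shutil.which` is the last entry, the patch cannot shadow an explicitly configured or vendored tool; it only widens the search when the first two candidates are absent, which keeps the change safe for non-conda installs.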