From 5c20f99a07012dfa55bfc8df992e2298a8852f05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Tue, 26 Nov 2024 08:50:51 +0100
Subject: [PATCH 1/3] Backport upstream commit for CUDA 12.6 support

---
 recipe/meta.yaml                              |  4 +-
 .../0009-CODEGEN-Support-CUDA-12.6-4588.patch | 37 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 recipe/patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index eaf44f5..8f4b76d 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -31,9 +31,11 @@ source:
     # https://github.com/triton-lang/triton/commit/757b6a61e7df814ba806f498f8bb3160f84b120c
     - patches/0007-Update-config.enableRegionSimplification-for-LLVM-19.patch
     - patches/0008-Do-not-link-directly-to-LLVM-static-libraries.patch
+    # https://github.com/triton-lang/triton/commit/f48dbc1b106c93144c198fbf3c4f30b2aab9d242
+    - patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
 
 build:
-  number: 1
+  number: 2
   # TODO: windows support should be available from next version;
   #       CPU-only support still under development
   skip: true  # [win or cuda_compiler_version == "None"]
diff --git a/recipe/patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch b/recipe/patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
new file mode 100644
index 0000000..ef8e258
--- /dev/null
+++ b/recipe/patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
@@ -0,0 +1,37 @@
+From 2b7c9075640d7909d8e07c0e8fd1cef867df1af5 Mon Sep 17 00:00:00 2001
+From: Keren Zhou <kerenzhou@openai.com>
+Date: Tue, 27 Aug 2024 11:36:38 -0400
+Subject: [PATCH 9/9] [CODEGEN] Support CUDA 12.6 (#4588)
+
+According to the
+[table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history),
+both CUDA 12.5 and 12.6 use PTX ISA 8.5
+---
+ third_party/nvidia/backend/compiler.py | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py
+index 6d7994923..807775f93 100644
+--- a/third_party/nvidia/backend/compiler.py
++++ b/third_party/nvidia/backend/compiler.py
+@@ -44,12 +44,15 @@ def ptx_get_version(cuda_version) -> int:
+     assert isinstance(cuda_version, str)
+     major, minor = map(int, cuda_version.split('.'))
+     if major == 12:
+-        return 80 + minor
++        if minor < 6:
++            return 80 + minor
++        elif minor == 6:
++            return 85
+     if major == 11:
+         return 70 + minor
+     if major == 10:
+         return 63 + minor
+-    raise RuntimeError("Triton only support CUDA 10.0 or higher")
++    raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version)
+ 
+ 
+ @functools.lru_cache(None)
+-- 
+2.47.1
+

From 3d723b185aa2c2dff4bfaec4898d2516fc80fe5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Tue, 26 Nov 2024 12:00:47 +0100
Subject: [PATCH 2/3] Fix finding tool paths in CONDA_PREFIX

---
 recipe/meta.yaml                              |  1 +
 ...m-PATH-to-find-tools-in-CONDA_PREFIX.patch | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 recipe/patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 8f4b76d..4b0406f 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -33,6 +33,7 @@ source:
     - patches/0008-Do-not-link-directly-to-LLVM-static-libraries.patch
     # https://github.com/triton-lang/triton/commit/f48dbc1b106c93144c198fbf3c4f30b2aab9d242
     - patches/0009-CODEGEN-Support-CUDA-12.6-4588.patch
+    - patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch
 
 build:
   number: 2
diff --git a/recipe/patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch b/recipe/patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch
new file mode 100644
index 0000000..51211ae
--- /dev/null
+++ b/recipe/patches/0010-Use-system-PATH-to-find-tools-in-CONDA_PREFIX.patch
@@ -0,0 +1,52 @@
+From e7f10ca2067ed0c960d03e95f0a149d2f5d8dc00 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
+Date: Tue, 26 Nov 2024 11:58:06 +0100
+Subject: [PATCH 10/10] Use system PATH to find tools (in CONDA_PREFIX)
+
+---
+ python/triton/backends/compiler.py     | 2 ++
+ third_party/nvidia/backend/compiler.py | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/python/triton/backends/compiler.py b/python/triton/backends/compiler.py
+index 990690045..08d7750ba 100644
+--- a/python/triton/backends/compiler.py
++++ b/python/triton/backends/compiler.py
+@@ -1,5 +1,6 @@
+ import os
+ import re
++import shutil
+ import subprocess
+ 
+ from abc import ABCMeta, abstractmethod, abstractclassmethod
+@@ -28,6 +29,7 @@ class BaseBackend(metaclass=ABCMeta):
+         paths = [
+             os.environ.get(f"TRITON_{binary.upper()}_PATH", ""),
+             os.path.join(base_dir, "third_party", "cuda", "bin", binary),
++            shutil.which(binary) or "",  # which() returns None when not on PATH; p.split() below needs a str
+         ]
+         for p in paths:
+             bin = p.split(" ")[0]
+diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py
+index 807775f93..cd8b6adae 100644
+--- a/third_party/nvidia/backend/compiler.py
++++ b/third_party/nvidia/backend/compiler.py
+@@ -11,6 +11,7 @@ import signal
+ import os
+ import subprocess
+ from pathlib import Path
++import shutil
+ 
+ 
+ @functools.lru_cache()
+@@ -18,6 +19,7 @@ def _path_to_binary(binary: str):
+     paths = [
+         os.environ.get(f"TRITON_{binary.upper()}_PATH", ""),
+         os.path.join(os.path.dirname(__file__), "bin", binary),
++        shutil.which(binary) or "",  # which() returns None when not on PATH; keep every entry a str (presumably fed to os.path checks - confirm)
+     ]
+ 
+     for bin in paths:
+-- 
+2.47.1
+

From c88955691d0d86b01f5037982902012f62a7cee9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Tue, 26 Nov 2024 12:46:31 +0100
Subject: [PATCH 3/3] Run a test for ptxas that does not require GPU

---
 recipe/meta.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 4b0406f..8e7d03a 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -85,7 +85,9 @@ test:
     - pip check
     # test suite essentially depends on availability of a physical GPU,
     # see https://github.com/openai/triton/issues/466;
-    # - pytest -v python/test
+    # run a test that does not require a GPU but checks
+    # if triton.compile() works
+    - pytest -v python/test/unit/tools/test_aot.py::test_ttgir_to_ptx
 
 about:
   home: https://github.com/openai/triton