-
Notifications
You must be signed in to change notification settings - Fork 227
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
12 additions
and
309 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,313 +1,16 @@ | ||
import os | ||
import re | ||
import sys | ||
import argparse | ||
import torch | ||
import json | ||
import importlib | ||
import subprocess | ||
import torch._dynamo | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
from pathlib import Path | ||
|
||
# Build-matrix settings read from environment variables; each one is None
# when the corresponding variable is not set.
gpu_arch_ver = os.environ.get("MATRIX_GPU_ARCH_VERSION")
gpu_arch_type = os.environ.get("MATRIX_GPU_ARCH_TYPE")
channel = os.environ.get("MATRIX_CHANNEL")
package_type = os.environ.get("MATRIX_PACKAGE_TYPE")
target_os = os.environ.get("TARGET_OS")

# Directory three levels above this file; JSON data files are resolved
# relative to it.
BASE_DIR = Path(__file__).parent.parent.parent

# True only when the GPU architecture type is exactly "cuda".
is_cuda_system = gpu_arch_type == "cuda"

# Age threshold, in days, applied by the nightly date checks.
NIGHTLY_ALLOWED_DELTA = 3
|
||
# Domain libraries covered by the smoke test. For each entry:
#   name       - importable package name
#   repo       - git URL cloned to obtain the library's own smoke test
#   smoke_test - script (plus arguments) run from the clone
#   extension  - name of the attribute exposing _check_cuda_version()
#   repo_name  - directory name of the clone
MODULES = [
    dict(
        name="torchvision",
        repo="https://github.com/pytorch/vision.git",
        smoke_test="./vision/test/smoke_test.py",
        extension="extension",
        repo_name="vision",
    ),
    dict(
        name="torchaudio",
        repo="https://github.com/pytorch/audio.git",
        smoke_test="./audio/test/smoke_test/smoke_test.py --no-ffmpeg",
        extension="_extension",
        repo_name="audio",
    ),
]
|
||
|
||
class Net(nn.Module):
    """Small convolutional network compiled by ``smoke_test_compile``.

    Two 3x3, stride-1 convolutions (1 -> 32 -> 64 channels), a 2x2 max-pool,
    and a linear layer that maps 9216 flattened features to a single output.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(in_features=9216, out_features=1)

    def forward(self, x):
        """Apply both convolutions, pool, flatten from dim 1, and project."""
        features = self.conv2(self.conv1(x))
        pooled = F.max_pool2d(features, kernel_size=2)
        return self.fc1(torch.flatten(pooled, start_dim=1))
|
||
def load_json_from_basedir(filename: str):
    """Load and parse a JSON file located under ``BASE_DIR``.

    Args:
        filename: Path of the JSON file, relative to ``BASE_DIR``.

    Returns:
        The decoded JSON document.

    Raises:
        ImportError: If the file does not exist or is not valid JSON.
    """
    json_path = BASE_DIR / filename
    try:
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale encoding.
        with open(json_path, encoding="utf-8") as fptr:
            return json.load(fptr)
    except FileNotFoundError as exc:
        raise ImportError(f"File {filename} not found error: {exc.strerror}") from exc
    except json.JSONDecodeError as exc:
        raise ImportError(f"Invalid JSON {filename}") from exc
|
||
def read_release_matrix():
    """Return the parsed contents of ``release_matrix.json``."""
    matrix_filename = "release_matrix.json"
    return load_json_from_basedir(matrix_filename)
|
||
def check_version(package: str) -> None:
    """Validate the installed torch version and, optionally, domain libraries.

    The expected torch version comes from the release matrix file when the
    ``RELEASE_VERSION`` environment variable is set, otherwise from
    ``MATRIX_STABLE_VERSION``. On the nightly channel the build-date check is
    run instead of a version comparison.

    Args:
        package: ``"all"`` to also validate every entry of ``MODULES``.
            Modules are compared against the release matrix only when
            ``RELEASE_VERSION`` is set.

    Raises:
        RuntimeError: If an installed version does not start with the
            expected version string.
    """
    release_version = os.getenv("RELEASE_VERSION")
    release_matrix = None
    # If release_version is specified, use it to validate the packages.
    if release_version:
        release_matrix = read_release_matrix()
        stable_version = release_matrix["torch"]
    else:
        stable_version = os.getenv("MATRIX_STABLE_VERSION")

    # Only makes sense to check nightly packages, where dates are known.
    if channel == "nightly":
        check_nightly_binaries_date(package)
        return

    if stable_version is None:
        print(f"Skip version check for channel {channel} as stable version is None")
        return

    if not torch.__version__.startswith(stable_version):
        raise RuntimeError(
            f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
        )

    if release_version and package == "all":
        for module in MODULES:
            name = module["name"]
            expected = release_matrix[name]
            module_version = importlib.import_module(name).__version__
            if not module_version.startswith(expected):
                raise RuntimeError(
                    f"{name} version mismatch, expected: "
                    f"{expected} for channel {channel}. But its {module_version}"
                )
            print(
                f"{name} version actual: {module_version} expected: "
                f"{expected} for channel {channel}."
            )
|
||
|
||
def _find_dev_tags(version: str, owner: str) -> list:
    """Return every ``dev<digits>`` tag in ``version``; raise if there is none."""
    tags = re.findall(r"dev\d+", version)
    if not tags:
        raise RuntimeError(
            f"Cannot find a dev date tag in {owner} version {version}"
        )
    return tags


def check_nightly_binaries_date(package: str) -> None:
    """Fail when the installed nightly binaries are too old.

    The build date is taken from the ``dev<YYYYMMDD>`` tag of each version
    string and compared against the current local time.

    Args:
        package: ``"all"`` to also check every entry of ``MODULES``.

    Raises:
        RuntimeError: If a version string carries no dev date tag, or if a
            build is older than ``NIGHTLY_ALLOWED_DELTA`` allows.
    """
    from datetime import datetime
    format_dt = '%Y%m%d'

    date_t_str = _find_dev_tags(torch.__version__, "torch")
    # [3:] drops the "dev" prefix, leaving only the date digits.
    date_t_delta = datetime.now() - datetime.strptime(date_t_str[0][3:], format_dt)
    # NOTE(review): torch is rejected at >= the threshold while modules below
    # are rejected only at > the threshold; confirm which one is intended.
    if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA:
        raise RuntimeError(
            f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!"
        )

    if package == "all":
        for module in MODULES:
            imported_module = importlib.import_module(module["name"])
            module_version = imported_module.__version__
            date_m_str = _find_dev_tags(module_version, module["name"])
            date_m_delta = datetime.now() - datetime.strptime(date_m_str[0][3:], format_dt)
            print(f"Nightly date check for {module['name']} version {module_version}")
            if date_m_delta.days > NIGHTLY_ALLOWED_DELTA:
                raise RuntimeError(
                    f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {date_m_delta}"
                )
|
||
|
||
def test_cuda_runtime_errors_captured() -> None:
    """Check that a failing CUDA assert is reported as a ``RuntimeError``.

    A zero-valued CUDA tensor is passed to ``torch._assert_async``, which is
    expected to fail on the device.

    Raises:
        RuntimeError: If no CUDA-related ``RuntimeError`` was raised, or if a
            ``RuntimeError`` unrelated to CUDA was raised (re-raised as is).
    """
    cuda_exception_missed = True
    try:
        print("Testing test_cuda_runtime_errors_captured")
        torch._assert_async(torch.tensor(0, device="cuda"))
        # The assert above is asynchronous; issuing further CUDA work gives
        # the failure a chance to surface inside this try block.
        torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
    except RuntimeError as e:
        if not re.search("CUDA", f"{e}"):
            raise
        print(f"Caught CUDA exception with success: {e}")
        # Record the success so the check below does not fail.
        cuda_exception_missed = False

    if cuda_exception_missed:
        raise RuntimeError("Expected CUDA RuntimeError but have not received!")
|
||
|
||
def smoke_test_cuda(package: str, runtime_error_check: str) -> None:
    """Run the CUDA-related smoke checks for torch and the domain libraries.

    Args:
        package: ``"all"`` to also print the CUDA version reported by each
            entry of ``MODULES`` (CUDA systems only).
        runtime_error_check: ``"enabled"`` to run the CUDA runtime error
            capture test when CUDA is available.

    Raises:
        RuntimeError: If a CUDA system has no CUDA available, or if the
            loaded CUDA version differs from ``gpu_arch_ver``.
    """
    if not torch.cuda.is_available() and is_cuda_system:
        raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")

    if package == 'all' and is_cuda_system:
        for module in MODULES:
            imported_module = importlib.import_module(module["name"])
            # torchvision exposes the check on `extension`; anything else is
            # looked up on the private `_extension` attribute.
            attr_name = "extension" if module["extension"] == "extension" else "_extension"
            version = getattr(imported_module, attr_name)._check_cuda_version()
            print(f"{module['name']} CUDA: {version}")

    # torch.compile is available on macos-arm64 and Linux for python 3.8-3.11
    if sys.version_info < (3, 12, 0):
        on_cuda_linux = target_os == "linux" and torch.cuda.is_available()
        if on_cuda_linux or target_os == "macos-arm64":
            smoke_test_compile()

    if not torch.cuda.is_available():
        return

    if torch.version.cuda != gpu_arch_ver:
        raise RuntimeError(
            f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
        )
    print(f"torch cuda: {torch.version.cuda}")
    # TODO: add cudnn version validation
    print(f"torch cudnn: {torch.backends.cudnn.version()}")
    print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

    # NCCL is available only on Linux.
    if sys.platform in ("linux", "linux2"):
        print(f"torch nccl version: {torch.cuda.nccl.version()}")

    if runtime_error_check == "enabled":
        test_cuda_runtime_errors_captured()
|
||
|
||
def smoke_test_conv2d() -> None:
    """Build several ``Conv2d`` layers and run them on CPU and, if enabled, CUDA.

    The CUDA section runs only when ``is_cuda_system`` is true; it exercises
    autocast and float16/float32/float64 inputs.
    """
    import torch.nn as nn

    print("Testing smoke_test_conv2d")
    # Square kernel, equal stride: built only to prove construction works.
    nn.Conv2d(16, 33, 3, stride=2)
    # Non-square kernel, unequal stride, with padding.
    padded_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    assert padded_conv is not None
    # Non-square kernel, unequal stride, with padding and dilation.
    basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
    basic_conv(torch.randn(20, 16, 50, 100))

    if not is_cuda_system:
        return

    print("Testing smoke_test_conv2d with cuda")
    cuda_conv = nn.Conv2d(3, 3, 3).cuda()
    cuda_batch = torch.randn(1, 3, 24, 24, device="cuda")
    with torch.cuda.amp.autocast():
        autocast_out = cuda_conv(cuda_batch)
    assert autocast_out is not None

    for dtype in (torch.float16, torch.float32, torch.float64):
        print(f"Testing smoke_test_conv2d with cuda for {dtype}")
        typed_conv = basic_conv.to(dtype).cuda()
        typed_batch = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
        assert typed_conv(typed_batch) is not None
|
||
|
||
def test_linalg(device="cpu") -> None:
    """Exercise ``torch.linalg.svd`` on random matrices placed on ``device``.

    Checks the factor shapes for reduced and full SVD of a 5x3 matrix, runs a
    batched reduced SVD, and, when ``device`` is ``"cuda"``, runs SVD on a
    larger batch in float32 and float64. The reconstruction distances are
    computed but not asserted.
    """
    print(f"Testing smoke_test_linalg on {device}")

    # Reduced SVD of a single 5x3 matrix.
    mat = torch.randn(5, 3, device=device)
    left, sing, right_h = torch.linalg.svd(mat, full_matrices=False)
    assert left.shape == mat.shape
    assert sing.shape == torch.Size([3])
    assert right_h.shape == torch.Size([3, 3])
    torch.dist(mat, left @ torch.diag(sing) @ right_h)

    # Full SVD of the same matrix.
    left, sing, right_h = torch.linalg.svd(mat)
    assert left.shape == torch.Size([5, 5])
    assert sing.shape == torch.Size([3])
    assert right_h.shape == torch.Size([3, 3])
    torch.dist(mat, left[:, :3] @ torch.diag(sing) @ right_h)

    # Batched reduced SVD.
    batch = torch.randn(7, 5, 3, device=device)
    left, sing, right_h = torch.linalg.svd(batch, full_matrices=False)
    torch.dist(batch, left @ torch.diag_embed(sing) @ right_h)

    if device != "cuda":
        return

    for dtype in (torch.float32, torch.float64):
        print(f"Testing smoke_test_linalg with cuda for {dtype}")
        torch.linalg.svd(torch.randn(20, 16, 50, 100, device=device, dtype=dtype))
|
||
|
||
def smoke_test_compile() -> None:
    """Smoke-test ``torch.compile`` on a small function and on ``Net``.

    Runs on ``"cuda"`` when ``target_os`` is ``"linux"`` and on ``"cpu"``
    otherwise. The eager/compiled comparison result is printed, not asserted.
    """
    device = "cuda" if target_os == "linux" else "cpu"

    def foo(x: torch.Tensor) -> torch.Tensor:
        return torch.sin(x) + torch.cos(x)

    for dtype in (torch.float16, torch.float32, torch.float64):
        print(f"Testing smoke_test_compile for {dtype}")
        sample = torch.rand(3, 3, device=device).type(dtype)
        eager_out = foo(sample)
        compiled_out = torch.compile(foo)(sample)
        print(torch.allclose(eager_out, compiled_out))

    # Reset torch dynamo since we are changing mode.
    torch._dynamo.reset()
    dtype = torch.float32
    torch.set_float32_matmul_precision('high')
    print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}")
    batch = torch.rand(64, 1, 28, 28, device=device).type(torch.float32)
    model = Net().to(device=device)
    # The output is discarded; only a successful compile-and-run matters.
    torch.compile(model, mode="max-autotune")(batch)
|
||
|
||
def smoke_test_modules():
    """Clone each domain library if needed and run its own smoke test script.

    For every entry of ``MODULES`` with a repository URL, the repository is
    shallow-cloned into the current working directory when its directory is
    missing, then the entry's smoke test command is executed.

    Raises:
        RuntimeError: If cloning or the smoke test command exits non-zero.
    """
    cwd = os.getcwd()
    # Windows uses `python`; every other target uses `python3`.
    interpreter = "python" if target_os == 'windows' else "python3"

    for module in MODULES:
        if not module["repo"]:
            continue

        checkout_dir = f"{cwd}/{module['repo_name']}"
        if not os.path.exists(checkout_dir):
            print(f"Path does not exist: {checkout_dir}")
            try:
                subprocess.check_output(
                    f"git clone --depth 1 {module['repo']}",
                    stderr=subprocess.STDOUT,
                    shell=True,
                )
            except subprocess.CalledProcessError as exc:
                raise RuntimeError(
                    f"Cloning {module['repo']} FAIL: {exc.returncode} Output: {exc.output}"
                ) from exc

        smoke_test_command = f"{interpreter} {module['smoke_test']}"
        try:
            output = subprocess.check_output(
                smoke_test_command,
                stderr=subprocess.STDOUT,
                shell=True,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}") from exc
        print(f"Output: \n{output}\n")
|
||
|
||
def main() -> None:
    """Parse the command-line options and run the CUDA smoke test.

    Options:
        --package: ``all`` (default) or ``torchonly``.
        --runtime-error-check: ``enabled`` (default) or ``disabled``.
    """
    parser = argparse.ArgumentParser()
    # (flag, help text, allowed values, default)
    cli_options = (
        ("--package", "Package to include in smoke testing", ["all", "torchonly"], "all"),
        ("--runtime-error-check", "No Runtime Error check", ["enabled", "disabled"], "enabled"),
    )
    for flag, help_text, allowed, default in cli_options:
        parser.add_argument(flag, help=help_text, type=str, choices=allowed, default=default)

    options = parser.parse_args()
    print(f"torch: {torch.__version__}")

    smoke_test_cuda(options.package, options.runtime_error_check)
|
||
|
||
# Run the smoke test only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()