Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch CI to pytorch 1.13 #521

Merged
merged 19 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 28 additions & 17 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ setup_conda: &setup_conda
install_dep: &install_dep
- run:
name: Install Dependencies with torch nightly
no_output_timeout: 30m
command: |
source $BASH_ENV

Expand All @@ -94,7 +95,7 @@ install_dep: &install_dep
conda install ninja
echo "Ninja version $(ninja --version)"

conda install pytorch=1.12.1 "torchvision>=0.13" torchaudio cudatoolkit=11.3 -c pytorch -q
conda install pytorch=1.13 torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia -q
$CONDA_PYTHON -m pip install -r requirements-benchmark.txt --progress-bar off

# Mark install as complete
Expand All @@ -109,7 +110,7 @@ install_dep_exp: &install_dep_exp
if [ -f /home/circleci/venv/check_version.py ]; then $CONDA_PYTHON /home/circleci/venv/check_version.py torch gt 1.11 && exit 0; fi
# start installing
source activate /home/circleci/venv
conda install pytorch=1.12.1 "torchvision>=0.13" torchaudio cudatoolkit=11.3 -c pytorch -q
conda install pytorch=1.13 torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia -q
$CONDA_PYTHON -m pip install -r experimental/requirements.txt --progress-bar off

install_repo: &install_repo
Expand Down Expand Up @@ -158,6 +159,7 @@ run_mypy: &run_mypy
when: always
command: |
source $BASH_ENV
$CONDA_PYTHON -m mypy --version
$CONDA_PYTHON -m mypy --ignore-missing-imports --scripts-are-modules --pretty --exclude "(build|stubs|third_party|docs|setup.py)" .

run_flake8: &run_flake8
Expand Down Expand Up @@ -375,6 +377,8 @@ jobs:
parameters:
dockerimage:
type: string
pytorch_version:
type: string
cu_version:
type: string

Expand All @@ -390,14 +394,17 @@ jobs:

steps:
- checkout

- run: git submodule update --init --recursive
- run:
name: conda build
name: conda build for py3_9
no_output_timeout: 20m
command: |
git submodule update --init --recursive
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.9 --pytorch 1.12.1 --upload-dev
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.10 --pytorch 1.12.1 --upload-dev
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.9 --pytorch << parameters.pytorch_version >>
- run:
name: conda build for py3_10
no_output_timeout: 20m
command: |
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.10 --pytorch << parameters.pytorch_version >>

gpu_tests_cu114_sm75:
<<: *gpu_cu114
Expand Down Expand Up @@ -542,21 +549,25 @@ workflows:
- gh-pages

- build_conda:
name: conda_build_cu113
name: conda_build_cu113_1.12.1
dockerimage: pytorch/conda-builder:cuda113
cu_version: "11.3"
filters:
branches:
only:
- main
pytorch_version: "1.12.1"
- build_conda:
name: conda_build_cu116
name: conda_build_cu116_1.12.1
dockerimage: pytorch/conda-builder:cuda116
cu_version: "11.6"
filters:
branches:
only:
- main
danthe3rd marked this conversation as resolved.
Show resolved Hide resolved
pytorch_version: "1.12.1"
- build_conda:
name: conda_build_cu116_1.13
dockerimage: pytorch/conda-builder:cuda116
cu_version: "11.6"
pytorch_version: "1.13"
- build_conda:
name: conda_build_cu117_1.13
dockerimage: pytorch/conda-builder:cuda117
cu_version: "11.7"
pytorch_version: "1.13"
- binary_linux_wheel:
python_version: "3.7"
name: binary_linux_wheel_py37_cu102
Expand Down
16 changes: 12 additions & 4 deletions packaging/conda/build_conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"1.11.0": ["10.2", "11.1", "11.3", "11.5"],
"1.12.0": ["10.2", "11.3", "11.6"],
"1.12.1": ["10.2", "11.3", "11.6"],
"1.13": ["11.6", "11.7"],
}


Expand All @@ -33,6 +34,8 @@ def conda_docker_image_for_cuda(cuda_version):
return "pytorch/conda-builder:cuda115"
if cuda_version == "11.6":
return "pytorch/conda-builder:cuda116"
if cuda_version == "11.7":
return "pytorch/conda-builder:cuda117"
raise ValueError(f"Unknown cuda version {cuda_version}")


Expand Down Expand Up @@ -94,9 +97,12 @@ def _set_env_for_build(self):
os.environ["PYTORCH_VERSION"] = self.pytorch_version
os.environ["CU_VERSION"] = self.cuda_version
os.environ["SOURCE_ROOT_DIR"] = str(SOURCE_ROOT_DIR)
os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = version_constraint(
self.cuda_version
)
cuda_constraint = version_constraint(self.cuda_version)
pytorch_version_tuple = tuple(int(v) for v in self.pytorch_version.split("."))
if pytorch_version_tuple < (1, 13):
os.environ["CONDA_CUDA_CONSTRAINT"] = f"cudatoolkit{cuda_constraint}"
else:
os.environ["CONDA_CUDA_CONSTRAINT"] = f"pytorch-cuda{cuda_constraint}"
os.environ["FORCE_CUDA"] = "1"

if self.conda_always_copy:
Expand All @@ -107,7 +113,9 @@ def _get_build_args(self):
"conda",
"build",
"-c",
"fastchan", # which can avoid needing pytorch and conda-forge
"pytorch",
"-c",
"nvidia",
"--no-anaconda-upload",
"--python",
self.python_version,
Expand Down
7 changes: 4 additions & 3 deletions packaging/conda/xformers/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@ source:
requirements:
build:
- ninja
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
host:
# - numpy >=1.11
- python
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
- cudatoolkit{{ environ['CONDA_CUDATOOLKIT_CONSTRAINT'] }}
- {{environ['CONDA_CUDA_CONSTRAINT']}}

run:
# - numpy >=1.11
- python
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
- cudatoolkit{{ environ['CONDA_CUDATOOLKIT_CONSTRAINT'] }}
- {{environ['CONDA_CUDA_CONSTRAINT']}}

build:
string: py{{py}}_cu{{ environ['CU_VERSION'] }}_pyt{{ environ['PYTORCH_VERSION']}}
script: python setup.py install --single-version-externally-managed --record=record.txt
script: {{environ['PYTHON']}} setup.py install --single-version-externally-managed --record=record.txt
script_env:
- BUILD_VERSION
- CUDA_HOME
Expand Down
5 changes: 3 additions & 2 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ click == 8.0.4
protobuf==3.20.1

# Tools for unit tests & coverage.
pytest == 5.4.1
pytest == 7.2.0
pytest-cov == 2.10.0
pytest-mpi == 0.4
pytest-timeout == 1.4.2
Expand All @@ -27,4 +27,5 @@ hydra-core >= 1.1
fairscale >= 0.4.5

# Dependency for fused layers, optional
triton == 2.0.0.dev20220701
triton == 2.0.0.dev20221105
danthe3rd marked this conversation as resolved.
Show resolved Hide resolved
networkx
danthe3rd marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 2 additions & 1 deletion tests/test_unbind.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def test_unbind(dim: int, contiguous: bool):
g = torch.randn_like(loss1)
loss1.backward(g)
loss2.backward(g)
# type: ignore
assert x.grad is not None
assert x2.grad is not None
assert torch.allclose(x.grad, x2.grad)


Expand Down
7 changes: 4 additions & 3 deletions xformers/benchmarks/LRA/run_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
from enum import Enum
from pathlib import Path
from typing import Dict, Tuple
from typing import Dict, Tuple, cast

import pytorch_lightning as pl
import torch
Expand Down Expand Up @@ -51,10 +51,11 @@ def build_model(args: argparse.Namespace, config: Dict) -> nn.Module:
task = args.task
attention_name = args.attention

model: pl.LightningModule = (
model = cast(
pl.LightningModule,
ModelForSCDual(config[f"{task}"], attention_name)
if task == Task.Retrieval
else ModelForSC(config[f"{task}"], attention_name)
else ModelForSC(config[f"{task}"], attention_name),
)

logging.info(model)
Expand Down
19 changes: 15 additions & 4 deletions xformers/components/nvfuser/bias_act_dropout.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand Down Expand Up @@ -47,6 +48,16 @@ def __init__(
self.bias = (
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self._fn_train = functools.partial(
_fn,
activation=self.pytorch_activation,
prob=self.p,
)
self._fn_eval = functools.partial(
_fn,
activation=self.pytorch_activation,
prob=0.0,
)

assert (
self.p < 1.0
Expand All @@ -59,12 +70,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-cuda setup, fallback to pytorch
if not x.is_cuda:
return _fn(x, self.bias, self.pytorch_activation, p)
return fn(x, self.bias)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(_fn, static_argnums=(2, 3))
return aot_fn(x, self.bias, self.pytorch_activation, p)
aot_fn = memory_efficient_fusion(fn)
return aot_fn(x, self.bias)
13 changes: 8 additions & 5 deletions xformers/components/nvfuser/bias_dropout_res.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand All @@ -14,8 +15,8 @@
def _fn(
x: torch.Tensor,
bias: Optional[torch.nn.parameter.Parameter],
prob: float,
residual: torch.Tensor,
prob: float,
) -> torch.Tensor:
a = torch.add(x, bias) if bias is not None else x
b = torch.nn.functional.dropout(a, prob) if prob > 0.0 else a
Expand All @@ -41,6 +42,8 @@ def __init__(
self.bias = (
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self._fn_train = functools.partial(_fn, prob=self.p)
self._fn_eval = functools.partial(_fn, prob=0.0)

assert (
self.p < 1.0
Expand All @@ -53,12 +56,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-cuda setup, fallback to pytorch
if not x.is_cuda:
return _fn(x, self.bias, p, residual)
return fn(x, self.bias, residual)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(fn=_fn, static_argnums=(2))
return aot_fn(x, self.bias, p, residual)
aot_fn = memory_efficient_fusion(fn)
return aot_fn(x, self.bias, residual)
23 changes: 18 additions & 5 deletions xformers/components/nvfuser/bias_dropout_res_layernorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand All @@ -16,10 +17,10 @@
def _fn(
x: torch.Tensor,
bias: Optional[torch.nn.parameter.Parameter],
residual: torch.Tensor,
prob: float,
layer_norm_style: Optional[ResidualNormStyle],
norm: nn.Module,
residual: torch.Tensor,
) -> torch.Tensor:
a = torch.add(x, bias) if bias is not None else x
b = torch.nn.functional.dropout(a, prob) if prob > 0.0 else a
Expand Down Expand Up @@ -57,6 +58,18 @@ def __init__(
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self.norm = nn.LayerNorm(d_model)
self._fn_train = functools.partial(
_fn,
prob=p,
layer_norm_style=self.layer_norm_style,
norm=self.norm,
)
self._fn_eval = functools.partial(
_fn,
prob=0.0,
layer_norm_style=self.layer_norm_style,
norm=self.norm,
)

assert (
self.p < 1.0
Expand All @@ -69,12 +82,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-cuda setup, fallback to pytorch
if not x.is_cuda:
return _fn(x, self.bias, p, self.layer_norm_style, self.norm, residual)
return fn(x, self.bias, residual)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(fn=_fn, static_argnums=(2, 3, 4))
return aot_fn(x, self.bias, p, self.layer_norm_style, self.norm, residual)
aot_fn = memory_efficient_fusion(fn=fn)
return aot_fn(x, self.bias, residual)