Skip to content

Commit

Permalink
Switch CI to pytorch 1.13
Browse files Browse the repository at this point in the history
ghstack-source-id: 473802a854a8c50f96d4e5a1aae86e3771f1e788
Pull Request resolved: #521
  • Loading branch information
danthe3rd committed Nov 15, 2022
1 parent cb79827 commit 72b4b63
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 37 deletions.
47 changes: 37 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ setup_conda: &setup_conda
install_dep: &install_dep
- run:
name: Install Dependencies with torch nightly
no_output_timeout: 30m
command: |
source $BASH_ENV
Expand All @@ -94,7 +95,7 @@ install_dep: &install_dep
conda install ninja
echo "Ninja version $(ninja --version)"
conda install pytorch=1.12.1 "torchvision>=0.13" torchaudio cudatoolkit=11.3 -c pytorch -q
conda install pytorch=1.13 torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia -q
$CONDA_PYTHON -m pip install -r requirements-benchmark.txt --progress-bar off
# Mark install as complete
Expand All @@ -103,13 +104,14 @@ install_dep: &install_dep
install_dep_exp: &install_dep_exp
- run:
name: Install Dependencies for experimental tests
no_output_timeout: 30m
command: |
source $BASH_ENV
# Check whether the venv cache (/home/circleci/venv) was restored correctly; if so, skip the install
if [ -f /home/circleci/venv/check_version.py ]; then $CONDA_PYTHON /home/circleci/venv/check_version.py torch gt 1.11 && exit 0; fi
# start installing
source activate /home/circleci/venv
conda install pytorch=1.12.1 "torchvision>=0.13" torchaudio cudatoolkit=11.3 -c pytorch -q
conda install pytorch=1.13 torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia -q
$CONDA_PYTHON -m pip install -r experimental/requirements.txt --progress-bar off
install_repo: &install_repo
Expand Down Expand Up @@ -158,6 +160,7 @@ run_mypy: &run_mypy
when: always
command: |
source $BASH_ENV
$CONDA_PYTHON -m mypy --version
$CONDA_PYTHON -m mypy --ignore-missing-imports --scripts-are-modules --pretty --exclude "(build|stubs|third_party|docs|setup.py)" .
run_flake8: &run_flake8
Expand Down Expand Up @@ -375,6 +378,8 @@ jobs:
parameters:
dockerimage:
type: string
pytorch_version:
type: string
cu_version:
type: string

Expand All @@ -390,14 +395,17 @@ jobs:

steps:
- checkout

- run: git submodule update --init --recursive
- run:
name: conda build for py3_9
no_output_timeout: 20m
command: |
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.9 --pytorch << parameters.pytorch_version >> --upload-dev
- run:
name: conda build
name: conda build for py3_10
no_output_timeout: 20m
command: |
git submodule update --init --recursive
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.9 --pytorch 1.12.1 --upload-dev
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.10 --pytorch 1.12.1 --upload-dev
python packaging/conda/build_conda.py --cuda << parameters.cu_version >> --python 3.10 --pytorch << parameters.pytorch_version >> --upload-dev
gpu_tests_cu114_sm75:
<<: *gpu_cu114
Expand All @@ -408,7 +416,6 @@ jobs:
steps:
- run_gpu_ci:
arch: "7.5"
- <<: *run_coverage

gpu_tests_cu114_sm70:
<<: *gpu_cu114
Expand Down Expand Up @@ -542,17 +549,37 @@ workflows:
- gh-pages

- build_conda:
name: conda_build_cu113
name: conda_build_cu113_1.12.1
dockerimage: pytorch/conda-builder:cuda113
cu_version: "11.3"
pytorch_version: "1.12.1"
filters:
branches:
only:
- main
- build_conda:
name: conda_build_cu116
name: conda_build_cu116_1.12.1
dockerimage: pytorch/conda-builder:cuda116
cu_version: "11.6"
pytorch_version: "1.12.1"
filters:
branches:
only:
- main
- build_conda:
name: conda_build_cu116_1.13
dockerimage: pytorch/conda-builder:cuda116
cu_version: "11.6"
pytorch_version: "1.13"
filters:
branches:
only:
- main
- build_conda:
name: conda_build_cu117_1.13
dockerimage: pytorch/conda-builder:cuda117
cu_version: "11.7"
pytorch_version: "1.13"
filters:
branches:
only:
Expand Down
16 changes: 12 additions & 4 deletions packaging/conda/build_conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"1.11.0": ["10.2", "11.1", "11.3", "11.5"],
"1.12.0": ["10.2", "11.3", "11.6"],
"1.12.1": ["10.2", "11.3", "11.6"],
"1.13": ["11.6", "11.7"],
}


Expand All @@ -33,6 +34,8 @@ def conda_docker_image_for_cuda(cuda_version):
return "pytorch/conda-builder:cuda115"
if cuda_version == "11.6":
return "pytorch/conda-builder:cuda116"
if cuda_version == "11.7":
return "pytorch/conda-builder:cuda117"
raise ValueError(f"Unknown cuda version {cuda_version}")


Expand Down Expand Up @@ -94,9 +97,12 @@ def _set_env_for_build(self):
os.environ["PYTORCH_VERSION"] = self.pytorch_version
os.environ["CU_VERSION"] = self.cuda_version
os.environ["SOURCE_ROOT_DIR"] = str(SOURCE_ROOT_DIR)
os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = version_constraint(
self.cuda_version
)
cuda_constraint = version_constraint(self.cuda_version)
pytorch_version_tuple = tuple(int(v) for v in self.pytorch_version.split("."))
if pytorch_version_tuple < (1, 13):
os.environ["CONDA_CUDA_CONSTRAINT"] = f"cudatoolkit{cuda_constraint}"
else:
os.environ["CONDA_CUDA_CONSTRAINT"] = f"pytorch-cuda{cuda_constraint}"
os.environ["FORCE_CUDA"] = "1"

if self.conda_always_copy:
Expand All @@ -107,7 +113,9 @@ def _get_build_args(self):
"conda",
"build",
"-c",
"fastchan", # which can avoid needing pytorch and conda-forge
"pytorch",
"-c",
"nvidia",
"--no-anaconda-upload",
"--python",
self.python_version,
Expand Down
7 changes: 4 additions & 3 deletions packaging/conda/xformers/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@ source:
requirements:
build:
- ninja
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
host:
# - numpy >=1.11
- python
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
- cudatoolkit{{ environ['CONDA_CUDATOOLKIT_CONSTRAINT'] }}
- {{environ['CONDA_CUDA_CONSTRAINT']}}

run:
# - numpy >=1.11
- python
- pytorch=={{ environ.get('PYTORCH_VERSION') }}
- cudatoolkit{{ environ['CONDA_CUDATOOLKIT_CONSTRAINT'] }}
- {{environ['CONDA_CUDA_CONSTRAINT']}}

build:
string: py{{py}}_cu{{ environ['CU_VERSION'] }}_pyt{{ environ['PYTORCH_VERSION']}}
script: python setup.py install --single-version-externally-managed --record=record.txt
script: {{environ['PYTHON']}} setup.py install --single-version-externally-managed --record=record.txt
script_env:
- BUILD_VERSION
- CUDA_HOME
Expand Down
3 changes: 2 additions & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ click == 8.0.4
protobuf==3.20.1

# Tools for unit tests & coverage.
pytest == 5.4.1
pytest == 7.2.0
pytest-cov == 2.10.0
pytest-mpi == 0.4
pytest-timeout == 1.4.2
Expand All @@ -28,3 +28,4 @@ fairscale >= 0.4.5

# Dependency for fused layers, optional
triton==2.0.0.dev20221105
networkx
3 changes: 2 additions & 1 deletion tests/test_unbind.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def test_unbind(dim: int, contiguous: bool):
g = torch.randn_like(loss1)
loss1.backward(g)
loss2.backward(g)
# type: ignore
assert x.grad is not None
assert x2.grad is not None
assert torch.allclose(x.grad, x2.grad)


Expand Down
9 changes: 5 additions & 4 deletions xformers/benchmarks/LRA/run_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
from enum import Enum
from pathlib import Path
from typing import Dict, Tuple
from typing import Dict, Tuple, cast

import pytorch_lightning as pl
import torch
Expand Down Expand Up @@ -51,10 +51,11 @@ def build_model(args: argparse.Namespace, config: Dict) -> nn.Module:
task = args.task
attention_name = args.attention

model: pl.LightningModule = (
ModelForSCDual(config[f"{task}"], attention_name) # type: ignore
model = cast(
pl.LightningModule,
ModelForSCDual(config[f"{task}"], attention_name)
if task == Task.Retrieval
else ModelForSC(config[f"{task}"], attention_name) # type: ignore
else ModelForSC(config[f"{task}"], attention_name),
)

logging.info(model)
Expand Down
19 changes: 15 additions & 4 deletions xformers/components/nvfuser/bias_act_dropout.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand Down Expand Up @@ -47,6 +48,16 @@ def __init__(
self.bias = (
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self._fn_train = functools.partial(
_fn,
activation=self.pytorch_activation,
prob=self.p,
)
self._fn_eval = functools.partial(
_fn,
activation=self.pytorch_activation,
prob=0.0,
)

assert (
self.p < 1.0
Expand All @@ -59,12 +70,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-CUDA setup and fall back to plain PyTorch
if not x.is_cuda:
return _fn(x, self.bias, self.pytorch_activation, p)
return fn(x, self.bias)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(_fn, static_argnums=(2, 3))
return aot_fn(x, self.bias, self.pytorch_activation, p)
aot_fn = memory_efficient_fusion(fn)
return aot_fn(x, self.bias)
13 changes: 8 additions & 5 deletions xformers/components/nvfuser/bias_dropout_res.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand All @@ -14,8 +15,8 @@
def _fn(
x: torch.Tensor,
bias: Optional[torch.nn.parameter.Parameter],
prob: float,
residual: torch.Tensor,
prob: float,
) -> torch.Tensor:
a = torch.add(x, bias) if bias is not None else x
b = torch.nn.functional.dropout(a, prob) if prob > 0.0 else a
Expand All @@ -41,6 +42,8 @@ def __init__(
self.bias = (
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self._fn_train = functools.partial(_fn, prob=self.p)
self._fn_eval = functools.partial(_fn, prob=0.0)

assert (
self.p < 1.0
Expand All @@ -53,12 +56,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-CUDA setup and fall back to plain PyTorch
if not x.is_cuda:
return _fn(x, self.bias, p, residual)
return fn(x, self.bias, residual)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(fn=_fn, static_argnums=(2))
return aot_fn(x, self.bias, p, residual)
aot_fn = memory_efficient_fusion(fn)
return aot_fn(x, self.bias, residual)
23 changes: 18 additions & 5 deletions xformers/components/nvfuser/bias_dropout_res_layernorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.


import functools
from typing import Optional

import torch
Expand All @@ -16,10 +17,10 @@
def _fn(
x: torch.Tensor,
bias: Optional[torch.nn.parameter.Parameter],
residual: torch.Tensor,
prob: float,
layer_norm_style: Optional[ResidualNormStyle],
norm: nn.Module,
residual: torch.Tensor,
) -> torch.Tensor:
a = torch.add(x, bias) if bias is not None else x
b = torch.nn.functional.dropout(a, prob) if prob > 0.0 else a
Expand Down Expand Up @@ -57,6 +58,18 @@ def __init__(
nn.Parameter(torch.zeros(bias_shape)) if bias_shape is not None else None
)
self.norm = nn.LayerNorm(d_model)
self._fn_train = functools.partial(
_fn,
prob=p,
layer_norm_style=self.layer_norm_style,
norm=self.norm,
)
self._fn_eval = functools.partial(
_fn,
prob=0.0,
layer_norm_style=self.layer_norm_style,
norm=self.norm,
)

assert (
self.p < 1.0
Expand All @@ -69,12 +82,12 @@ def init_weights(self, *args, **kwargs):

def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
# Train/inference
p = self.p if self.training else 0.0
fn = self._fn_train if self.training else self._fn_eval

# Catch a non-CUDA setup and fall back to plain PyTorch
if not x.is_cuda:
return _fn(x, self.bias, p, self.layer_norm_style, self.norm, residual)
return fn(x, self.bias, residual)

# AOTAutograd, NVFuser backed path
aot_fn = memory_efficient_fusion(fn=_fn, static_argnums=(2, 3, 4))
return aot_fn(x, self.bias, p, self.layer_norm_style, self.norm, residual)
aot_fn = memory_efficient_fusion(fn=fn)
return aot_fn(x, self.bias, residual)

0 comments on commit 72b4b63

Please sign in to comment.