Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Updates for PyTorch Lightning 2.0 release #266

Merged
merged 8 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
python-version: [3.7, 3.8]
python-version: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Visit https://zamba.drivendata.org/docs/ for full documentation and tutorials.

First, make sure you have the prerequisites installed:

* Python 3.7 or 3.8
* Python 3.8 or 3.9
* FFmpeg > 4.3

Then run:
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ We encourage people to share their custom models trained with Zamba. If you trai

First, make sure you have the prerequisites installed:

* Python 3.7 or 3.8
* Python 3.8 or 3.9
* FFmpeg > 4.3

Then run:
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ GPU configurations.

Prerequisites:

- Python 3.7 or 3.8
- Python 3.8 or 3.9
- FFmpeg

#### [Python](https://www.python.org/) 3.7 or 3.8
#### [Python](https://www.python.org/) 3.8 or 3.9

We recommend [Python installation using Anaconda](https://www.anaconda.com/download/) for all platforms. For more information about how to install Anaconda, here are some useful YouTube videos of installation:

Expand Down
2 changes: 1 addition & 1 deletion docs/docs/models/depth.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ The output of the depth estimation model is a csv with the following columns:
- `time`: seconds from the start of the video
- `distance`: distance between detected animal and the camera

There will be multiple rows per timestamp if there are multiple animals detected in the frame. If there is no animal in the frame, the distance will be null.
There will be multiple rows per timestamp if there are multiple animals detected in the frame. Due to current limitations of the algorithm, the distance for all animals in the frame will be the same. If there is no animal in the frame, the distance will be null.

For example, the first few rows of the `depth_predictions.csv` might look like this:

Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
requires-python = ">=3.7"
requires-python = ">=3.8"
dependencies = [
"appdirs",
"av",
Expand All @@ -25,7 +25,6 @@ dependencies = [
"future",
"fvcore",
"gitpython",
"importlib_metadata ; python_version < '3.8'",
"loguru",
"numpy",
"opencv-python-headless",
Expand All @@ -35,7 +34,7 @@ dependencies = [
"pqdm",
"pydantic",
"python-dotenv",
"pytorch-lightning>=1.6.0",
"pytorch-lightning>=2.0.0",
"pytorchvideo",
"scikit-learn",
"tensorboard",
Expand Down
27 changes: 20 additions & 7 deletions zamba/models/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.tuner import Tuner

from zamba.data.video import VideoLoaderConfig
from zamba.models.config import (
Expand All @@ -24,7 +25,11 @@
PredictConfig,
)
from zamba.models.registry import available_models
from zamba.models.utils import get_checkpoint_hparams, get_default_hparams
from zamba.models.utils import (
configure_accelerator_and_devices_from_gpus,
get_checkpoint_hparams,
get_default_hparams,
)
from zamba.pytorch.finetuning import BackboneFinetuning
from zamba.pytorch_lightning.utils import ZambaDataModule, ZambaVideoClassificationLightningModule

Expand Down Expand Up @@ -274,16 +279,18 @@ def train_model(
if train_config.backbone_finetune_config is not None:
callbacks.append(BackboneFinetuning(**train_config.backbone_finetune_config.dict()))

accelerator, devices = configure_accelerator_and_devices_from_gpus(train_config.gpus)

trainer = pl.Trainer(
gpus=train_config.gpus,
accelerator=accelerator,
devices=devices,
max_epochs=train_config.max_epochs,
auto_lr_find=train_config.auto_lr_find,
logger=tensorboard_logger,
callbacks=callbacks,
fast_dev_run=train_config.dry_run,
strategy=DDPStrategy(find_unused_parameters=False)
if data_module.multiprocessing_context is not None
else None,
if (data_module.multiprocessing_context is not None) and (train_config.gpus > 1)
else "auto",
)

if video_loader_config.cache_dir is None:
Expand All @@ -293,7 +300,8 @@ def train_model(

if train_config.auto_lr_find:
logger.info("Finding best learning rate.")
trainer.tune(model, data_module)
tuner = Tuner(trainer)
tuner.lr_find(model=model, datamodule=data_module)

try:
git_hash = git.Repo(search_parent_directories=True).head.object.hexsha
Expand Down Expand Up @@ -377,8 +385,13 @@ def predict_model(
else:
logger.info(f"Videos will be cached to {video_loader_config.cache_dir}.")

accelerator, devices = configure_accelerator_and_devices_from_gpus(predict_config.gpus)

trainer = pl.Trainer(
gpus=predict_config.gpus, logger=False, fast_dev_run=predict_config.dry_run
accelerator=accelerator,
devices=devices,
logger=False,
fast_dev_run=predict_config.dry_run,
)

configuration = {
Expand Down
11 changes: 11 additions & 0 deletions zamba/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,14 @@ def get_model_species(checkpoint, model_name):
else:
model_species = get_default_hparams(model_name)["species"]
return model_species


def configure_accelerator_and_devices_from_gpus(gpus):
    """Derive the ``accelerator`` and ``devices`` arguments for ``pl.Trainer``
    from a user-specified number of GPUs.

    Args:
        gpus (int, optional): Number of GPUs requested. ``None`` or ``0``
            selects CPU execution.

    Returns:
        tuple: ``(accelerator, devices)`` where ``accelerator`` is ``"gpu"``
        or ``"cpu"``, and ``devices`` is the requested GPU count for GPU runs
        or ``"auto"`` for CPU runs.
    """
    # Guard against gpus=None (a value the config layer may pass through);
    # the original `gpus > 0` comparison would raise TypeError on None.
    if gpus is not None and gpus > 0:
        accelerator = "gpu"
        devices = gpus
    else:
        accelerator = "cpu"
        devices = "auto"
    return accelerator, devices
30 changes: 22 additions & 8 deletions zamba/pytorch_lightning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ def __init__(
self.save_hyperparameters("lr", "scheduler", "scheduler_params", "species")
self.hparams["model_class"] = self.model_class

self.training_step_outputs = []
self.validation_step_outputs = []
self.test_step_outputs = []

def forward(self, x):
return self.model(x)

Expand All @@ -173,9 +177,10 @@ def training_step(self, batch, batch_idx):
y_hat = self(x)
loss = F.binary_cross_entropy_with_logits(y_hat, y)
self.log("train_loss", loss.detach())
self.training_step_outputs.append(loss)
return loss

def validation_step(self, batch, batch_idx):
def _val_step(self, batch, batch_idx):
x, y = batch
y_hat = self(x)
loss = F.binary_cross_entropy_with_logits(y_hat, y)
Expand All @@ -188,6 +193,16 @@ def validation_step(self, batch, batch_idx):
"y_proba": y_proba,
}

def validation_step(self, batch, batch_idx):
output = self._val_step(batch, batch_idx)
self.validation_step_outputs.append(output)
return output

def test_step(self, batch, batch_idx):
    """Run one test step (identical logic to validation) and buffer its output."""
    step_result = self._val_step(batch, batch_idx)
    self.test_step_outputs.append(step_result)
    return step_result

@staticmethod
def aggregate_step_outputs(
outputs: Dict[str, np.ndarray]
Expand Down Expand Up @@ -229,23 +244,22 @@ def compute_and_log_metrics(
):
self.log(f"species/{subset}_{metric_name}/{label}", metric)

def validation_epoch_end(self, outputs: List[Dict[str, np.ndarray]]):
def on_validation_epoch_end(self):
    """Aggregate the outputs buffered by ``validation_step`` to compute and
    log the validation macro F1 and top-K metrics, then clear the buffer.

    Note: under PyTorch Lightning 2.0 this hook takes no ``outputs`` argument;
    step outputs are accumulated in ``self.validation_step_outputs`` instead.
    """
    y_true, y_pred, y_proba = self.aggregate_step_outputs(self.validation_step_outputs)
    self.compute_and_log_metrics(y_true, y_pred, y_proba, subset="val")
    self.validation_step_outputs.clear()  # free memory before the next epoch

def test_step(self, batch, batch_idx):
return self.validation_step(batch, batch_idx)

def test_epoch_end(self, outputs: List[Dict[str, np.ndarray]]):
y_true, y_pred, y_proba = self.aggregate_step_outputs(outputs)
def on_test_epoch_end(self):
    """Aggregate buffered test-step outputs, log test metrics, and release the buffer."""
    labels, preds, probas = self.aggregate_step_outputs(self.test_step_outputs)
    self.compute_and_log_metrics(labels, preds, probas, subset="test")
    self.test_step_outputs.clear()  # free memory

def predict_step(self, batch, batch_idx, dataloader_idx: Optional[int] = None):
x, y = batch
Expand Down