Skip to content

Commit

Permalink
Make val split ratio configurable (#760)
Browse files Browse the repository at this point in the history
* make val split ratio configurable

* use DeprecationWarning, update config key
  • Loading branch information
djdameln authored Dec 5, 2022
1 parent ab6cb57 commit cb06714
Show file tree
Hide file tree
Showing 19 changed files with 118 additions and 42 deletions.
91 changes: 59 additions & 32 deletions anomalib/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,64 @@ def update_multi_gpu_training_config(config: Union[DictConfig, ListConfig]) -> U
return config


def update_datasets_config(config: Union[DictConfig, ListConfig]) -> Union[DictConfig, ListConfig]:
    """Updates the dataset section of the config.

    Backward compatibility shim: deprecated dataset keys are mapped to their
    replacements (with a ``DeprecationWarning``), defaults are filled in, and
    the input-size-dependent settings are refreshed.

    Args:
        config (Union[DictConfig, ListConfig]): Configurable parameters for the current run.

    Returns:
        Union[DictConfig, ListConfig]: Updated config
    """
    # Default to the MVTec format when the config predates the 'format' key.
    if "format" not in config.dataset.keys():
        config.dataset.format = "mvtec"

    if "create_validation_set" in config.dataset.keys():
        warn(
            DeprecationWarning(
                "The 'create_validation_set' parameter is deprecated and will be removed in v0.4.0. Please use "
                "'val_split_mode' instead."
            )
        )
        # True -> carve the validation set out of the test set; False -> reuse the test set.
        config.dataset.val_split_mode = "from_test" if config.dataset.create_validation_set else "same_as_test"

    if "test_batch_size" in config.dataset.keys():
        warn(
            DeprecationWarning(
                "The 'test_batch_size' parameter is deprecated and will be removed in v0.4.0. Please use "
                "'eval_batch_size' instead."
            )
        )
        config.dataset.eval_batch_size = config.dataset.test_batch_size

    if "transform_config" in config.dataset.keys() and "val" in config.dataset.transform_config.keys():
        warn(
            DeprecationWarning(
                "The 'transform_config.val' parameter is deprecated and will be removed in v0.4.0. Please use "
                "'transform_config.eval' instead."
            )
        )
        config.dataset.transform_config.eval = config.dataset.transform_config.val

    # Recompute settings that depend on the (possibly updated) input size.
    config = update_input_size_config(config)

    if "clip_length_in_frames" in config.dataset.keys() and config.dataset.clip_length_in_frames > 1:
        # Implicit string concatenation instead of a backslash continuation, which
        # would embed the source indentation into the warning message.
        warn(
            "Anomalib's models and visualizer are currently not compatible with video datasets with a clip length "
            "> 1. Custom changes to these modules will be needed to prevent errors and/or unpredictable behaviour."
        )

    if config.dataset.format == "folder" and "split_ratio" in config.dataset.keys():
        warn(
            DeprecationWarning(
                "The 'split_ratio' parameter is deprecated and will be removed in v0.4.0. Please use "
                "'normal_split_ratio' instead."
            )
        )
        config.dataset.normal_split_ratio = config.dataset.split_ratio
    return config


def get_configurable_parameters(
model_name: Optional[str] = None,
config_path: Optional[Union[Path, str]] = None,
Expand Down Expand Up @@ -142,38 +200,7 @@ def get_configurable_parameters(
# keep track of the original config file because it will be modified
config_original: DictConfig = config.copy()

# Dataset Configs
if "format" not in config.dataset.keys():
config.dataset.format = "mvtec"

if "create_validation_set" in config.dataset.keys():
warn(
"The 'create_validation_set' parameter is deprecated and will be removed in v0.4.0. Please use "
"'validation_split_mode' instead."
)
config.dataset.validation_split_mode = "from_test" if config.dataset.create_validation_set else "same_as_test"

if "test_batch_size" in config.dataset.keys():
warn(
"The 'test_batch_size' parameter is deprecated and will be removed in v0.4.0. Please use "
"'eval_batch_size' instead."
)
config.dataset.eval_batch_size = config.dataset.test_batch_size

if "transform_config" in config.dataset.keys() and "val" in config.dataset.transform_config.keys():
warn(
"The 'transform_config.val' parameter is deprecated and will be removed in v0.4.0. Please use "
"'transform_config.eval' instead."
)
config.dataset.transform_config.eval = config.dataset.transform_config.val

config = update_input_size_config(config)

if "clip_length_in_frames" in config.dataset.keys() and config.dataset.clip_length_in_frames > 1:
warn(
"Anomalib's models and visualizer are currently not compatible with video datasets with a clip length > 1.\
Custom changes to these modules will be needed to prevent errors and/or unpredictable behaviour."
)
config = update_datasets_config(config)

# Project Configs
project_path = Path(config.project.path) / config.model.name / config.dataset.name
Expand Down
7 changes: 6 additions & 1 deletion anomalib/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
transform_config_train=config.dataset.transform_config.train,
transform_config_eval=config.dataset.transform_config.eval,
val_split_mode=config.dataset.val_split_mode,
val_split_ratio=config.dataset.val_split_ratio,
)
elif config.dataset.format.lower() == "btech":
datamodule = BTech(
Expand All @@ -57,6 +58,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
transform_config_train=config.dataset.transform_config.train,
transform_config_eval=config.dataset.transform_config.eval,
val_split_mode=config.dataset.val_split_mode,
val_split_ratio=config.dataset.val_split_ratio,
)
elif config.dataset.format.lower() == "folder":
datamodule = Folder(
Expand All @@ -67,14 +69,15 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
normal_test_dir=config.dataset.normal_test_dir,
mask_dir=config.dataset.mask,
extensions=config.dataset.extensions,
split_ratio=config.dataset.split_ratio,
normal_split_ratio=config.dataset.normal_split_ratio,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
train_batch_size=config.dataset.train_batch_size,
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
transform_config_train=config.dataset.transform_config.train,
transform_config_eval=config.dataset.transform_config.eval,
val_split_mode=config.dataset.val_split_mode,
val_split_ratio=config.dataset.val_split_ratio,
)
elif config.dataset.format.lower() == "ucsdped":
datamodule = UCSDped(
Expand All @@ -90,6 +93,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
val_split_mode=config.dataset.val_split_mode,
val_split_ratio=config.dataset.val_split_ratio,
)
elif config.dataset.format.lower() == "avenue":
datamodule = Avenue(
Expand All @@ -105,6 +109,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
val_split_mode=config.dataset.val_split_mode,
val_split_ratio=config.dataset.val_split_ratio,
)
else:
raise ValueError(
Expand Down
11 changes: 10 additions & 1 deletion anomalib/data/avenue.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,17 @@ def __init__(
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.FROM_TEST,
val_split_ratio: float = 0.5,
seed: Optional[int] = None,
):
super().__init__(train_batch_size, eval_batch_size, num_workers, val_split_mode)
super().__init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
seed=seed,
)

self.root = Path(root)
self.gt_dir = Path(gt_dir)
Expand Down
6 changes: 5 additions & 1 deletion anomalib/data/base/datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ def __init__(
eval_batch_size: int,
num_workers: int,
val_split_mode: ValSplitMode,
val_split_ratio: float,
seed: Optional[int] = None,
):
super().__init__()
self.train_batch_size = train_batch_size
self.eval_batch_size = eval_batch_size
self.num_workers = num_workers
self.val_split_mode = val_split_mode
self.val_split_ratio = val_split_ratio
self.seed = seed

self.train_data: Optional[AnomalibDataset] = None
Expand Down Expand Up @@ -77,7 +79,9 @@ def _setup(self, _stage: Optional[str] = None) -> None:
self.train_data.setup()
self.test_data.setup()
if self.val_split_mode == ValSplitMode.FROM_TEST:
self.val_data, self.test_data = random_split(self.test_data, [0.5, 0.5], label_aware=True, seed=self.seed)
self.test_data, self.val_data = random_split(
self.test_data, self.val_split_ratio, label_aware=True, seed=self.seed
)
elif self.val_split_mode == ValSplitMode.SAME_AS_TEST:
self.val_data = self.test_data
elif self.val_split_mode != ValSplitMode.NONE:
Expand Down
10 changes: 9 additions & 1 deletion anomalib/data/btech.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def __init__(
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.SAME_AS_TEST,
val_split_ratio: float = 0.5,
seed: Optional[int] = None,
) -> None:
"""Instantiate BTech Lightning Data Module.
Expand Down Expand Up @@ -224,7 +225,14 @@ def __init__(
>>> data["image"].shape, data["mask"].shape
(torch.Size([32, 3, 256, 256]), torch.Size([32, 256, 256]))
"""
super().__init__(train_batch_size, eval_batch_size, num_workers, val_split_mode, seed)
super().__init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
seed=seed,
)

self.root = Path(root)
self.category = Path(category)
Expand Down
10 changes: 6 additions & 4 deletions anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ class Folder(AnomalibDataModule):
normal images for the test dataset. Defaults to None.
mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
the mask annotations. Defaults to None.
split_ratio (float, optional): Ratio to split normal training images and add to the
normal_split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Defaults to 0.2.
extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
Expand Down Expand Up @@ -247,7 +247,7 @@ def __init__(
abnormal_dir: Union[str, Path],
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
split_ratio: float = 0.2,
normal_split_ratio: float = 0.2,
extensions: Optional[Tuple[str]] = None,
#
image_size: Optional[Union[int, Tuple[int, int]]] = None,
Expand All @@ -258,17 +258,19 @@ def __init__(
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.FROM_TEST,
val_split_ratio: float = 0.5,
seed: Optional[int] = None,
):
super().__init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
seed=seed,
)

self.split_ratio = split_ratio
self.normal_split_ratio = normal_split_ratio

pre_process_train = PreProcessor(config=transform_config_train, image_size=image_size)
pre_process_eval = PreProcessor(config=transform_config_eval, image_size=image_size)
Expand Down Expand Up @@ -307,7 +309,7 @@ def _setup(self, _stage: Optional[str] = None):

# add some normal images to the test set
if not self.test_data.has_normal:
self.train_data, normal_test_data = random_split(self.train_data, self.split_ratio, seed=self.seed)
self.train_data, normal_test_data = random_split(self.train_data, self.normal_split_ratio, seed=self.seed)
self.test_data += normal_test_data

super()._setup()
2 changes: 2 additions & 0 deletions anomalib/data/mvtec.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,15 @@ def __init__(
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.SAME_AS_TEST,
val_split_ratio: float = 0.5,
seed: Optional[int] = None,
):
super().__init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
seed=seed,
)

Expand Down
11 changes: 10 additions & 1 deletion anomalib/data/ucsd_ped.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,17 @@ def __init__(
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.FROM_TEST,
val_split_ratio: float = 0.5,
seed: Optional[int] = None,
):
super().__init__(train_batch_size, eval_batch_size, num_workers, val_split_mode)
super().__init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
seed=seed,
)

self.root = Path(root)
self.category = category
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/cflow/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)

model:
name: cflow
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/dfkde/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)

model:
name: dfkde
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/dfm/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)

model:
name: dfm
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/draem/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: ./anomalib/models/draem/transform_config.yaml
eval: ./anomalib/models/draem/transform_config.yaml
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: null
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/fastflow/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: null
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/ganomaly/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: true
tile_size: 64
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/padim/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: null
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/patchcore/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: null
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/reverse_distillation/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: 64
Expand Down
1 change: 1 addition & 0 deletions anomalib/models/stfpm/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dataset:
train: null
eval: null
val_split_mode: same_as_test # options: [same_as_test, from_test]
val_split_ratio: 0.5 # fraction of test images that will be used for validation (not used in 'same_as_test' mode)
tiling:
apply: false
tile_size: null
Expand Down
2 changes: 1 addition & 1 deletion tests/pre_merge/datasets/test_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def folder_data_module():
abnormal_dir="broken_large",
mask_dir=os.path.join(root, "ground_truth/broken_large"),
task="segmentation",
split_ratio=0.2,
normal_split_ratio=0.2,
image_size=(256, 256),
train_batch_size=1,
eval_batch_size=1,
Expand Down

0 comments on commit cb06714

Please sign in to comment.