diff --git a/datasets/modelnet40.py b/datasets/modelnet40.py
new file mode 100644
index 0000000..23ebb95
--- /dev/null
+++ b/datasets/modelnet40.py
@@ -0,0 +1,161 @@
+import urllib.request
+import shutil
+from os import listdir, makedirs, remove
+from os.path import exists, join
+from typing import Tuple
+from zipfile import ZipFile
+
+import h5py
+import numpy as np
+from torch.utils.data import Dataset
+
+from utils.pcutil import rand_rotation_matrix
+
+all_classes = ['airplane', 'bathtub', 'bed', 'bench', 'bookshelf', 'bottle',
+               'bowl', 'car', 'chair', 'cone', 'cup', 'curtain', 'desk', 'door',
+               'dresser', 'flower_pot', 'glass_box', 'guitar', 'keyboard',
+               'lamp', 'laptop', 'mantel', 'monitor', 'night_stand', 'person',
+               'piano', 'plant', 'radio', 'range_hood', 'sink', 'sofa',
+               'stairs', 'stool', 'table', 'tent', 'toilet', 'tv_stand', 'vase',
+               'wardrobe', 'xbox']
+
+number_to_category = {i: c for i, c in enumerate(all_classes)}
+category_to_number = {c: i for i, c in enumerate(all_classes)}
+
+
+class ModelNet40(Dataset):
+    def __init__(self, root_dir='/home/datasets/modelnet40', classes=(),
+                 transform=(), split='train', valid_percent=10,
+                 percent_supervised=0.0):
+        """
+        Args:
+            root_dir (string): Directory with all the point clouds.
+            classes (iterable, optional): Class names to keep; keeps all
+                classes when empty.
+            transform (iterable, optional): Optional transforms to be applied
+                on a sample, e.g. 'rotate'.
+            split (string): `train`, `valid` or `test`.
+            valid_percent (int): Percent of train (from the end) to use as
+                valid set.
+            percent_supervised (float): Fraction in [0, 1] of each class to
+                additionally expose as labelled (supervised) samples.
+        """
+        self.root_dir = root_dir
+        self.transform = transform
+        self.split = split.lower()
+        self.valid_percent = valid_percent
+        self.percent_supervised = percent_supervised
+
+        self._maybe_download_data()
+
+        if self.split in ('train', 'valid'):
+            self.files_list = join(self.root_dir, 'train_files.txt')
+        elif self.split == 'test':
+            self.files_list = join(self.root_dir, 'test_files.txt')
+        else:
+            raise ValueError('Incorrect split')
+
+        data, labels = self._load_files()
+
+        if classes:
+            if classes[0] in all_classes:
+                classes = np.asarray([category_to_number[c] for c in classes])
+            mask = [label in classes for label in labels]
+            data = data[mask]
+            labels = labels[mask]
+        else:
+            classes = np.arange(len(all_classes))
+
+        if self.split in ('train', 'valid'):
+            new_data, new_labels = [], []
+            if self.percent_supervised > 0.0:
+                data_sup, labels_sup = [], []
+            for c in classes:
+                pc_in_class = sum(labels.flatten() == c)
+
+                if self.split == 'train':
+                    portion = slice(0, int(pc_in_class * (1 - (self.valid_percent / 100))))
+                else:
+                    portion = slice(int(pc_in_class * (1 - (self.valid_percent / 100))), pc_in_class)
+
+                new_data.append(data[labels.flatten() == c][portion])
+                new_labels.append(labels[labels.flatten() == c][portion])
+
+                if self.percent_supervised > 0.0:
+                    n_max = int(self.percent_supervised * (portion.stop - 1))
+                    data_sup.append(data[labels.flatten() == c][:n_max])
+                    labels_sup.append(labels[labels.flatten() == c][:n_max])
+            data = np.vstack(new_data)
+            labels = np.vstack(new_labels)
+            if self.percent_supervised > 0.0:
+                self.data_sup = np.vstack(data_sup)
+                self.labels_sup = np.vstack(labels_sup)
+        self.data = data
+        self.labels = labels
+
+    def _load_files(self) -> Tuple[np.ndarray, np.ndarray]:
+        with open(self.files_list) as f:
+            files = [join(self.root_dir, line.rstrip().rsplit('/', 1)[1])
+                     for line in f]
+
+        data, labels = [], []
+        for file in files:
+            with h5py.File(file, mode='r') as f:
+                data.extend(f['data'][:])
+                labels.extend(f['label'][:])
+
+        return np.asarray(data), np.asarray(labels)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        # Scale to [-0.5, 0.5]; the division makes a copy, so repeated
+        # indexing does not rescale self.data in place.
+        sample = self.data[idx] / 2
+        label = self.labels[idx]
+
+        if 'rotate' in self.transform:
+            r_rotation = rand_rotation_matrix()
+            # Keep only the rotation around the z-axis.
+            r_rotation[0, 2] = 0
+            r_rotation[2, 0] = 0
+            r_rotation[1, 2] = 0
+            r_rotation[2, 1] = 0
+            r_rotation[2, 2] = 1
+
+            sample = sample.dot(r_rotation).astype(np.float32)
+        if self.percent_supervised > 0.0:
+            id_sup = np.random.randint(self.data_sup.shape[0])
+            sample_sup = self.data_sup[id_sup] / 2
+            label_sup = self.labels_sup[id_sup]
+            return sample, label, sample_sup, label_sup
+        else:
+            return sample, label
+
+    def _maybe_download_data(self):
+        if exists(self.root_dir):
+            return
+
+        print(f'ModelNet40 doesn\'t exist in root directory {self.root_dir}. '
+              f'Downloading...')
+        makedirs(self.root_dir)
+
+        url = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
+
+        filename = url.rpartition('/')[2]
+        file_path = join(self.root_dir, filename)
+        with urllib.request.urlopen(url) as response, \
+                open(file_path, mode='wb') as f:
+            shutil.copyfileobj(response, f)
+
+        print('Extracting...')
+        with ZipFile(file_path, mode='r') as zip_f:
+            zip_f.extractall(self.root_dir)
+
+        remove(file_path)
+
+        extracted_dir = join(self.root_dir, 'modelnet40_ply_hdf5_2048')
+        for d in listdir(extracted_dir):
+            shutil.move(src=join(extracted_dir, d),
+                        dst=self.root_dir)
+
+        shutil.rmtree(extracted_dir)
+
+
+if __name__ == '__main__':
+    ModelNet40()
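For reference, a minimal usage sketch of the dataset above (the path, class choice and `rotate` transform are illustrative):

```python
from torch.utils.data import DataLoader

from datasets.modelnet40 import ModelNet40

# Hypothetical root_dir; the data is downloaded on first use if missing.
dataset = ModelNet40(root_dir='/tmp/modelnet40', classes=['chair'],
                     transform=['rotate'], split='train')
loader = DataLoader(dataset, batch_size=32, shuffle=True)
points, labels = next(iter(loader))  # points: [32, 2048, 3], in [-0.5, 0.5]
```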
diff --git a/evaluation/find_best_epoch_on_validation.py b/evaluation/find_best_epoch_on_validation.py
new file mode 100644
index 0000000..8717f88
--- /dev/null
+++ b/evaluation/find_best_epoch_on_validation.py
@@ -0,0 +1,168 @@
+import argparse
+import json
+import logging
+import random
+import re
+from datetime import datetime
+from importlib import import_module
+from os import listdir
+from os.path import join
+
+import numpy as np
+import pandas as pd
+import torch
+from torch.utils.data import DataLoader
+
+from datasets.shapenet import ShapeNetDataset
+from metrics.jsd import jsd_between_point_cloud_sets
+from utils.util import cuda_setup, setup_logging
+
+
+def _get_epochs_by_regex(path, regex):
+    reg = re.compile(regex)
+    return {int(w[:5]) for w in listdir(path) if reg.match(w)}
+
+
+def main(eval_config):
+    # Load hyperparameters as they were during training
+    train_results_path = join(eval_config['results_root'], eval_config['arch'],
+                              eval_config['experiment_name'])
+    with open(join(train_results_path, 'config.json')) as f:
+        train_config = json.load(f)
+
+    random.seed(train_config['seed'])
+    torch.manual_seed(train_config['seed'])
+    torch.cuda.manual_seed_all(train_config['seed'])
+
+    setup_logging(join(train_results_path, 'results'))
+    log = logging.getLogger(__name__)
+
+    log.debug('Evaluating Jensen-Shannon divergence on the validation set '
+              'for all saved epochs.')
+
+    weights_path = join(train_results_path, 'weights')
+
+    # Find all epochs that have saved model weights
+    e_epochs = _get_epochs_by_regex(weights_path, r'(?P<epoch>\d{5})_E\.pth')
+    g_epochs = _get_epochs_by_regex(weights_path, r'(?P<epoch>\d{5})_G\.pth')
+    epochs = sorted(e_epochs.intersection(g_epochs))
+    log.debug(f'Testing epochs: {epochs}')
+
+    device = cuda_setup(eval_config['cuda'], eval_config['gpu'])
+    log.debug(f'Device variable: {device}')
+    if device.type == 'cuda':
+        log.debug(f'Current CUDA device: {torch.cuda.current_device()}')
+
+    #
+    # Dataset
+    #
+    dataset_name = train_config['dataset'].lower()
+    if dataset_name == 'shapenet':
+        dataset = ShapeNetDataset(root_dir=train_config['data_dir'],
+                                  classes=train_config['classes'], split='valid')
+    elif dataset_name == 'faust':
+        from datasets.dfaust import DFaustDataset
+        dataset = DFaustDataset(root_dir=train_config['data_dir'],
+                                classes=train_config['classes'], split='valid')
+    elif dataset_name == 'mcgill':
+        from datasets.mcgill import McGillDataset
+        dataset = McGillDataset(root_dir=train_config['data_dir'],
+                                classes=train_config['classes'], split='valid')
+    else:
+        raise ValueError(f'Invalid dataset name. Expected `shapenet`, `faust` '
+                         f'or `mcgill`. Got: `{dataset_name}`')
+    classes_selected = ('all' if not train_config['classes']
+                        else ','.join(train_config['classes']))
+    log.debug(f'Selected {classes_selected} classes. Loaded {len(dataset)} '
+              f'samples.')
+
+    if 'distribution' in train_config:
+        distribution = train_config['distribution']
+    elif 'distribution' in eval_config:
+        distribution = eval_config['distribution']
+    else:
+        log.warning('No distribution type specified. Assumed normal = N(0, 0.2)')
+        distribution = 'normal'
+
+    #
+    # Models
+    #
+    arch = import_module(f"model.architectures.{train_config['arch']}")
+    E = arch.Encoder(train_config).to(device)
+    G = arch.Generator(train_config).to(device)
+
+    E.eval()
+    G.eval()
+
+    num_samples = len(dataset.point_clouds_names_valid)
+    data_loader = DataLoader(dataset, batch_size=num_samples,
+                             shuffle=False, num_workers=4,
+                             drop_last=False, pin_memory=True)
+
+    # We take 3 times as many samples as there are in test data in order to
+    # perform JSD calculation in the same manner as in the reference publication
+    noise = torch.FloatTensor(3 * num_samples, train_config['z_size'], 1)
+    noise = noise.to(device)
+
+    X, _ = next(iter(data_loader))
+    X = X.to(device)
+
+    results = {}
+
+    for epoch in reversed(epochs):
+        try:
+            E.load_state_dict(torch.load(
+                join(weights_path, f'{epoch:05}_E.pth')))
+            G.load_state_dict(torch.load(
+                join(weights_path, f'{epoch:05}_G.pth')))
+
+            start_clock = datetime.now()
+
+            # We average JSD computation from 3 independent trials.
+            js_results = []
+            for _ in range(3):
+                if distribution == 'normal':
+                    noise.normal_(0, 0.2)
+                elif distribution == 'beta':
+                    noise_np = np.random.beta(train_config['z_beta_a'],
+                                              train_config['z_beta_b'],
+                                              noise.shape)
+                    noise = torch.tensor(noise_np).float().round().to(device)
+
+                with torch.no_grad():
+                    X_g = G(noise)
+                if X_g.shape[-2:] == (3, 2048):
+                    X_g.transpose_(1, 2)
+
+                jsd = jsd_between_point_cloud_sets(X, X_g, voxels=28)
+                js_results.append(jsd)
+
+            js_result = np.mean(js_results)
+            log.debug(f'Epoch: {epoch} JSD: {js_result: .6f} '
+                      f'Time: {datetime.now() - start_clock}')
+            results[epoch] = js_result
+        except KeyboardInterrupt:
+            log.debug(f'Interrupted during epoch: {epoch}')
+            break
+
+    results = pd.DataFrame.from_dict(results, orient='index', columns=['jsd'])
+    log.debug(f"Minimum JSD at epoch {results.idxmin()['jsd']}: "
+              f"{results.min()['jsd']: .6f}")
+
+
+if __name__ == '__main__':
+    logger = logging.getLogger()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-c', '--config', default=None, type=str,
+                        help='File path for evaluation config')
+    args = parser.parse_args()
+
+    evaluation_config = None
+    if args.config is not None and args.config.endswith('.json'):
+        with open(args.config) as f:
+            evaluation_config = json.load(f)
+    assert evaluation_config is not None
+
+    main(evaluation_config)
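The script reads its settings from a JSON file passed via `-c`. A sketch of a minimal evaluation config, shown as the equivalent Python dict (only the keys actually read above; all values are illustrative):

```python
eval_config = {
    'results_root': '/home/results',         # hypothetical results tree
    'arch': 'autoencoder',                   # results subdirectory name
    'experiment_name': 'chairs_experiment',  # hypothetical experiment
    'cuda': True,
    'gpu': 0,
    'distribution': 'normal',  # fallback when absent from config.json
}
```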
diff --git a/evaluation/generate_data_for_metrics.py b/evaluation/generate_data_for_metrics.py
new file mode 100644
index 0000000..d92a12c
--- /dev/null
+++ b/evaluation/generate_data_for_metrics.py
@@ -0,0 +1,146 @@
+import argparse
+import json
+import logging
+import random
+from importlib import import_module
+from os.path import join
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+
+from datasets.shapenet.shapenet import ShapeNetDataset
+from loggers.basic_logger import setup_logging
+from utils.util import find_latest_epoch, cuda_setup
+
+
+def main(eval_config):
+    # Load hyperparameters as they were during training
+    train_results_path = join(eval_config['results_root'], eval_config['arch'],
+                              eval_config['experiment_name'])
+    with open(join(train_results_path, 'config.json')) as f:
+        train_config = json.load(f)
+
+    random.seed(train_config['seed'])
+    torch.manual_seed(train_config['seed'])
+    torch.cuda.manual_seed_all(train_config['seed'])
+
+    setup_logging(join(train_results_path, 'results'))
+    log = logging.getLogger(__name__)
+
+    weights_path = join(train_results_path, 'weights')
+    if eval_config['epoch'] == 0:
+        epoch = find_latest_epoch(weights_path)
+    else:
+        epoch = eval_config['epoch']
+    log.debug(f'Starting from epoch: {epoch}')
+
+    device = cuda_setup(eval_config['cuda'], eval_config['gpu'])
+    log.debug(f'Device variable: {device}')
+    if device.type == 'cuda':
+        log.debug(f'Current CUDA device: {torch.cuda.current_device()}')
+
+    #
+    # Dataset
+    #
+    dataset_name = train_config['dataset'].lower()
+    if dataset_name == 'shapenet':
+        dataset = ShapeNetDataset(root_dir=train_config['data_dir'],
+                                  classes=train_config['classes'], split='test')
+    elif dataset_name == 'faust':
+        from datasets.dfaust import DFaustDataset
+        dataset = DFaustDataset(root_dir=train_config['data_dir'],
+                                classes=train_config['classes'], split='test')
+    elif dataset_name == 'mcgill':
+        from datasets.mcgill import McGillDataset
+        dataset = McGillDataset(root_dir=train_config['data_dir'],
+                                classes=train_config['classes'], split='test')
+    else:
+        raise ValueError(f'Invalid dataset name. Expected `shapenet`, `faust` '
+                         f'or `mcgill`. Got: `{dataset_name}`')
+    classes_selected = ('all' if not train_config['classes']
+                        else ','.join(train_config['classes']))
+    log.debug(f'Selected {classes_selected} classes. Loaded {len(dataset)} '
+              f'samples.')
+
+    if 'distribution' in train_config:
+        distribution = train_config['distribution']
+    elif 'distribution' in eval_config:
+        distribution = eval_config['distribution']
+    else:
+        log.warning('No distribution type specified. Assumed normal = N(0, 0.2)')
+        distribution = 'normal'
+
+    #
+    # Models
+    #
+    arch = import_module(f"model.architectures.{eval_config['arch']}")
+    E = arch.Encoder(train_config).to(device)
+    G = arch.Generator(train_config).to(device)
+
+    #
+    # Load saved state
+    #
+    E.load_state_dict(torch.load(join(weights_path, f'{epoch:05}_E.pth')))
+    G.load_state_dict(torch.load(join(weights_path, f'{epoch:05}_G.pth')))
+
+    E.eval()
+    G.eval()
+
+    num_samples = len(dataset.point_clouds_names_test)
+    data_loader = DataLoader(dataset, batch_size=num_samples,
+                             shuffle=False, num_workers=4,
+                             drop_last=False, pin_memory=True)
+
+    # We take 3 times as many samples as there are in test data in order to
+    # perform JSD calculation in the same manner as in the reference publication
+    noise = torch.FloatTensor(3 * num_samples, train_config['z_size'], 1)
+    noise = noise.to(device)
+
+    X, _ = next(iter(data_loader))
+    X = X.to(device)
+
+    np.save(join(train_results_path, 'results', f'{epoch:05}_X'),
+            X.cpu().numpy())
+
+    for i in range(3):
+        if distribution == 'normal':
+            noise.normal_(0, 0.2)
+        else:
+            noise_np = np.random.beta(train_config['z_beta_a'],
+                                      train_config['z_beta_b'],
+                                      noise.shape)
+            noise = torch.tensor(noise_np).float().round().to(device)
+        with torch.no_grad():
+            X_g = G(noise)
+        if X_g.shape[-2:] == (3, 2048):
+            X_g.transpose_(1, 2)
+
+        np.save(join(train_results_path, 'results', f'{epoch:05}_Xg_{i}'),
+                X_g.cpu().numpy())
+
+    with torch.no_grad():
+        z_e = E(X.transpose(1, 2))
+        if isinstance(z_e, tuple):
+            z_e = z_e[0]
+        X_rec = G(z_e)
+    if X_rec.shape[-2:] == (3, 2048):
+        X_rec.transpose_(1, 2)
+
+    np.save(join(train_results_path, 'results', f'{epoch:05}_Xrec'),
+            X_rec.cpu().numpy())
+
+
+if __name__ == '__main__':
+    logger = logging.getLogger()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-c', '--config', default=None, type=str,
+                        help='File path for evaluation config')
+    args = parser.parse_args()
+
+    evaluation_config = None
+    if args.config is not None and args.config.endswith('.json'):
+        with open(args.config) as f:
+            evaluation_config = json.load(f)
+    assert evaluation_config is not None
+
+    main(evaluation_config)
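The saved arrays can then be inspected or fed to other metric scripts. A quick sanity check, reusing the illustrative paths from the config sketch above (`np.save` appends the `.npy` suffix):

```python
import numpy as np
from os.path import join

results_dir = '/home/results/autoencoder/chairs_experiment/results'
epoch = 2000

X = np.load(join(results_dir, f'{epoch:05}_X.npy'))         # test clouds
X_g = np.load(join(results_dir, f'{epoch:05}_Xg_0.npy'))    # generated, trial 0
X_rec = np.load(join(results_dir, f'{epoch:05}_Xrec.npy'))  # reconstructions
print(X.shape, X_g.shape, X_rec.shape)
```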
diff --git a/experiments/train_autoencoder.py b/experiments/train_autoencoder.py
new file mode 100644
index 0000000..137e878
--- /dev/null
+++ b/experiments/train_autoencoder.py
@@ -0,0 +1,198 @@
+import argparse
+import json
+import logging
+import random
+from datetime import datetime
+from importlib import import_module
+from itertools import chain
+from os.path import join, exists
+
+import matplotlib.pyplot as plt
+import torch
+import torch.backends.cudnn as cudnn
+import torch.nn.parallel
+import torch.optim as optim
+import torch.utils.data
+from torch.utils.data import DataLoader
+
+from datasets.shapenet import ShapeNetDataset
+from losses.champfer_loss import ChamferLoss
+from losses.earth_mover_distance import EMD
+from utils.pcutil import plot_3d_point_cloud
+from utils.util import find_latest_epoch, prepare_results_dir, cuda_setup, setup_logging
+
+cudnn.benchmark = True
+
+
+def weights_init(m):
+    classname = m.__class__.__name__
+    if classname in ('Conv1d', 'Linear'):
+        torch.nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
+        if m.bias is not None:
+            torch.nn.init.constant_(m.bias, 0)
+
+
+def main(config):
+    random.seed(config['seed'])
+    torch.manual_seed(config['seed'])
+    torch.cuda.manual_seed_all(config['seed'])
+
+    results_dir = prepare_results_dir(config)
+    starting_epoch = find_latest_epoch(results_dir) + 1
+
+    if not exists(join(results_dir, 'config.json')):
+        with open(join(results_dir, 'config.json'), mode='w') as f:
+            json.dump(config, f)
+
+    setup_logging(results_dir)
+    log = logging.getLogger(__name__)
+
+    device = cuda_setup(config['cuda'], config['gpu'])
+    log.debug(f'Device variable: {device}')
+    if device.type == 'cuda':
+        log.debug(f'Current CUDA device: {torch.cuda.current_device()}')
+
+    weights_path = join(results_dir, 'weights')
+
+    #
+    # Dataset
+    #
+    dataset_name = config['dataset'].lower()
+    if dataset_name == 'shapenet':
+        dataset = ShapeNetDataset(root_dir=config['data_dir'],
+                                  classes=config['classes'])
+    else:
+        raise ValueError(f'Invalid dataset name. Expected `shapenet`. '
+                         f'Got: `{dataset_name}`')
+    log.debug("Selected {} classes. Loaded {} samples.".format(
+        'all' if not config['classes'] else ','.join(config['classes']),
+        len(dataset)))
+
+    points_dataloader = DataLoader(dataset, batch_size=config['batch_size'],
+                                   shuffle=config['shuffle'],
+                                   num_workers=config['num_workers'],
+                                   drop_last=True, pin_memory=True)
+
+    #
+    # Models
+    #
+    arch = import_module(f"model.architectures.{config['arch']}")
+    G = arch.Generator(config).to(device)
+    E = arch.Encoder(config).to(device)
+
+    G.apply(weights_init)
+    E.apply(weights_init)
+
+    if config['reconstruction_loss'].lower() == 'chamfer':
+        reconstruction_loss = ChamferLoss().to(device)
+    elif config['reconstruction_loss'].lower() == 'earth_mover':
+        reconstruction_loss = EMD().to(device)
+    else:
+        raise ValueError(f'Invalid reconstruction loss. Expected `chamfer` or '
+                         f'`earth_mover`, got: {config["reconstruction_loss"]}')
+
+    #
+    # Optimizers
+    #
+    EG_optim = getattr(optim, config['optimizer']['EG']['type'])
+    EG_optim = EG_optim(chain(E.parameters(), G.parameters()),
+                        **config['optimizer']['EG']['hyperparams'])
+
+    if starting_epoch > 1:
+        G.load_state_dict(torch.load(
+            join(weights_path, f'{starting_epoch-1:05}_G.pth')))
+        E.load_state_dict(torch.load(
+            join(weights_path, f'{starting_epoch-1:05}_E.pth')))
+
+        EG_optim.load_state_dict(torch.load(
+            join(weights_path, f'{starting_epoch-1:05}_EGo.pth')))
+
+    for epoch in range(starting_epoch, config['max_epochs'] + 1):
+        start_epoch_time = datetime.now()
+
+        G.train()
+        E.train()
+
+        total_loss = 0.0
+        for i, point_data in enumerate(points_dataloader, 1):
+            log.debug('-' * 20)
+
+            X, _ = point_data
+            X = X.to(device)
+
+            # Change dim [BATCH, N_POINTS, N_DIM] -> [BATCH, N_DIM, N_POINTS]
+            if X.size(-1) == 3:
+                X.transpose_(X.dim() - 2, X.dim() - 1)
+
+            X_rec = G(E(X))
+
+            loss = torch.mean(
+                config['reconstruction_coef'] *
+                reconstruction_loss(X.permute(0, 2, 1) + 0.5,
+                                    X_rec.permute(0, 2, 1) + 0.5))
+
+            EG_optim.zero_grad()
+            E.zero_grad()
+            G.zero_grad()
+
+            loss.backward()
+            total_loss += loss.item()
+            EG_optim.step()
+
+            log.debug(f'[{epoch}: ({i})] '
+                      f'Loss: {loss.item():.4f} '
+                      f'Time: {datetime.now() - start_epoch_time}')
+
+        log.debug(
+            f'[{epoch}/{config["max_epochs"]}] '
+            f'Loss: {total_loss / i:.4f} '
+            f'Time: {datetime.now() - start_epoch_time}'
+        )
+
+        #
+        # Save intermediate results
+        #
+        G.eval()
+        E.eval()
+        with torch.no_grad():
+            X_rec = G(E(X)).data.cpu().numpy()
+
+        # Move the last batch to the CPU so matplotlib can plot it.
+        X = X.data.cpu().numpy()
+
+        for k in range(5):
+            fig = plot_3d_point_cloud(X[k][0], X[k][1], X[k][2],
+                                      in_u_sphere=True, show=False,
+                                      title=str(epoch))
+            fig.savefig(
+                join(results_dir, 'samples', f'{epoch:05}_{k}_real.png'))
+            plt.close(fig)
+
+        for k in range(5):
+            fig = plot_3d_point_cloud(X_rec[k][0], X_rec[k][1], X_rec[k][2],
+                                      in_u_sphere=True, show=False,
+                                      title=str(epoch))
+            fig.savefig(join(results_dir, 'samples',
+                             f'{epoch:05}_{k}_reconstructed.png'))
+            plt.close(fig)
+
+        if epoch % config['save_frequency'] == 0:
+            torch.save(G.state_dict(), join(weights_path, f'{epoch:05}_G.pth'))
+            torch.save(E.state_dict(), join(weights_path, f'{epoch:05}_E.pth'))
+
+            torch.save(EG_optim.state_dict(),
+                       join(weights_path, f'{epoch:05}_EGo.pth'))
+
+
+if __name__ == '__main__':
+    logger = logging.getLogger()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-c', '--config', default=None, type=str,
+                        help='config file path')
+    args = parser.parse_args()
+
+    config = None
+    if args.config is not None and args.config.endswith('.json'):
+        with open(args.config) as f:
+            config = json.load(f)
+    assert config is not None
+
+    main(config)
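A sketch of a training config for this script, shown as the equivalent Python dict. These are the keys read above; `prepare_results_dir`, `ShapeNetDataset` and the chosen architecture may read further keys, and all values are illustrative:

```python
config = {
    'seed': 2018,
    'cuda': True,
    'gpu': 0,
    'arch': 'autoencoder',
    'dataset': 'shapenet',
    'data_dir': '/home/datasets/shapenet',
    'classes': ['chair'],
    'batch_size': 64,
    'shuffle': True,
    'num_workers': 4,
    'max_epochs': 2000,
    'save_frequency': 100,
    'reconstruction_loss': 'chamfer',
    'reconstruction_coef': 1.0,
    'z_size': 128,
    'optimizer': {'EG': {'type': 'Adam', 'hyperparams': {'lr': 5e-4}}},
    'model': {'E': {'use_bias': True, 'relu_slope': 0.2},
              'G': {'use_bias': True}},
}
```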
diff --git a/metrics/__init__.py b/metrics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/jsd.py b/metrics/jsd.py
new file mode 100644
index 0000000..c8e0074
--- /dev/null
+++ b/metrics/jsd.py
@@ -0,0 +1,157 @@
+import numpy as np
+import torch
+from numpy.linalg import norm
+from scipy.stats import entropy
+from sklearn.neighbors import NearestNeighbors
+
+
+__all__ = ['js_divergence_between_pc', 'jsd_between_point_cloud_sets']
+
+
+#
+# Compute JS divergence
+#
+
+
+def js_divergence_between_pc(pc1: torch.Tensor, pc2: torch.Tensor,
+                             voxels: int = 64) -> float:
+    """Compute the JSD between the voxel distributions of two point clouds."""
+    pc1_ = _pc_to_voxel_distribution(pc1, voxels)
+    pc2_ = _pc_to_voxel_distribution(pc2, voxels)
+    jsd = _js_divergence(pc1_, pc2_)
+    return jsd
+
+
+def _js_divergence(P, Q):
+    # Ensure probabilities.
+    P_ = P / np.sum(P)
+    Q_ = Q / np.sum(Q)
+
+    # Calculate JSD using scipy.stats.entropy()
+    e1 = entropy(P_, base=2)
+    e2 = entropy(Q_, base=2)
+    e_sum = entropy((P_ + Q_) / 2.0, base=2)
+    res1 = e_sum - ((e1 + e2) / 2.0)
+
+    # Calculate JS divergence using a manually defined KL divergence.
+    # res2 = _jsdiv(P_, Q_)
+    #
+    # if not np.allclose(res1, res2, atol=10e-5, rtol=0):
+    #     warnings.warn('Numerical values of two JSD methods don\'t agree.')
+
+    return res1
+
+
+def _jsdiv(P, Q):
+    """Another way of computing JSD to check numerical stability."""
+    def _kldiv(A, B):
+        a = A.copy()
+        b = B.copy()
+        idx = np.logical_and(a > 0, b > 0)
+        a = a[idx]
+        b = b[idx]
+        return np.sum(a * np.log2(a / b))
+
+    P_ = P / np.sum(P)
+    Q_ = Q / np.sum(Q)
+
+    M = 0.5 * (P_ + Q_)
+
+    return 0.5 * (_kldiv(P_, M) + _kldiv(Q_, M))
+
+
+def _pc_to_voxel_distribution(pc: torch.Tensor, n_voxels: int = 64) -> np.ndarray:
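+    # Example: with n_voxels=64, a point at (0.1, -0.2, 0.0) is shifted to
+    # (0.6, 0.3, 0.5), lands in voxel (38, 19, 32) and is flattened to the
+    # index 38 * 64**2 + 19 * 64 + 32 = 156896.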
+ """ + pclouds = pclouds.cpu().numpy() + epsilon = 10e-4 + bound = 0.5 + epsilon + # if abs(np.max(pclouds)) > bound or abs(np.min(pclouds)) > bound: + # warnings.warn('Point-clouds are not in unit cube.') + # + # if in_sphere and np.max(np.sqrt(np.sum(pclouds ** 2, axis=2))) > bound: + # warnings.warn('Point-clouds are not in unit sphere.') + + grid_coordinates, _ = _unit_cube_grid_point_cloud(grid_resolution, in_sphere) + grid_coordinates = grid_coordinates.reshape(-1, 3) + grid_counters = np.zeros(len(grid_coordinates)) + grid_bernoulli_rvars = np.zeros(len(grid_coordinates)) + nn = NearestNeighbors(n_neighbors=1).fit(grid_coordinates) + + for pc in pclouds: + _, indices = nn.kneighbors(pc) + indices = np.squeeze(indices) + for i in indices: + grid_counters[i] += 1 + indices = np.unique(indices) + for i in indices: + grid_bernoulli_rvars[i] += 1 + + acc_entropy = 0.0 + n = float(len(pclouds)) + for g in grid_bernoulli_rvars: + p = 0.0 + if g > 0: + p = float(g) / n + acc_entropy += entropy([p, 1.0 - p]) + + return acc_entropy / len(grid_counters), grid_counters + + +def _unit_cube_grid_point_cloud(resolution, clip_sphere=False): + """Returns the center coordinates of each cell of a 3D grid with resolution^3 cells, + that is placed in the unit-cube. + If clip_sphere it True it drops the "corner" cells that lie outside the unit-sphere. + """ + grid = np.ndarray((resolution, resolution, resolution, 3), np.float32) + spacing = 1.0 / float(resolution - 1) + for i in range(resolution): + for j in range(resolution): + for k in range(resolution): + grid[i, j, k, 0] = i * spacing - 0.5 + grid[i, j, k, 1] = j * spacing - 0.5 + grid[i, j, k, 2] = k * spacing - 0.5 + + if clip_sphere: + grid = grid.reshape(-1, 3) + grid = grid[norm(grid, axis=1) <= 0.5] + + return grid, spacing diff --git a/models/avae.py b/models/aae.py similarity index 92% rename from models/avae.py rename to models/aae.py index 9b26604..4cf8312 100644 --- a/models/avae.py +++ b/models/aae.py @@ -42,7 +42,7 @@ def __init__(self, config): self.relu_slope = config['model']['D']['relu_slope'] self.dropout = config['model']['D']['dropout'] - self.pc_discriminator_fc = nn.Sequential( + self.model = nn.Sequential( nn.Linear(self.z_size, 512, bias=True), nn.ReLU(inplace=True), @@ -60,7 +60,7 @@ def __init__(self, config): ) def forward(self, x): - logit = self.pc_discriminator_fc(x) + logit = self.model(x) return logit @@ -72,7 +72,7 @@ def __init__(self, config): self.use_bias = config['model']['E']['use_bias'] self.relu_slope = config['model']['E']['relu_slope'] - self.pc_discriminator_conv = nn.Sequential( + self.conv = nn.Sequential( nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1, bias=self.use_bias), nn.ReLU(inplace=True), @@ -93,7 +93,7 @@ def __init__(self, config): bias=self.use_bias), ) - self.pc_discriminator_fc = nn.Sequential( + self.fc = nn.Sequential( nn.Linear(512, 256, bias=True), nn.ReLU(inplace=True) ) @@ -107,9 +107,9 @@ def reparameterize(self, mu, logvar): return eps.mul(std).add_(mu) def forward(self, x): - output = self.pc_discriminator_conv(x) + output = self.conv(x) output2 = output.max(dim=2)[0] - logit = self.pc_discriminator_fc(output2) + logit = self.fc(output2) mu = self.mu_layer(logit) logvar = self.std_layer(logit) z = self.reparameterize(mu, logvar) diff --git a/models/avae_bin.py b/models/aae_bin.py similarity index 70% rename from models/avae_bin.py rename to models/aae_bin.py index 193b168..82feeb4 100644 --- a/models/avae_bin.py +++ b/models/aae_bin.py @@ -9,22 +9,21 @@ def __init__(self, 
diff --git a/models/avae.py b/models/aae.py
similarity index 92%
rename from models/avae.py
rename to models/aae.py
index 9b26604..4cf8312 100644
--- a/models/avae.py
+++ b/models/aae.py
@@ -42,7 +42,7 @@ def __init__(self, config):
         self.relu_slope = config['model']['D']['relu_slope']
         self.dropout = config['model']['D']['dropout']
 
-        self.pc_discriminator_fc = nn.Sequential(
+        self.model = nn.Sequential(
 
             nn.Linear(self.z_size, 512, bias=True),
             nn.ReLU(inplace=True),
@@ -60,7 +60,7 @@ def __init__(self, config):
         )
 
     def forward(self, x):
-        logit = self.pc_discriminator_fc(x)
+        logit = self.model(x)
         return logit
 
 
@@ -72,7 +72,7 @@ def __init__(self, config):
         self.use_bias = config['model']['E']['use_bias']
         self.relu_slope = config['model']['E']['relu_slope']
 
-        self.pc_discriminator_conv = nn.Sequential(
+        self.conv = nn.Sequential(
             nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1,
                       bias=self.use_bias),
             nn.ReLU(inplace=True),
@@ -93,7 +93,7 @@ def __init__(self, config):
                       bias=self.use_bias),
         )
 
-        self.pc_discriminator_fc = nn.Sequential(
+        self.fc = nn.Sequential(
             nn.Linear(512, 256, bias=True),
             nn.ReLU(inplace=True)
         )
@@ -107,9 +107,9 @@ def reparameterize(self, mu, logvar):
         return eps.mul(std).add_(mu)
 
     def forward(self, x):
-        output = self.pc_discriminator_conv(x)
+        output = self.conv(x)
         output2 = output.max(dim=2)[0]
-        logit = self.pc_discriminator_fc(output2)
+        logit = self.fc(output2)
         mu = self.mu_layer(logit)
         logvar = self.std_layer(logit)
         z = self.reparameterize(mu, logvar)
diff --git a/models/avae_bin.py b/models/aae_bin.py
similarity index 70%
rename from models/avae_bin.py
rename to models/aae_bin.py
index 193b168..82feeb4 100644
--- a/models/avae_bin.py
+++ b/models/aae_bin.py
@@ -9,22 +9,21 @@ def __init__(self, config):
         super().__init__()
 
         self.z_size = config['z_size']
-        use_bias = config['model']['G']['use_bias']
-        self.sigmoid = nn.Sigmoid()
+        self.use_bias = config['model']['G']['use_bias']
 
         self.model = nn.Sequential(
-            nn.Linear(in_features=self.z_size, out_features=64, bias=use_bias),
+            nn.Linear(in_features=self.z_size, out_features=64, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(in_features=64, out_features=128, bias=use_bias),
+            nn.Linear(in_features=64, out_features=128, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(in_features=128, out_features=512, bias=use_bias),
+            nn.Linear(in_features=128, out_features=512, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(in_features=512, out_features=1024, bias=use_bias),
+            nn.Linear(in_features=512, out_features=1024, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(in_features=1024, out_features=2048 * 3, bias=use_bias),
+            nn.Linear(in_features=1024, out_features=2048 * 3, bias=self.use_bias),
         )
 
         # self.model = nn.DataParallel(self.model)
@@ -45,20 +44,19 @@ def __init__(self, config):
         self.dropout = config['model']['D']['dropout']
 
         self.model = nn.Sequential(
-
-            nn.Linear(self.z_size, 512, bias=True),
+            nn.Linear(self.z_size, 512, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(512, 512, bias=True),
+            nn.Linear(512, 512, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(512, 128, bias=True),
+            nn.Linear(512, 128, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(128, 64, bias=True),
+            nn.Linear(128, 64, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Linear(64, 1, bias=True)
+            nn.Linear(64, 1, bias=self.use_bias)
         )
 
         # self.model = nn.DataParallel(self.model)
@@ -76,28 +74,23 @@ def __init__(self, config):
         self.use_bias = config['model']['E']['use_bias']
         self.relu_slope = config['model']['E']['relu_slope']
 
-        self.pc_encoder_conv = nn.Sequential(
-            nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1,
-                      bias=self.use_bias),
+        self.conv = nn.Sequential(
+            nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1, bias=self.use_bias),
         )
 
-        self.pc_encoder_fc = nn.Sequential(
+        self.fc = nn.Sequential(
             nn.Linear(512, 256, bias=True),
             nn.ReLU(inplace=True),
 
             nn.Linear(256, self.z_size, bias=True)
@@ -106,8 +99,8 @@ def __init__(self, config):
         self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
-        output = self.pc_encoder_conv(x)
+        output = self.conv(x)
         output2 = output.max(dim=2)[0]
-        logit = self.pc_encoder_fc(output2)
+        logit = self.fc(output2)
         z = self.sigmoid(logit)
         return z
diff --git a/models/avae_triplet.py b/models/aae_triplet.py
similarity index 71%
rename from models/avae_triplet.py
rename to models/aae_triplet.py
index 794648f..6a19018 100644
--- a/models/avae_triplet.py
+++ b/models/aae_triplet.py
@@ -12,37 +12,33 @@ def __init__(self, config):
         self.use_bias = config['model']['E']['use_bias']
         self.relu_slope = config['model']['E']['relu_slope']
 
-        self.pc_encoder_conv = nn.Sequential(
-            nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1,
-                      bias=self.use_bias),
+        self.conv = nn.Sequential(
+            nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=1, bias=self.use_bias),
             nn.ReLU(inplace=True),
 
-            nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1,
-                      bias=self.use_bias),
+            nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1, bias=self.use_bias),
         )
 
-        self.pc_encoder_fc = nn.Sequential(
+        self.fc = nn.Sequential(
             nn.Linear(512, 256, bias=True),
             nn.ReLU(inplace=True),
+
+            nn.Linear(256, self.z_size, bias=True)
         )
 
         self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
-        output = self.pc_encoder_conv(x)
+        output = self.conv(x)
         output2 = output.max(dim=2)[0]
-        logit = self.pc_encoder_fc(output2)
+        logit = self.fc(output2)
         return logit
diff --git a/models/autoencoder.py b/models/autoencoder.py
new file mode 100644
index 0000000..1a7240e
--- /dev/null
+++ b/models/autoencoder.py
@@ -0,0 +1,78 @@
+import torch.nn as nn
+
+
+class Generator(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+
+        self.z_size = config['z_size']
+        self.use_bias = config['model']['G']['use_bias']
+
+        self.model = nn.Sequential(
+            nn.Linear(in_features=self.z_size, out_features=64,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Linear(in_features=64, out_features=128,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Linear(in_features=128, out_features=512,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Linear(in_features=512, out_features=1024,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Linear(in_features=1024, out_features=2048 * 3,
+                      bias=self.use_bias),
+        )
+
+    def forward(self, z):
+        output = self.model(z.squeeze())
+        output = output.view(-1, 3, 2048)
+        return output
+
+
+class Encoder(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+
+        self.z_size = config['z_size']
+        self.use_bias = config['model']['E']['use_bias']
+        self.relu_slope = config['model']['E']['relu_slope']
+
+        self.pc_encoder_conv = nn.Sequential(
+            nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=1,
+                      bias=self.use_bias),
+            nn.ReLU(inplace=True),
+
+            nn.Conv1d(in_channels=256, out_channels=self.z_size, kernel_size=1,
+                      bias=self.use_bias),
+        )
+
+        # self.pc_encoder_fc = nn.Sequential(
+        #     nn.Linear(512, 256, bias=True),
+        #     nn.ReLU(inplace=True),
+        #
+        #     nn.Linear(256, self.z_size, bias=True)
+        # )
+
+    def forward(self, x):
+        output = self.pc_encoder_conv(x)
+        output = output.max(dim=2)[0]
+        # output = self.pc_encoder_fc(output)
+        return output
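Finally, a shape sanity check for the autoencoder modules above (a sketch; the config dict is illustrative and contains only the keys read in `__init__`):

```python
import torch

from models.autoencoder import Encoder, Generator

config = {'z_size': 128,
          'model': {'E': {'use_bias': True, 'relu_slope': 0.2},
                    'G': {'use_bias': True}}}

E, G = Encoder(config), Generator(config)
X = torch.rand(16, 3, 2048) - 0.5  # [batch, xyz, n_points]
z = E(X)                           # [16, 128]
X_rec = G(z)                       # [16, 3, 2048]
print(z.shape, X_rec.shape)
```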