From ae21df1b00ab0ce362e9bfb0144c19cb2bf1e1ef Mon Sep 17 00:00:00 2001 From: Kyle Beltran Hatch Date: Mon, 15 Mar 2021 13:06:40 -0600 Subject: [PATCH] cut out non velocity functions --- .gitignore | 17 + bhmtorch_cpu.py | 271 ++++++++++++++ configs/1_toy1_vel | 32 ++ configs/defaults | 30 ++ kernel.py | 74 ++++ plot_methods.py | 343 ++++++++++++++++++ query_methods.py | 195 ++++++++++ spatun.py | 160 ++++++++ spatun_crossval.py | 205 +++++++++++ spatun_multiframe.py | 227 ++++++++++++ train_methods.py | 87 +++++ utils_data_preprocessing/5_arisim_process.py | 39 ++ utils_data_preprocessing/6_jfk_process.py | 137 +++++++ .../add_timesteps_to_velocity_data.py | 29 ++ .../format_astyx_hires_data.py | 108 ++++++ .../format_nuscenes_data.py | 82 +++++ utils_data_preprocessing/normalize_data.py | 49 +++ .../temp_visualize_pptk.py | 14 + utils_data_preprocessing/train_test_split.py | 60 +++ utils_filereader.py | 137 +++++++ utils_metrics.py | 89 +++++ 21 files changed, 2385 insertions(+) create mode 100644 .gitignore create mode 100755 bhmtorch_cpu.py create mode 100644 configs/1_toy1_vel create mode 100755 configs/defaults create mode 100755 kernel.py create mode 100755 plot_methods.py create mode 100644 query_methods.py create mode 100755 spatun.py create mode 100755 spatun_crossval.py create mode 100644 spatun_multiframe.py create mode 100644 train_methods.py create mode 100644 utils_data_preprocessing/5_arisim_process.py create mode 100644 utils_data_preprocessing/6_jfk_process.py create mode 100644 utils_data_preprocessing/add_timesteps_to_velocity_data.py create mode 100644 utils_data_preprocessing/format_astyx_hires_data.py create mode 100644 utils_data_preprocessing/format_nuscenes_data.py create mode 100644 utils_data_preprocessing/normalize_data.py create mode 100755 utils_data_preprocessing/temp_visualize_pptk.py create mode 100644 utils_data_preprocessing/train_test_split.py create mode 100644 utils_filereader.py create mode 100644 utils_metrics.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0a4037f --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +__pycache__/ +.remote-sync.json +mdls/ +plots/ +query_data/ +/datasets/* +!/datasets/generation/ +!/datasets/bernoulli-surface/ +/reports/* +/configs/kyle_ransalu_tuned/* +/configs/kyle_ransalu_tuned_dec/* +.idea +.DS_Store +datasets/.DS_Store +.ipynb_checkpoints +.ipynb_checkpoints/* +scratch.py diff --git a/bhmtorch_cpu.py b/bhmtorch_cpu.py new file mode 100755 index 0000000..81a9698 --- /dev/null +++ b/bhmtorch_cpu.py @@ -0,0 +1,271 @@ +""" +# 3D Bayesian Hilbert Maps with pytorch +# Ransalu Senanayake, Jason Zheng, and Kyle Hatch +""" +import math +import numpy as np +import torch +import statsmodels.api as sm + +from kernel import rbf_kernel_conv, rbf_kernel_wasserstein, rbf_kernel + +class BHM_VELOCITY_PYTORCH: + def __init__(self, alpha=None, beta=None, gamma=0.05, grid=None, cell_resolution=(5, 5), cell_max_min=None, X=None, nIter=2, kernel_type='rbf', likelihood_type="gamma", device='cpu', w_hatx=None, w_haty=None, w_hatz=None): + self.nIter = nIter + self.rbf_kernel_type = kernel_type + self.likelihood_type = likelihood_type + + if device == 'cpu': + self.device = torch.device("cpu") + elif device == "gpu": + self.device = torch.device("cuda:0") + + self.alpha = alpha + self.beta = beta + + self.gamma = torch.tensor(gamma, device=self.device) + if self.gamma.shape[0] > 2: + self.gamma = self.gamma[:2] + + if grid is not None: + self.grid = grid + else: + self.grid = self.__calc_grid_auto(cell_resolution, 
cell_max_min, X) + + if w_hatx is not None: + self.w_hatx = w_hatx + + if w_haty is not None: + self.w_haty = w_haty + + if w_hatz is not None: + self.w_hatz = w_hatz + + def updateMuSig(self, mu_x, sig_x, mu_y, sig_y, mu_z, sig_z): + self.mu_x = mu_x + self.sig_x = sig_x + + self.mu_y = mu_y + self.sig_y = sig_y + + self.mu_z = mu_z + self.sig_z = sig_z + + def fit(self, X, y_vx, y_vy, y_vz, eps=1e-10): + if self.likelihood_type == "gamma": + return self.fit_gamma_likelihood(X, y_vx, y_vy, y_vz, eps) + elif self.likelihood_type == "gaussian": + return self.fit_gaussian_likelihood(X, y_vx, y_vy, y_vz) + else: + raise ValueError("Unsupported likelihood type: \"{}\"".format(self.likelihood_type)) + + + def fit_gamma_likelihood(self, X, y_vx, y_vy, y_vz, eps=1e-10): + X = self.__sparse_features(X, self.rbf_kernel_type) + + all_ys = torch.cat((y_vx, y_vy, y_vz), dim=-1) + + X = X.double() + y_vx = y_vx.double() + y_vy = y_vy.double() + y_vz = y_vz.double() + + y_vx = torch.log(y_vx + eps) + y_vy = torch.log(y_vy + eps) + y_vz = torch.log(y_vz + eps) + lam = 0.1 + + self.w_hatx = torch.pinverse(X.t().mm(X) + lam*torch.eye(X.shape[1], dtype=torch.double)).mm(X.t().mm(y_vx).double()) + self.w_haty = torch.pinverse(X.t().mm(X) + lam*torch.eye(X.shape[1], dtype=torch.double)).mm(X.t().mm(y_vy).double()) + self.w_hatz = torch.pinverse(X.t().mm(X) + lam*torch.eye(X.shape[1], dtype=torch.double)).mm(X.t().mm(y_vz).double()) + + + def fit_gaussian_likelihood(self, X, y_vx, y_vy, y_vz): + """ + :param X: raw data + :param y: labels + """ + print(" Data shape:", X.shape) + X = self.__sparse_features(X, self.rbf_kernel_type) + print(" Kernelized data shape:", X.shape) + print(" Hinge point shape:", self.grid.shape) + + self.mu_x, self.sig_x = self.__calc_posterior(X, y_vx) + self.mu_y, self.sig_y = self.__calc_posterior(X, y_vy) + self.mu_z, self.sig_z = self.__calc_posterior(X, y_vz) + return self.mu_x, self.sig_x, self.mu_y, self.sig_y, self.mu_z, self.sig_z + + def predict(self, Xq, query_blocks=-1, variance_only=False): + if self.likelihood_type == "gamma": + return self.predict_gamma_likelihood(Xq, query_blocks, variance_only) + elif self.likelihood_type == "gaussian": + return self.predict_gaussian_likelihood(Xq, query_blocks, variance_only) + else: + raise ValueError("Unsupported likelihood type: \"{}\"".format(self.likelihood_type)) + + def predict_gaussian_likelihood(self, Xq, query_blocks=-1, variance_only=False): + """ + :param Xq: raw inquery points + :return: mean occupancy (Laplace approximation) + """ + + Nq, M = Xq.shape[0], self.grid.shape[0] + + Xq = Xq.float() + print(" Query data shape:", Xq.shape) + + if query_blocks <= 0: + Xq = self.__sparse_features(Xq, self.rbf_kernel_type) # .double() + print(" Kernelized query data shape:", Xq.shape) + + if variance_only: + sig2_inv_a_x = 1/self.beta + diag_only_mm(Xq.mm(self.sig_x), Xq.t()) # (635, 2508) X (2508, 2508) --> (635, 2508), (635, 2508) X (2508, 635) --> (635, 635) + sig2_inv_a_y = 1/self.beta + diag_only_mm(Xq.mm(self.sig_y), Xq.t()) + sig2_inv_a_z = 1/self.beta + diag_only_mm(Xq.mm(self.sig_z), Xq.t()) + else: + sig2_inv_a_x = 1/self.beta + Xq.mm(self.sig_x).mm(Xq.t()) # (635, 2508) X (2508, 2508) --> (635, 2508), (635, 2508) X (2508, 625) --> (635, 635) + sig2_inv_a_y = 1/self.beta + Xq.mm(self.sig_y).mm(Xq.t()) + sig2_inv_a_z = 1/self.beta + Xq.mm(self.sig_z).mm(Xq.t()) + else: + step_size = Xq.shape[0] // query_blocks + if Nq % step_size != 0: + query_blocks += 1 + + mu_a_x = torch.zeros((Nq, 1)) + mu_a_y = torch.zeros((Nq, 1)) 
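+            # mu_a_z below mirrors mu_a_x / mu_a_y. The sig2_inv_a_* buffers
+            # are (Nq,) when variance_only keeps only the covariance diagonal
+            # (computed per block via diag_only_mm, avoiding the full matrix
+            # product) and (Nq, Nq) when the complete query covariance is
+            # assembled block-by-block.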
+ mu_a_z = torch.zeros((Nq, 1)) + + if variance_only: + sig2_inv_a_x = torch.zeros((Nq,)) + sig2_inv_a_y = torch.zeros((Nq,)) + sig2_inv_a_z = torch.zeros((Nq,)) + else: + sig2_inv_a_x = torch.zeros((Nq, Nq)) + sig2_inv_a_y = torch.zeros((Nq, Nq)) + sig2_inv_a_z = torch.zeros((Nq, Nq)) + + for i in range(query_blocks): + start = i * step_size + end = start + step_size + if end > Nq: + end = Nq + + Xq_block_i = self.__sparse_features(Xq[start:end], self.rbf_kernel_type) # .double() + print(" Kernelized query data shape {} in block {} out of {}".format(Xq_block_i.shape, i, query_blocks)) + + mu_a_x[start:end] = Xq_block_i.mm(self.mu_x.reshape(-1, 1))#.squeeze() + mu_a_y[start:end] = Xq_block_i.mm(self.mu_y.reshape(-1, 1))#.squeeze() + mu_a_z[start:end] = Xq_block_i.mm(self.mu_z.reshape(-1, 1))#.squeeze() + + if variance_only: + #print("VARIANCE ONLY") + sig2_inv_a_x[start:end] = 1/self.beta + diag_only_mm(Xq_block_i.mm(self.sig_x), Xq_block_i.t()) + sig2_inv_a_y[start:end] = 1/self.beta + diag_only_mm(Xq_block_i.mm(self.sig_y), Xq_block_i.t()) + sig2_inv_a_z[start:end] = 1/self.beta + diag_only_mm(Xq_block_i.mm(self.sig_z), Xq_block_i.t()) + else: + #print("NO VARIANCE ONLY") + for j in range(query_blocks): + start2 = j * step_size + end2 = start2 + step_size + if end2 > Xq.shape[0]: + end2 = Xq.shape[0] + + Xq_block_2 = self.__sparse_features(Xq[start2:end2], self.rbf_kernel_type) + sig2_inv_a_x[start:end, start2:end2] = 1/self.beta + Xq_block_i.mm(self.sig_x).mm(Xq_block_2.t()) + sig2_inv_a_y[start:end, start2:end2] = 1/self.beta + Xq_block_i.mm(self.sig_y).mm(Xq_block_2.t()) + sig2_inv_a_z[start:end, start2:end2] = 1/self.beta + Xq_block_i.mm(self.sig_z).mm(Xq_block_2.t()) + + if variance_only: + sig2_inv_a_x, sig2_inv_a_y, sig2_inv_a_z = sig2_inv_a_x.view(-1,1), sig2_inv_a_y.view(-1,1), sig2_inv_a_z.view(-1,1) + + return mu_a_x, sig2_inv_a_x, mu_a_y, sig2_inv_a_y, mu_a_z, sig2_inv_a_z + + def predict_gamma_likelihood(self, Xq, query_blocks=-1): + print(" Query data shape:", Xq.shape) + Xq = self.__sparse_features(Xq, self.rbf_kernel_type).double() + print(" Kernelized query data shape:", Xq.shape) + + if query_blocks <= 0: + mean_x, mean_y, mean_z = torch.exp(Xq.mm(self.w_hatx)), torch.exp(Xq.mm(self.w_haty)), torch.exp(Xq.mm(self.w_hatz)) + else: + mean_x_, mean_y_, mean_z_ = torch.exp(Xq.mm(self.w_hatx)), torch.exp(Xq.mm(self.w_haty)), torch.exp(Xq.mm(self.w_hatz)) + + step_size = Xq.shape[0] // query_blocks + if Xq.shape[0] % step_size != 0: + query_blocks += 1 + + mu_a_x = torch.zeros((Xq.shape[0], 1)) + mu_a_y = torch.zeros((Xq.shape[0], 1)) + mu_a_z = torch.zeros((Xq.shape[0], 1)) + sig2_inv_a_x = torch.zeros((Xq.shape[0], Xq.shape[0])) + sig2_inv_a_y = torch.zeros((Xq.shape[0], Xq.shape[0])) + sig2_inv_a_z = torch.zeros((Xq.shape[0], Xq.shape[0])) + + for i in range(query_blocks): + start = i * step_size + end = start + step_size + if end > Xq.shape[0]: + end = Xq.shape[0] + + + return mean_x, mean_y, mean_z + + def __calc_grid_auto(self, cell_resolution, max_min, X): + """ + :param X: a sample of lidar locations + :param cell_resolution: resolution to hinge RBFs as (x_resolution, y_resolution) + :param max_min: realm of the RBF field as (x_min, x_max, y_min, y_max, z_min, z_max) + :return: numpy array of size (# of RNFs, 2) with grid locations + """ + + if max_min is None: + # if 'max_min' is not given, make a boundarary based on X + # assume 'X' contains samples from the entire area + expansion_coef = 1.2 + x_min, x_max = expansion_coef*X[:, 0].min(), expansion_coef*X[:, 
0].max() + y_min, y_max = expansion_coef*X[:, 1].min(), expansion_coef*X[:, 1].max() + else: + x_min, x_max = max_min[0], max_min[1] + y_min, y_max = max_min[2], max_min[3] + z_min, z_max = max_min[4], max_min[5] + + xx, yy, zz = torch.meshgrid(torch.arange(x_min, x_max, cell_resolution[0]), \ + torch.arange(y_min, y_max, cell_resolution[1]), \ + torch.arange(z_min, z_max, cell_resolution[2])) + + return torch.stack([xx.flatten(), yy.flatten(), zz.flatten()], dim=1) + + + def __sparse_features(self, X, rbf_kernel_type='conv'): + """ + :param X: inputs of size (N,3) + :return: hinged features with intercept of size (N, # of features + 1) + """ + if rbf_kernel_type == 'conv': + raise NotImplementedError + # rbf_features = rbf_kernel_conv(X, self.grid, gamma=self.gamma, sigma=sigma, device=self.device) + elif rbf_kernel_type == 'wass': + raise NotImplementedError + # rbf_features = rbf_kernel_wasserstein(X, self.grid, gamma=self.gamma, sigma=sigma, device=self.device) + else: + rbf_features = rbf_kernel(X, self.grid, gamma=self.gamma) + return rbf_features + + def __calc_posterior(self, X, y): + """ + :param X: input features + :param y: labels + :return: new_mean, new_varaiance + """ + order = X.shape[1] + theta = X.numpy() + + A = self.beta*theta.T.dot(theta) + self.alpha*np.eye((order)) + sig = np.linalg.pinv(A) + mu = self.beta*sig.dot(theta.T.dot(y)) + + return torch.tensor(mu, dtype=torch.float32), torch.tensor(sig, dtype=torch.float32) # change to double?? + + +def diag_only_mm(x, y): + return (x * y.T).sum(-1) diff --git a/configs/1_toy1_vel b/configs/1_toy1_vel new file mode 100644 index 0000000..362699e --- /dev/null +++ b/configs/1_toy1_vel @@ -0,0 +1,32 @@ +{ + "mode": "tqp", + "num_frames": 1, + "config": "", + "save_config": "./kyle_ransalu_tuned_dec/1_toy1_vel", + + "likelihood_type": "gaussian", + "dataset_path": "./datasets/kyle_ransalu/1_toy/1_toy1_vel_train_normalized", + "area_min": [-1.2,-1.2,-1.2], + "area_max": [1.2,1.2,1.2], + "hinge_type": "grid", + "hinge_dist": [0.2, 0.2, 0.2], + "kernel_type": "rbf", + "gamma": [30], + "num_partitions": [1,1,1], + "partition_bleed": 0.25, + "save_model_path": "tmp", + + "query_dist": [0.1,0.1,0.1,-0.6], + "query_blocks": 10, + "variance_only": true, + "eval_path": "./datasets/kyle_ransalu/1_toy/1_toy1_vel_test", + "eval": false, + "save_query_data_path": "tmp", + + "occupancy_plot_type": "scatter", + "plot_volumetric": false, + "plot_axis": "x", + "plot_title": "1_toy1_vel", + "surface_threshold": [-1,120], + "variance_threshold": 30 +} diff --git a/configs/defaults b/configs/defaults new file mode 100755 index 0000000..9d91aef --- /dev/null +++ b/configs/defaults @@ -0,0 +1,30 @@ +{ + "mode": "tqp", + "num_frames": 1, + "config": "", + "save_config": "", + + "likelihood_type": "", + "dataset_path": "./datasets/toy/toy", + "area_min": [0,0,0], + "area_max": [0,0,0], + "hinge_dist": [3,3,3], + "kernel_type": "conv", + "gamma": [0.1], + "num_partitions": [1,1,1], + "partition_bleed": 0.25, + "save_model_path": "tmp", + + "query_dist": [0.5,0.5,0.5], + "query_blocks": 10, + "variance_only": false, + "eval_path": "", + "eval": false, + "save_query_data_path": "tmp", + + "occupancy_plot_type": "scatter", + "plot_volumetric": false, + "plot_axis": "x", + "plot_title": "", + "surface_threshold": [-10000,10000] +} diff --git a/kernel.py b/kernel.py new file mode 100755 index 0000000..803a1b3 --- /dev/null +++ b/kernel.py @@ -0,0 +1,74 @@ +import math +import numpy as np +import torch + +from sklearn.metrics.pairwise import 
euclidean_distances, rbf_kernel as sklearn_kernel + +DEFAULT_DEVICE = torch.device("cpu") + +# ============================================================================== +# Kernel Method Implementations using PyTorch +# ============================================================================== +def rbf_kernel_conv(X, Y, gamma, sigma, device=DEFAULT_DEVICE): + """ + Vectorized implementation + Performs rbf kernel convolution on input distributions and hinge point grid + """ + N, d = X.shape + if gamma is None: + gamma = 1. / d + if sigma is None: + sigma = torch.zeros(N) + + if sigma.unsqueeze(-1).shape[1] == 1 and gamma.unsqueeze(-1).shape[0] == 1: + K = torch.from_numpy(euclidean_distances(X, Y, squared=True)).float().to(device) + sig_terms = 2*sigma**2 + return (1/((1+gamma*sig_terms)**(1/2))).unsqueeze(-1) * torch.exp(-1*(K/((1/gamma) + sig_terms).unsqueeze(-1))) + + # Handle Multi-dim sigma/gamma + M = Y.shape[0] + if len(sigma.shape) == 1: + sigma = sigma.unsqueeze(-1) + det = (torch.det(torch.diag_embed(2*gamma*sigma+1))**(-0.5)).unsqueeze(-1) + diff = X.unsqueeze(1)-Y + p = torch.diag_embed((2*sigma+1/gamma)**(-1)) + return det * torch.exp(-1 *((diff@p).view(N*M, 1, d)).bmm(diff.view(N*M, d, 1)).view(N,M)) + + +def rbf_kernel_wasserstein(X, Y, gamma, sigma=None, device=DEFAULT_DEVICE): + """ + Vectorized implementation + Performs rbf kernel wasserstein on input distributions and hinge point grid + """ + N, d = X.shape + if gamma is None: + gamma = 1. / d + if sigma is None: + sigma = torch.zeros(N) + + if gamma.unsqueeze(-1).shape[0] == 1: + K = torch.from_numpy(euclidean_distances(X, Y, squared=True)).float().to(device) + if sigma.unsqueeze(-1).shape[1] == 1: + return torch.exp(-1*gamma*(K+(sigma**2).unsqueeze(-1))) + covar_trace = torch.sum(sigma) + return torch.exp(-1*gamma*(K+covar_trace)) + else: + M = Y.shape[0] + gamma = torch.diag(gamma) + diff = X.unsqueeze(1)-Y + covar_trace = torch.sum(sigma) + return torch.exp(-1*((diff@gamma).view(N*M, 1, d)).bmm(diff.view(N*M, d, 1)).view(N,M) + covar_trace) + + +def rbf_kernel(X, Y, gamma, sigma=None, device=DEFAULT_DEVICE): + """ + Vectorized Implementations + Performs RBF kernel method using ARD with 2 and 3 dimensional gamma + """ + if gamma.unsqueeze(-1).shape[0] == 1: + return torch.tensor(sklearn_kernel(X.cpu().detach().numpy(), Y.cpu().detach().numpy(), gamma.cpu().detach().numpy()), device=device) + + else: + print("In second") + K = torch.from_numpy(euclidean_distances(X, Y, squared=True)).float().to(device) + return torch.exp(-1/2 * torch.sum(gamma)*K) diff --git a/plot_methods.py b/plot_methods.py new file mode 100755 index 0000000..d56361e --- /dev/null +++ b/plot_methods.py @@ -0,0 +1,343 @@ +import argparse +import json +import os +import pandas as pd +import plotly +import plotly.graph_objects as go +import time +import torch + +#vivian +import plotly.figure_factory as ff +import trimesh +import time +import torch +import numpy as np +from skimage import measure +import utils_filereader + +from plotly.subplots import make_subplots + +from sklearn.metrics.pairwise import euclidean_distances + +plotly.io.orca.config.executable = "/home/khatch/anaconda3/envs/hilbert/bin/orca" + + +# ============================================================================== +# BHM Plotting Class +# ============================================================================== +class BHM_PLOTTER(): + def __init__(self, args, plot_title, surface_threshold, variance_threshold, query_dist, occupancy_plot_type='scatter', 
plot_volumetric=False, plot_axis="x"): + self.args = args + self.plot_title = plot_title + self.variance_threshold = variance_threshold + self.surface_threshold = surface_threshold + self.query_dist = query_dist + self.occupancy_plot_type = occupancy_plot_type + + self.plot_volumetric = plot_volumetric + self.plot_axis = plot_axis + + print(' Successfully initialized plotly plotting class') + + def _filter_predictions_velocity(self, X, y, var): + """ + :param X: Nx3 position + :param y: N values + :return: thresholded X, y vals + """ + + # Filter -1 to 1 + min_filterout = X.max(dim=-1).values >= 1 + max_filterout = X.min(dim=-1).values <= -1 + mask = torch.logical_not(torch.logical_or(min_filterout, max_filterout)) + X = X[mask, :] + y = y[mask, :] + var = var[mask, :] + + if len(self.surface_threshold) == 1: + mask = y.squeeze() >= self.surface_threshold[0] + else: + min_mask = y.squeeze() >= self.surface_threshold[0] + max_mask = y.squeeze() <= self.surface_threshold[1] + mask = torch.logical_and(min_mask, max_mask) + + X = X[mask, :] + y = y[mask, :] + var = var[mask, :] + + var_mask = var.squeeze(-1) <= self.variance_threshold + X = X[var_mask, :] + y = y[var_mask, :] + var = var[var_mask, :] + + return X, y, var + + def _filter_predictions_velocity_where(self, X, y, var): + """ + :param X: Nx3 position + :param y: N values + :return: thresholded X, y vals + """ + + # Filter -1 to 1 + min_filterout = X.max(dim=-1).values >= 1 + max_filterout = X.min(dim=-1).values <= -1 + mask = torch.logical_not(torch.logical_or(min_filterout, max_filterout)) + # X = torch.where((torch.ones_like(X) * mask[:, None]).to(dtype=torch.bool), X, torch.ones_like(X) * -1000) + y = torch.where((torch.ones_like(y) * mask[:, None]).to(dtype=torch.bool), y, torch.ones_like(y) * -1000) + var = torch.where((torch.ones_like(var) * mask[:, None]).to(dtype=torch.bool), var, torch.ones_like(var) * -1000) + + if len(self.surface_threshold) == 1: + mask = y.squeeze() >= self.surface_threshold[0] + else: + min_mask = y.squeeze() >= self.surface_threshold[0] + max_mask = y.squeeze() <= self.surface_threshold[1] + mask = torch.logical_and(min_mask, max_mask) + + # X = torch.where((torch.ones_like(X) * mask[:, None]).to(dtype=torch.bool), X, torch.ones_like(X) * -1000) + y = torch.where((torch.ones_like(y) * mask[:, None]).to(dtype=torch.bool), y, torch.ones_like(y) * -1000) + var = torch.where((torch.ones_like(var) * mask[:, None]).to(dtype=torch.bool), var, torch.ones_like(var) * -1000) + + var_mask = var.squeeze(-1) <= self.variance_threshold + # X = torch.where((torch.ones_like(X) * var_mask[:, None]).to(dtype=torch.bool), X, torch.ones_like(X) * -1000) + y = torch.where((torch.ones_like(y) * var_mask[:, None]).to(dtype=torch.bool), y, torch.ones_like(y) * -1000) + var = torch.where((torch.ones_like(var) * var_mask[:, None]).to(dtype=torch.bool), var, torch.ones_like(var) * -1000) + + return X, y, var + + def _plot_velocity_volumetric(self, Xqs, yqs, fig, row, col, plot_args=None): + """ + # generic method for any plot + :param Xqs: filtered Nx3 position + :param yqs: filtered N values + :param fig: + :param row: + :param col:print("Number of points after filtering: ", Xq_mv.shape[0]) + :param plot_args: symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max + """ + + print(" Plotting row {}, col {}".format(row, col)) + fname_in = "./datasets/kyle_ransalu/5_airsim/5_airsim1/5_airsim1_vel_train_normalized_infilled" + prefilled_X = pd.read_csv(fname_in + '.csv', delimiter=',').to_numpy()[:2542,1:4] + mask = 
np.sum(euclidean_distances(Xqs, prefilled_X) <= 0.3, axis=1) >= 1 + # Xqs = torch.where((torch.ones_like(Xqs) * mask[:, None]).to(dtype=torch.bool), Xqs, torch.ones_like(Xqs) * -1000) + yqs = torch.where((torch.ones_like(yqs) * mask[:, None]).to(dtype=torch.bool), yqs, torch.ones_like(yqs) * -1000) + + # marker and colorbar arguments + if plot_args is None: + symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max = 'square', 8, 0.2, False, yqs[:,0].min(), yqs[:,0].max() + else: + symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max = plot_args + if cbar_x_pos is not False: + colorbar = dict(x=cbar_x_pos, + len=1, + y=0.5 + ) + else: + colorbar = dict() + + colorbar["tickfont"] = dict(size=18) + + fig.add_trace( + go.Volume( + x=Xqs[:, 0], + y=Xqs[:, 1], + z=Xqs[:, 2], + isomin=-7, + isomax=7, + value=yqs, + opacity=0.05, + surface_count=40, + colorscale="Jet", + opacityscale=[[0, 0], [self.surface_threshold[0], 0], [1, 1]], + colorbar=colorbar, + # cmax=1, + # cmin=self.surface_threshold[0], + ), + row=1, + col=2 + ) + + def _plot_velocity_scatter(self, Xqs, yqs, fig, row, col, plot_args=None): + """ + # generic method for any plot + :param Xqs: filtered Nx3 position + :param yqs: filtered N values + :param fig: + :param row: + :param col:print("Number of points after filtering: ", Xq_mv.shape[0]) + :param plot_args: symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max + """ + + print(" Plotting row {}, col {}".format(row, col)) + # marker and colorbar arguments + if plot_args is None: + symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max = 'square', 8, 0.2, False, yqs[:,0].min(), yqs[:,0].max() + else: + symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max = plot_args + if cbar_x_pos is not False: + colorbar = dict(x=cbar_x_pos, + len=1, + y=0.5 + ) + else: + colorbar = dict() + + colorbar["tickfont"] = dict(size=18) + + # plot + fig.add_trace( + go.Scatter3d( + x=Xqs[:,0], + y=Xqs[:,1], + z=Xqs[:,2], + mode='markers', + marker=dict( + color=yqs[:,0], + colorscale="Jet", + cmax=cbar_max, + cmin=cbar_min, + colorbar=colorbar, + opacity=opacity, + symbol=symbol, + size=size + ), + ), + row=row, + col=col + ) + + def _plot_velocity_1by3(self, X, y_vy, Xq_mv, mean_y, var_y, i): + """ + # This plot is good for radar data + :param X: ground truth positions + :param y_vy: ground truth y velocity + :param Xq_mv: query X positions + :param mean_y: predicted y velocity mean + :param i: ith frame + """ + print(" Plotting 1x3 subplots") + + # setup plot + specs = [[{"type": "scene"}, {"type": "scene"}, {"type": "scene"}],] + titles = ["Ground truth", "Prediction (mean)", "Predictions (variance)"] + fig = make_subplots( + rows=1, + cols=3, + specs=specs, + subplot_titles=titles + ) + + # filter by surface threshold + print(" Surface_thresh: ", self.surface_threshold) + print(" Number of points before filtering: {}".format(Xq_mv.shape[0])) + + if self.plot_volumetric: + Xq_mv, mean_y, var_y = self._filter_predictions_velocity_where(Xq_mv, mean_y, var_y) + else: + Xq_mv, mean_y, var_y = self._filter_predictions_velocity(Xq_mv, mean_y, var_y) + + print(" Number of points after filtering: {}".format(Xq_mv.shape[0])) + + # set colorbar + cbar_min = min(mean_y.min().item(), y_vy.min().item()) + cbar_max = max(mean_y.max().item(), y_vy.max().item()) + print(f"mean_y.min().item(): {mean_y.min().item()}, y_vy.min().item(): {y_vy.min().item()}") + print(f"mean_y.max().item(): {mean_y.max().item()}, y_vy.max().item(): {y_vy.max().item()}") + # fig.update_layout(coloraxis={'colorscale':'Jet', "cmin":cbar_min, 
"cmax":max_c}) # global colrobar + + # plot + # plot_args - symbol, size, opacity, cbar_x_pos, cbar_min, cbar_max + plot_setting = 5 + if plot_setting == 1: #for 1x3, scatter, shared colorbar + plot_args_data = ['circle', 5, 0.7, 0.3, cbar_min, cbar_max] + plot_args_pred_mean = ['circle', 5, 0.7, 0.6, cbar_min, cbar_max] #opacity=0.1 + plot_args_pred_var = ['circle', 5, 0.7, 0.9, None, None] #opacity=0.1 + elif plot_setting == 2: #for 1x3, scatter, separate axis + plot_args_data = ['circle', 5, 0.7, 0.3, None, None] + plot_args_pred_mean = ['circle', 5, 0.7, 0.6, None, None] #opacity=0.1 + plot_args_pred_var = ['circle', 5, 0.7, 0.9, None, None] #opacity=0.1 + elif plot_setting == 3: #for 1x3 query slice, shared colobar + plot_args_data = ['circle', 5, 0.7, 0.25, cbar_min, cbar_max] + plot_args_pred_mean = ['square', 5, 0.7, 0.63, cbar_min, cbar_max] #opacity=0.1 + plot_args_pred_var = ['square', 5, 0.7, 0.95, None, None] #opacity=0.1 + elif plot_setting == 4: # for 1x3 query slice, separate colobar + plot_args_data = ['circle', 5, 0.7, 0.25, None, None] + plot_args_pred_mean = ['square', 5, 0.7, 0.63, None, None] + plot_args_pred_var = ['square', 5, 0.7, 0.95, None, None] + elif plot_setting == 5: #for 1x3 query everywhere, shared colobar + plot_args_data = ['circle', 5, 0.7, 0.265, cbar_min, cbar_max] + # plot_args_data = ['circle', 1.5, 0.7, 0.265, cbar_min, cbar_max] + plot_args_pred_mean = ['square', 2.5, 0.4, 0.63, cbar_min, cbar_max] #opacity=0.1 + plot_args_pred_var = ['square', 2.5, 0.4, 0.975, 0, None] #opacity=0.1 + elif plot_setting == 6: #for 1x3 query everywhere, sperate colorbar + plot_args_data = ['circle', 3, 0.7, 0.3, None, None] + plot_args_pred_mean = ['square', 3, 0.3, 0.6, None, None] #opacity=0.1 + plot_args_pred_var = ['square', 3, 0.3, 0.9, None, None] #opacity=0.1 + else: + pass + + if self.plot_volumetric: + self._plot_velocity_volumetric(X.float(), y_vy, fig, 1, 1, plot_args_data) + self._plot_velocity_volumetric(Xq_mv.float(), mean_y.float(), fig, 1, 2, plot_args_pred_mean) + self._plot_velocity_volumetric(Xq_mv.float(), var_y.float(), fig, 1, 3, plot_args_pred_var) + else: + self._plot_velocity_scatter(X.float(), y_vy, fig, 1, 1, plot_args_data) + self._plot_velocity_scatter(Xq_mv.float(), mean_y.float(), fig, 1, 2, plot_args_pred_mean) + self._plot_velocity_scatter(Xq_mv.float(), var_y.float(), fig, 1, 3, plot_args_pred_var) + + # update camera + camera = dict( + eye=dict(x=2.25, y=-2.25, z=1.25) + # eye=dict(x=-2.25, y=-2.25, z=1.25) + # eye=dict(x=-4, y=0.2, z=0.5) + ) + fig.layout.scene1.camera = camera + fig.layout.scene2.camera = camera + fig.layout.scene3.camera = camera + + # update plot settings + layout = dict(xaxis=dict(nticks=4, range=[self.args.area_min[0], self.args.area_max[0]], ), + yaxis=dict(nticks=4, range=[self.args.area_min[1], self.args.area_max[1]], ), + zaxis=dict(nticks=4, range=[self.args.area_min[2], self.args.area_max[2]], ), + aspectmode="manual", + aspectratio=dict(x=2, y=2, z=2)) + fig.update_layout(scene1=layout, scene2=layout, scene3=layout, font=dict(size=15)) + + plots_dir = os.path.abspath("./plots/velocity") + if not os.path.isdir(plots_dir): + print(f"Creating \"{plots_dir}\"...") + os.makedirs(plots_dir) + + fig.update_layout(title='{}_velocity_frame{}'.format(self.plot_title, i), height=450) + filename = os.path.abspath('./plots/velocity/{}_frame{}.html'.format(self.plot_title, i)) + plotly.offline.plot(fig, filename=filename, auto_open=False) + print(' Plot saved as ' + filename) + + pdf_filename = 
os.path.abspath('./plots/velocity/{}_frame{}.pdf'.format(self.plot_title, i)) + fig.write_image(pdf_filename, width=1500, height=450) + print(' Plot also saved as ' + pdf_filename) + + svg_filename = os.path.abspath('./plots/velocity/{}_frame{}.svg'.format(self.plot_title, i)) + fig.write_image(svg_filename, width=1500, height=450) + print(' Plot also saved as ' + svg_filename) + + png_filename = os.path.abspath('./plots/velocity/{}_frame{}.png'.format(self.plot_title, i)) + fig.write_image(png_filename, width=1500, height=450) + print(' Plot also saved as ' + png_filename) + + + def plot_velocity_frame(self, X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, i): + time1 = time.time() + + if self.plot_axis == "x": + self._plot_velocity_1by3(X, y_vx, Xq_mv, mean_x, var_x, i) + elif self.plot_axis == "y": + self._plot_velocity_1by3(X, y_vy, Xq_mv, mean_y, var_y, i) + elif self.plot_axis == "z": + self._plot_velocity_1by3(X, y_vz, Xq_mv, mean_z, var_z, i) + else: + raise ValueError("Unsupported plot axis \"{self.plot_axis}\"") + + print(' Total plotting time=%2f s' % (time.time()-time1)) diff --git a/query_methods.py b/query_methods.py new file mode 100644 index 0000000..f56aaa9 --- /dev/null +++ b/query_methods.py @@ -0,0 +1,195 @@ +import argparse +import json +import os +import pandas as pd +import time +import torch +import numpy as np + +from bhmtorch_cpu import BHM_VELOCITY_PYTORCH +from utils_filereader import read_frame_velocity +from utils_metrics import calc_scores_velocity + + +def load_mdl(args, path): + """ + @param path (str): path relative to the mdl folder to load from + @returns: model: BHM Module that is loaded + """ + filename = './mdls/{}'.format(path) + print(' Loading the trained model from ' + filename) + model_params = torch.load(filename) + + if args.likelihood_type == "gamma": + model = BHM_VELOCITY_PYTORCH( + gamma=args.gamma, + grid=model_params['grid'], + w_hatx=model_params["w_hatx"], + w_haty=model_params["w_haty"], + w_hatz=model_params["w_hatz"], + kernel_type=args.kernel_type, + likelihood_type=model_params["likelihood_type"] + ) + elif args.likelihood_type == "gaussian": + model = BHM_VELOCITY_PYTORCH( + alpha=model_params['alpha'], + beta=model_params['beta'], + gamma=args.gamma, + grid=model_params['grid'], + kernel_type=args.kernel_type, + likelihood_type=model_params["likelihood_type"] + ) + model.updateMuSig(model_params['mu_x'], model_params['sig_x'], + model_params['mu_y'], model_params['sig_y'], + model_params['mu_z'], model_params['sig_z']) + else: + raise ValueError("Unsupported likelihood type: \"{}\"".format(args.likelihood_type)) + + return model, model_params['train_time'] + + +def save_query_data(data, path): + """ + @param data (tuple of elements): datapoints from regression/occupancy query to save + @param path (str): path relative to query_data folder to save data + """ + + ###===### + complete_dir = './query_data/{}'.format(path).split("/") + complete_dir = "/".join(complete_dir[:-1]) + + if not os.path.isdir(complete_dir): + os.makedirs(complete_dir) + ###///### + + filename = './query_data/{}'.format(path) + torch.save(data, filename) + print( ' Saving queried output as ' + filename) + +def query_velocity(args, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei): + bhm_velocity_mdl, train_time = load_mdl(args, 'velocity/{}_f{}'.format(args.save_model_path, framei)) + + option = '' + if args.eval_path != '' and args.eval: + #if eval is True, test the query + print(" Query data from the test 
dataset") + Xq_mv, y_vx_true, y_vy_true, y_vz_true, _ = read_frame_velocity(args, framei, args.eval_path, cell_max_min) + option = args.eval_path + elif args.query_dist[0] <= 0 and args.query_dist[1] <= 0 and args.query_dist[2] <= 0: + #if all q_res are non-positive, then query input = X + print(" Query data is the same as input data") + Xq_mv = X + option = 'Train data' + elif args.query_dist[0] <= 0 or args.query_dist[1] <= 0 or args.query_dist[2] <= 0: + #if at least one q_res is non-positive, then + if args.query_dist[0] <= 0: #x-slice + print(" Query data is x={} slice ".format(args.query_dist[3])) + xx, yy, zz = torch.meshgrid( + torch.arange( + args.query_dist[3], + args.query_dist[3] + 0.1, + 1 + ), + torch.arange( + cell_max_min[2], + cell_max_min[3] + args.query_dist[1], + args.query_dist[1] + ), + torch.arange( + cell_max_min[4], + cell_max_min[5] + args.query_dist[2], + args.query_dist[2] + ) + ) + Xq_mv = torch.stack([xx.flatten(), yy.flatten(), zz.flatten()], dim=1) + option = 'X slice at '.format(args.query_dist[3]) + elif args.query_dist[1] <= 0: #y-slice + print(" Query data is y={} slice ".format(args.query_dist[3])) + xx, yy, zz = torch.meshgrid( + torch.arange( + cell_max_min[0], + cell_max_min[1] + args.query_dist[0], + args.query_dist[0] + ), + torch.arange( + args.query_dist[3], + args.query_dist[3] + 0.1, + 1 + ), + torch.arange( + cell_max_min[4], + cell_max_min[5] + args.query_dist[2], + args.query_dist[2] + ) + ) + Xq_mv = torch.stack([xx.flatten(), yy.flatten(), zz.flatten()], dim=1) + option = 'Y slice at '.format(args.query_dist[3]) + else: #z-slice + print(" Query data is z={} slice ".format(args.query_dist[3])) + xx, yy, zz = torch.meshgrid( + torch.arange( + cell_max_min[0], + cell_max_min[1] + args.query_dist[0], + args.query_dist[0] + ), + torch.arange( + cell_max_min[2], + cell_max_min[3] + args.query_dist[1], + args.query_dist[1] + ), + torch.arange( + args.query_dist[3], + args.query_dist[3] + 0.1, + 1 + ) + ) + Xq_mv = torch.stack([xx.flatten(), yy.flatten(), zz.flatten()], dim=1) + option = 'Z slice at '.format(args.query_dist[3]) + else: + #if not use the grid + print(" Query data is a 3D grid.") + xx, yy, zz = torch.meshgrid( + torch.arange( + cell_max_min[0], + cell_max_min[1]+args.query_dist[0], + args.query_dist[0] + ), + torch.arange( + cell_max_min[2], + cell_max_min[3]+args.query_dist[1], + args.query_dist[1] + ), + torch.arange( + cell_max_min[4], + cell_max_min[5]+args.query_dist[2], + args.query_dist[2] + ) + ) + Xq_mv = torch.stack([xx.flatten(), yy.flatten(), zz.flatten()], dim=1) + option = '3D grid' + + time1 = time.time() + + if args.likelihood_type == "gamma": + mean_x, mean_y, mean_z = bhm_velocity_mdl.predict(Xq_mv) + elif args.likelihood_type == "gaussian": + mean_x, var_x, mean_y, var_y, mean_z, var_z = bhm_velocity_mdl.predict(Xq_mv, args.query_blocks, args.variance_only) + else: + raise ValueError("Unsupported likelihood type: \"{}\"".format(args.likelihood_type)) + + query_time = time.time() - time1 + + print(' Total querying time={} s'.format(round(query_time, 2))) + save_query_data((X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei), \ + 'velocity/{}_f{}'.format(args.save_query_data_path, framei)) + + if args.eval: + if hasattr(args, 'report_notes'): + notes = args.report_notes + else: + notes = '' + axes = [('x', y_vx_true, mean_x, var_x), ('y', y_vy_true, mean_y, var_y), ('z', y_vz_true, mean_z, var_z)] + for axis, Xqi, mean, var in axes: + mdl_name = 'reports/' + args.plot_title + '_' + axis 
+ calc_scores_velocity(mdl_name, option, Xqi.numpy(), mean.numpy().ravel(), predicted_var=\ + np.diagonal(var.numpy()), train_time=train_time, query_time=query_time, save_report=True, notes=notes) diff --git a/spatun.py b/spatun.py new file mode 100755 index 0000000..d0164fd --- /dev/null +++ b/spatun.py @@ -0,0 +1,160 @@ +import argparse +import json +import os +import pandas as pd +import time +import torch +import numpy as np + +import train_methods +import query_methods +import utils_filereader +from plot_methods import BHM_PLOTTER + + +def load_query_data(path): + """ + @param path (str): path relative to query_data folder to save data + """ + filename = './query_data/{}'.format(path) + print(' Reading queried output from ' + filename) + return torch.load(filename) + +# ============================================================================== +# Train +# ============================================================================== +def train(fn_train, cell_max_min, cell_resolution): + """ + @params: [fn_train, cell_max_min, cell_resolution] + @returns: [] + Fits the 3D BHM on each frame of the dataset and plots occupancy or regression + """ + print('\nTraining started---------------') + alpha = 10**-2 + beta = 10**2 + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + train_methods.train_velocity(args, alpha, beta, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + + print('Training completed---------------\n') + + +# ============================================================================== +# Query +# ============================================================================== +def query(fn_train, cell_max_min): + """ + @params: [fn_train, cell_max_min] + @returns: [] + Queries the 3D BHM for occupancy or regression on each frame of the dataset + """ + print('Querying started---------------') + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + print(f"cell_resolution: {cell_resolution}, args.query_dist: {args.query_dist}") + query_methods.query_velocity(args, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + + print('Querying completed---------------\n') + +# ============================================================================== +# Plot +# ============================================================================== +def plot(): + """ + @params: [] + @returns: [] + Plots data loaded from the args.save_query_data_path parameter + """ + print('Plotting started---------------') + plotter = BHM_PLOTTER(args, args.plot_title, args.surface_threshold, args.variance_threshold, args.query_dist, occupancy_plot_type=args.occupancy_plot_type, + plot_volumetric=args.plot_volumetric, plot_axis=args.plot_axis) + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei = load_query_data('velocity/{}_f{}'.format(args.save_query_data_path, framei)) + plotter.plot_velocity_frame(X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei) + print('Plotting completed---------------\n') + + + +# ============================================================================== +if __name__ == '__main__': + parser = 
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # Settings Arguments + parser.add_argument('--mode', type=str, help='tqp: Train Query and Plot, to: Train only, qo: Query only, po: Plot only') + parser.add_argument('--num_frames', type=int, help='Number of data frames') + parser.add_argument('--config', type=str, help='Path to the config to load relative to the config folder') + parser.add_argument('--save_config', type=str, help='Saves the argparse config to path if set relative to the config folder') + + # Train Arguments + parser.add_argument('--likelihood_type', type=str, help='Likelihood type (Gamma, Gaussian)') + parser.add_argument('--dataset_path', type=str, help='Path to dataset') + parser.add_argument('--area_min', nargs=3, type=int, help='X Y Z minimum coordinates in bounding box (3 values)') + parser.add_argument('--area_max', nargs=3, type=int, help='X Y Z maximum coordinates in bounding box (3 values)') + parser.add_argument('--hinge_type', type=str, help='grid, - used in 3D surface only') + parser.add_argument('--hinge_dist', nargs=3, type=int, help='X Y Z hinge point resolution (3 values)') + parser.add_argument('--kernel_type', type=str, help='Type of RBF kernel: Vanilla RBF(), Convolution (conv), Wasserstein (wass)') + parser.add_argument('--gamma', nargs='+', type=float, help='X Y Z Gamma (1-3 values)') + parser.add_argument('--num_partitions', nargs=3, type=int, help='X Y Z number of partitions per axis (3 values)') + parser.add_argument('--partition_bleed', type=float, help='Amount of bleed between partitions for plot stitching') + parser.add_argument('--save_model_path', type=str, help='Path to save each model \ + (i.e. save_model_path is set to \"toy3_run0\", then the model at partition 1, frame 1 would save to \ + mdls/occupancy/toy3_run0_f1_p1)' + ) + + # Query Arguments + parser.add_argument('--query_dist', nargs=3, type=float, help='X Y Z Q-resolution (3 values). If any value is\ + negative, a 4th value should be provided to slice the corresponding axis. If all negative, X_query=X_train.') + parser.add_argument('--query_blocks', type=int, default=None, help='How many blocks to break the query method into') + parser.add_argument('--variance_only', action="store_true", default=False, help='Only calculate the diagonal of the covariance matrix') + parser.add_argument('--eval_path', type=str, help='Path of the evaluation dataset') + parser.add_argument('--eval', action="store_true", default=False, help='1=evaluate metrics, 0, otherwise. Use data in --eval_path, if given.') + parser.add_argument('--save_query_data_path', type=str, help='Path save each set of queried data \ + (i.e. save_model_path is set to \"toy3_run0\" and the model type is set to occupancy, \ + then the model at frame 1 would save to query_data/occupancy/toy3_run0_f1_p1)' + ) + + # Plot Arguments + parser.add_argument('--occupancy_plot_type', type=str, help='Plot occupancy as scatter or volumetric plot') + parser.add_argument('--plot_title', type=str, help='') + parser.add_argument('--surface_threshold', nargs=2, type=float, help='Minimum threshold to show surface prediction on plot. 
Min or [Min, Max]') + parser.add_argument('--variance_threshold', type=float, help='Maximum threshold of the variance to show predictions on plot.') + parser.add_argument('--plot_volumetric', action="store_true", default=False, help='Whether to plot a volumetric surface instead of 3D scatter plot') + parser.add_argument('--plot_axis', type=str, choices=["x", "y", "z"], help="Which axis to plot") + + args = parser.parse_args() + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + + # Set arguments according to the following Priority (High->Low): + # 1:CL provided arguments, 2: config provided arguments, 3:default arguments + if args.config: + config = json.load(open('./configs/' + args.config, 'r')) + defaults = json.load(open('./configs/defaults', 'r')) + for key in vars(args): + if key == 'save_config': continue + if getattr(args, key): continue + if key in config and config[key]: + args.__dict__[key] = config[key] + else: + args.__dict__[key] = defaults[key] + if args.save_config: + with open('./configs/' + args.save_config, 'w') as f: + json.dump(args.__dict__, f, indent=2) + assert len(args.gamma) <= 3, 'Cannot support gamma with greater than dimension 3.' + + fn_train, cell_max_min, cell_resolution = utils_filereader.format_config(args) + if args.mode == 'tqp' or args.mode == 't': + train(fn_train, cell_max_min, cell_resolution) + if args.mode == 'tqp' or args.mode == 'q': + query(fn_train, cell_max_min) + if args.mode == 'tqp' or args.mode == 'p': + plot() + if args.mode == 'tq': + train(fn_train, cell_max_min, cell_resolution) + query(fn_train, cell_max_min) + if args.mode == 'qp': + query(fn_train, cell_max_min) + plot() + + print("Mission complete!\n\n") diff --git a/spatun_crossval.py b/spatun_crossval.py new file mode 100755 index 0000000..f1964e0 --- /dev/null +++ b/spatun_crossval.py @@ -0,0 +1,205 @@ +import argparse +import json +import os +import pandas as pd +import time +import torch +import numpy as np +import timeout_decorator + +import train_methods +import query_methods +import utils_filereader +from plot_methods import BHM_PLOTTER + +import sys +import traceback + + +def load_query_data(path): + """ + @param path (str): path relative to query_data folder to save data + """ + filename = './query_data/{}'.format(path) + print(' Reading queried output from ' + filename) + return torch.load(filename) + +# ============================================================================== +# Train +# ============================================================================== +def train(fn_train, cell_max_min, cell_resolution): + """ + @params: [fn_train, cell_max_min, cell_resolution] + @returns: [] + Fits the 3D BHM on each frame of the dataset and plots occupancy or regression + """ + print('\nTraining started---------------') + alpha = 10**-2 + beta = 10**2 + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + train_methods.train_velocity(args, alpha, beta, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + print('Training completed---------------\n') + + +# ============================================================================== +# Query +# ============================================================================== +def query(fn_train, cell_max_min): + """ + @params: [fn_train, cell_max_min] + @returns: [] + Queries the 3D BHM for occupancy or regression on each frame of 
the dataset + """ + print('Querying started---------------') + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + query_methods.query_velocity(args, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + print('Querying completed---------------\n') + +# ============================================================================== +# Plot +# ============================================================================== +def plot(): + """ + @params: [] + @returns: [] + Plots data loaded from the args.save_query_data_path parameter + """ + print('Plotting started---------------') + plotter = BHM_PLOTTER(args, args.plot_title, args.surface_threshold, args.query_dist, occupancy_plot_type=args.occupancy_plot_type, + plot_volumetric=args.plot_volumetric, plot_axis=args.plot_axis) + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei = load_query_data('velocity/{}_f{}'.format(args.save_query_data_path, framei)) + plotter.plot_velocity_frame(X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei) + print('Plotting completed---------------\n') + + + +# ============================================================================== +if __name__ == '__main__': + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # Settings Arguments + parser.add_argument('--mode', type=str, help='tqp: Train Query and Plot, to: Train only, qo: Query only, po: Plot only') + parser.add_argument('--num_frames', type=int, help='Number of data frames') + parser.add_argument('--config', type=str, help='Path to the config to load relative to the config folder') + parser.add_argument('--save_config', type=str, help='Saves the argparse config to path if set relative to the config folder') + + # Train Arguments + parser.add_argument('--likelihood_type', type=str, help='Likelihood type (Gamma, Gaussian)') + parser.add_argument('--dataset_path', type=str, help='Path to dataset') + parser.add_argument('--area_min', nargs=3, type=int, help='X Y Z minimum coordinates in bounding box (3 values)') + parser.add_argument('--area_max', nargs=3, type=int, help='X Y Z maximum coordinates in bounding box (3 values)') + parser.add_argument('--hinge_dist', nargs=3, type=int, help='X Y Z hinge point resolution (3 values)') + parser.add_argument('--kernel_type', type=str, help='Type of RBF kernel: Vanilla RBF(), Convolution (conv), Wasserstein (wass)') + parser.add_argument('--gamma', nargs='+', type=float, help='X Y Z Gamma (1-3 values)') + parser.add_argument('--num_partitions', nargs=3, type=int, help='X Y Z number of partitions per axis (3 values)') + parser.add_argument('--partition_bleed', type=float, help='Amount of bleed between partitions for plot stitching') + parser.add_argument('--save_model_path', type=str, help='Path to save each model \ + (i.e. save_model_path is set to \"toy3_run0\", then the model at partition 1, frame 1 would save to \ + mdls/occupancy/toy3_run0_f1_p1)' + ) + + # Query Arguments + parser.add_argument('--query_dist', nargs=3, type=float, help='X Y Z Q-resolution (3 values). If any value is\ + negative, a 4th value should be provided to slice the corresponding axis. 
If all negative, X_query=X_train.') + parser.add_argument('--query_blocks', type=int, default=None, help='How many blocks to break the query method into') + parser.add_argument('--variance_only', action="store_true", default=False, help='Only calculate the diagonal of the covariance matrix') + parser.add_argument('--eval_path', type=str, help='Path of the evaluation dataset') + parser.add_argument('--eval', action="store_true", default=False, help='1=evaluate metrics, 0, otherwise. Use data in --eval_path, if given.') + parser.add_argument('--save_query_data_path', type=str, help='Path save each set of queried data \ + (i.e. save_model_path is set to \"toy3_run0\" and the model type is set to occupancy, \ + then the model at frame 1 would save to query_data/occupancy/toy3_run0_f1_p1)' + ) + + # Plot Arguments + parser.add_argument('--occupancy_plot_type', type=str, help='Plot occupancy as scatter or volumetric plot') + parser.add_argument('--plot_title', type=str, help='') + parser.add_argument('--surface_threshold', nargs=2, type=float, help='Minimum threshold to show surface prediction on plot. Min or [Min, Max]') + parser.add_argument('--plot_volumetric', action="store_true", default=False, help='Whether to plot a volumetric surface instead of 3D scatter plot') + parser.add_argument('--plot_axis', type=str, choices=["x", "y", "z"], help="Which axis to plot") + + args = parser.parse_args() + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + + # Set arguments according to the following Priority (High->Low): + # 1:CL provided arguments, 2: config provided arguments, 3:default arguments + if args.config: + config = json.load(open('./configs/' + args.config, 'r')) + defaults = json.load(open('./configs/defaults', 'r')) + for key in vars(args): + if key == 'save_config': continue + if getattr(args, key): continue + if key in config and config[key]: + args.__dict__[key] = config[key] + else: + args.__dict__[key] = defaults[key] + if args.save_config: + with open('./configs/' + args.save_config, 'w') as f: + json.dump(args.__dict__, f, indent=2) + assert len(args.gamma) <= 3, 'Cannot support gamma with greater than dimension 3.' 
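+    # Cross-validation sweep (defined below): every (hinge_dist_factor, gamma)
+    # pair is trained and queried under a 30-minute timeout per run. A
+    # hypothetical invocation, with the config name borrowed from configs/1_toy1_vel:
+    #   python spatun_crossval.py --config 1_toy1_vel --mode tq --num_frames 1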
+ + fn_train, cell_max_min, cell_resolution = utils_filereader.format_config(args) + + @timeout_decorator.timeout(1800) #in seconds + def tqp_methods(): + if not os.path.isdir("reports"): + os.makedirs("reports") + + if args.mode == 'tqp' or args.mode == 't': + train(fn_train, cell_max_min, cell_resolution) + if args.mode == 'tqp' or args.mode == 'q': + query(fn_train, cell_max_min) + if args.mode == 'tqp' or args.mode == 'p': + plot() + if args.mode == 'tq': + train(fn_train, cell_max_min, cell_resolution) + query(fn_train, cell_max_min) + if args.mode == 'qp': + query(fn_train, cell_max_min) + plot() + + global gamma_vals + # gamma_vals = [0.01, 0.1, 0.5, 1, 5, 10, 100, 200, 300] + # hinge_dist_factor = [1, 1.5, 2, 3, 4, 5, 6] + gamma_vals = [0.01, 0.01, 1, 5, 10, 20, 30, 40, 50, 60, 70, 100] + hinge_dist_factor = [1, 1.25, 2, 3, 4, 5, 6] + args.hinge_dist_orig = args.hinge_dist + args.eval = True + timeout_err = False + for fi in hinge_dist_factor: + for gi in gamma_vals: + try: + args.gamma = [gi] + args.hinge_dist = [args.hinge_dist_orig[0]/fi, args.hinge_dist_orig[1]/fi, args.hinge_dist_orig[2]/fi] + cell_resolution = args.hinge_dist + args.report_notes = 'scale_factor={}; hinge_dist={}; gamma={}'.format(fi, args.hinge_dist, args.gamma) + print('\n=============================================') + print(args.report_notes) + + try: + tqp_methods() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + exception_message_lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + print("".join("\t > " + line for line in exception_message_lines)) + + print(" Rans Error TIMEOUT. Trying rest of the hinge factors aborted!") + timeout_err = True + exit() + + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + exception_message_lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + print("".join("\t > " + line for line in exception_message_lines)) + + print(" Rans Error EXCEPTED when fi={}, gi={}".format(fi, gi)) + + if timeout_err is True: + exit() + + + print("Mission complete!\n\n") diff --git a/spatun_multiframe.py b/spatun_multiframe.py new file mode 100644 index 0000000..d261d99 --- /dev/null +++ b/spatun_multiframe.py @@ -0,0 +1,227 @@ +import argparse +import json +import os +import pandas as pd +import time +import torch +import numpy as np +import timeout_decorator + +import train_methods +import query_methods +import utils_filereader +from plot_methods import BHM_PLOTTER + +from utils_data_preprocessing.train_test_split import split_n_save + +import sys +import traceback + + + +def load_query_data(path): + """ + @param path (str): path relative to query_data folder to save data + """ + filename = './query_data/{}'.format(path) + print(' Reading queried output from ' + filename) + return torch.load(filename) + +# ============================================================================== +# Train +# ============================================================================== +def train(fn_train, cell_max_min, cell_resolution): + """ + @params: [fn_train, cell_max_min, cell_resolution] + @returns: [] + Fits the 3D BHM on each frame of the dataset and plots occupancy or regression + """ + print('\nTraining started---------------') + alpha = 10**-2 + beta = 10**2 + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + train_methods.train_velocity(args, alpha, beta, X, y_vx, y_vy, y_vz, partitions, 
cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + + print('Training completed---------------\n') + + +# ============================================================================== +# Query +# ============================================================================== +def query(fn_train, cell_max_min): + """ + @params: [fn_train, cell_max_min] + @returns: [] + Queries the 3D BHM for occupancy or regression on each frame of the dataset + """ + print('Querying started---------------') + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, partitions = utils_filereader.read_frame_velocity(args, framei, fn_train, cell_max_min) + print(f"cell_resolution: {cell_resolution}, args.query_dist: {args.query_dist}") + query_methods.query_velocity(args, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei) ###///### + del X, y_vx, y_vy, y_vz, partitions + + print('Querying completed---------------\n') + +# ============================================================================== +# Plot +# ============================================================================== +def plot(): + """ + @params: [] + @returns: [] + Plots data loaded from the args.save_query_data_path parameter + """ + print('Plotting started---------------') + plotter = BHM_PLOTTER(args=args, plot_title=args.plot_title, surface_threshold=args.surface_threshold, variance_threshold=args.variance_threshold, query_dist=args.query_dist, occupancy_plot_type=args.occupancy_plot_type, + plot_volumetric=args.plot_volumetric, plot_axis=args.plot_axis) + for framei in range(args.num_frames): + X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei = load_query_data('velocity/{}_f{}'.format(args.save_query_data_path, framei)) + plotter.plot_velocity_frame(X, y_vx, y_vy, y_vz, Xq_mv, mean_x, var_x, mean_y, var_y, mean_z, var_z, framei) + print('Plotting completed---------------\n') + + + +# ============================================================================== +if __name__ == '__main__': + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # Settings Arguments + parser.add_argument('--mode', type=str, help='tqp: Train Query and Plot, to: Train only, qo: Query only, po: Plot only') + parser.add_argument('--num_frames', type=int, help='Number of data frames') + parser.add_argument('--config', type=str, help='Path to the config to load relative to the config folder') + parser.add_argument('--save_config', type=str, help='Saves the argparse config to path if set relative to the config folder') + + # Train Arguments + parser.add_argument('--likelihood_type', type=str, help='Likelihood type (Gamma, Gaussian)') + parser.add_argument('--dataset_path', type=str, help='Path to dataset') + parser.add_argument('--area_min', nargs=3, type=int, help='X Y Z minimum coordinates in bounding box (3 values)') + parser.add_argument('--area_max', nargs=3, type=int, help='X Y Z maximum coordinates in bounding box (3 values)') + parser.add_argument('--hinge_dist', nargs=3, type=int, help='X Y Z hinge point resolution (3 values)') + parser.add_argument('--kernel_type', type=str, help='Type of RBF kernel: Vanilla RBF(), Convolution (conv), Wasserstein (wass)') + parser.add_argument('--gamma', nargs='+', type=float, help='X Y Z Gamma (1-3 values)') + parser.add_argument('--num_partitions', nargs=3, type=int, help='X Y Z number of partitions per axis (3 values)') + parser.add_argument('--partition_bleed', type=float, 
help='Amount of bleed between partitions for plot stitching')
+    parser.add_argument('--save_model_path', type=str, help='Path to save each model \
+        (i.e. if save_model_path is set to "toy3_run0", then the model at partition 1, frame 1 would save to \
+        mdls/occupancy/toy3_run0_f1_p1)'
+    )
+
+    # Query Arguments
+    parser.add_argument('--query_dist', nargs=3, type=float, help='X Y Z Q-resolution (3 values). If any value is\
+        negative, a 4th value should be provided to slice the corresponding axis. If all negative, X_query=X_train.')
+    parser.add_argument('--query_blocks', type=int, default=None, help='How many blocks to break the query method into')
+    parser.add_argument('--variance_only', action="store_true", default=False, help='Only calculate the diagonal of the covariance matrix')
+    parser.add_argument('--eval_path', type=str, help='Path of the evaluation dataset')
+    parser.add_argument('--eval', action="store_true", default=False, help='If set, evaluate metrics, using the data in --eval_path if given.')
+    parser.add_argument('--save_query_data_path', type=str, help='Path to save each set of queried data \
+        (i.e. if save_query_data_path is set to "toy3_run0" and the model type is set to occupancy, \
+        then the queried data at frame 1 would save to query_data/occupancy/toy3_run0_f1_p1)'
+    )
+
+    # Plot Arguments
+    parser.add_argument('--occupancy_plot_type', type=str, help='Plot occupancy as scatter or volumetric plot')
+    parser.add_argument('--plot_title', type=str, help='Title of the output plot')
+    parser.add_argument('--surface_threshold', nargs=2, type=float, help='Minimum threshold to show surface prediction on plot. Min or [Min, Max]')
+    parser.add_argument('--variance_threshold', type=float, help='Maximum threshold of the variance to show predictions on plot.')
+    parser.add_argument('--plot_volumetric', action="store_true", default=False, help='Whether to plot a volumetric surface instead of 3D scatter plot')
+    parser.add_argument('--plot_axis', type=str, choices=["x", "y", "z"], help="Which axis to plot")
+
+    parser.add_argument("-s", '--start-n', type=int, default=0, help='First file index to process (inclusive)')
+    parser.add_argument("-e", '--end-n', type=int, default=0, help='Last file index to process (exclusive)')
+
+    args = parser.parse_args()
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
+    # Set arguments according to the following priority (high->low):
+    # 1: CLI-provided arguments, 2: config-provided arguments, 3: default arguments
+    if args.config:
+        config = json.load(open('./configs/' + args.config, 'r'))
+        defaults = json.load(open('./configs/defaults', 'r'))
+        for key in vars(args):
+            if key == 'save_config': continue
+            if getattr(args, key): continue
+            if key in config and config[key]:
+                args.__dict__[key] = config[key]
+            else:
+                args.__dict__[key] = defaults[key]
+    if args.save_config:
+        with open('./configs/' + args.save_config, 'w') as f:
+            json.dump(args.__dict__, f, indent=2)
+    assert len(args.gamma) <= 3, 'Cannot support gamma with dimension greater than 3.'
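+
+    # Example of the precedence above (hypothetical config name and values, for
+    # illustration only): running
+    #   python spatun_multiframe.py --mode tq --config 3_astyx1_vel --gamma 0.3 -s 0 -e 10
+    # keeps gamma=0.3 from the command line, fills unset keys such as num_frames
+    # from configs/3_astyx1_vel, and falls back to configs/defaults for anything
+    # the config leaves empty.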
+
+    fn_train, cell_max_min, cell_resolution = utils_filereader.format_config(args)
+
+    @timeout_decorator.timeout(1800) # in seconds
+    def tqp_methods():
+        if not os.path.isdir("reports"):
+            os.makedirs("reports")
+
+        if args.mode == 'tqp' or args.mode == 't':
+            train(fn_train, cell_max_min, cell_resolution)
+        if args.mode == 'tqp' or args.mode == 'q':
+            query(fn_train, cell_max_min)
+        if args.mode == 'tqp' or args.mode == 'p':
+            plot()
+        if args.mode == 'tq':
+            train(fn_train, cell_max_min, cell_resolution)
+            query(fn_train, cell_max_min)
+        if args.mode == 'qp':
+            query(fn_train, cell_max_min)
+            plot()
+
+    args.eval = False
+    timeout_err = False
+
+    base_filebase = "/".join(args.dataset_path.split("/")[:-1]) + "/{:06d}"
+    train_filebase = base_filebase + "_train"
+    eval_filebase = "/".join(args.eval_path.split("/")[:-1]) + "/{:06d}_test"
+    plot_title_base = args.plot_title + "_{:06d}"
+    save_model_path_base = args.save_model_path + "_{:06d}"
+    save_query_data_path_base = args.save_query_data_path + "_{:06d}"
+
+    # file_idxs = range(10)
+    file_idxs = range(args.start_n, args.end_n)
+    for file_idx in file_idxs:
+        try:
+            fn_train = train_filebase.format(file_idx)
+            args.eval_path = eval_filebase.format(file_idx)
+            args.plot_title = plot_title_base.format(file_idx)
+            args.save_model_path = save_model_path_base.format(file_idx)
+            args.save_query_data_path = save_query_data_path_base.format(file_idx)
+            if not os.path.isfile(fn_train + ".csv"): # split_n_save writes <base>_train.csv
+                base_file = base_filebase.format(file_idx)
+                print(f"Splitting {base_file} into train and test sets...")
+                split_n_save(base_file, 0.2)
+
+
+            cell_resolution = args.hinge_dist
+            args.report_notes = 'file_idx={}; hinge_dist={}; gamma={}'.format(file_idx, args.hinge_dist, args.gamma)
+            print('\n=============================================')
+            print(args.report_notes)
+
+            try:
+                tqp_methods()
+            except timeout_decorator.TimeoutError:
+                exc_type, exc_value, exc_traceback = sys.exc_info()
+                exception_message_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+                print("".join("\t > " + line for line in exception_message_lines))
+
+                print(" TIMEOUT: aborting the remaining file indices!")
+                timeout_err = True
+                exit()
+
+        except Exception:
+            exc_type, exc_value, exc_traceback = sys.exc_info()
+            exception_message_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+            print("".join("\t > " + line for line in exception_message_lines))
+
+            print(" Exception when file_idx={}; continuing with the next file".format(file_idx))
+
+    if timeout_err is True:
+        exit()
+
+
+    print("Mission complete!\n\n")
diff --git a/train_methods.py b/train_methods.py
new file mode 100644
index 0000000..7178e8f
--- /dev/null
+++ b/train_methods.py
@@ -0,0 +1,87 @@
+import argparse
+import json
+import os
+import pandas as pd
+import time
+import torch
+import numpy as np
+
+from bhmtorch_cpu import BHM_VELOCITY_PYTORCH
+
+def save_mdl(args, model, path, train_time):
+    """
+    @param model: BHM module to save
+    @param path (str): path relative to the mdls folder to save to
+    """
+    mdl_type = type(model).__name__
+    print("  mdl_type:", mdl_type)
+
+    print("  Saving as ./mdls/velocity/{}".format(path))
+    if not os.path.isdir("./mdls/velocity/"):
+        os.makedirs('./mdls/velocity/')
+
+    if args.likelihood_type == "gamma":
+        torch.save({
+            'grid': model.grid,
+            "w_hatx": model.w_hatx,
+            "w_haty": model.w_haty,
+            "w_hatz": model.w_hatz,
+            "likelihood_type": model.likelihood_type,
+            'train_time': train_time,
+            }, "./mdls/velocity/{}".format(path)
+        ) ###///###
+    elif args.likelihood_type == "gaussian":
+        torch.save({
+            'mu_x': model.mu_x,
+            'sig_x': model.sig_x,
+            'mu_y': model.mu_y,
+            'sig_y': model.sig_y,
+            'mu_z': model.mu_z,
+            'sig_z': model.sig_z,
+            'grid': model.grid,
+            'alpha': model.alpha,
+            'beta': model.beta,
+            'likelihood_type': model.likelihood_type,
+            'train_time': train_time,
+            }, "./mdls/velocity/{}".format(path)
+        )
+    else:
+        raise ValueError("Unsupported likelihood type: \"{}\"".format(args.likelihood_type))
+
+def train_velocity(args, alpha, beta, X, y_vx, y_vy, y_vz, partitions, cell_resolution, cell_max_min, framei):
+    if args.likelihood_type == "gamma":
+        bhm_velocity_mdl = BHM_VELOCITY_PYTORCH(
+            gamma=args.gamma,
+            grid=None,
+            cell_resolution=cell_resolution,
+            cell_max_min=cell_max_min,
+            X=X,
+            nIter=1,
+            kernel_type=args.kernel_type,
+            likelihood_type=args.likelihood_type
+        )
+    elif args.likelihood_type == "gaussian":
+        bhm_velocity_mdl = BHM_VELOCITY_PYTORCH(
+            gamma=args.gamma,
+            alpha=alpha,
+            beta=beta,
+            grid=None,
+            cell_resolution=cell_resolution,
+            cell_max_min=cell_max_min,
+            X=X,
+            nIter=1,
+            kernel_type=args.kernel_type,
+            likelihood_type=args.likelihood_type
+        )
+    else:
+        raise ValueError(" Unsupported likelihood type: \"{}\"".format(args.likelihood_type))
+
+    time1 = time.time()
+    bhm_velocity_mdl.fit(X, y_vx, y_vy, y_vz, eps=0)
+    train_time = time.time() - time1
+    print('  Total training time={} s'.format(round(train_time, 2)))
+    save_mdl(args, bhm_velocity_mdl, '{}_f{}'.format(args.save_model_path, framei), train_time)
+    del bhm_velocity_mdl
diff --git a/utils_data_preprocessing/5_arisim_process.py b/utils_data_preprocessing/5_arisim_process.py
new file mode 100644
index 0000000..97bd563
--- /dev/null
+++ b/utils_data_preprocessing/5_arisim_process.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import numpy as np
+import pptk
+import sys
+
+# Settings
+mode = 'pos' # vel, pos, chris
+fn_in_dir = '/home/ransalu/Desktop/PycharmProjects/SpaTUn/datasets/5_airsim/5_airsim1/'
+fn_out_dir = 
'/home/ransalu/Desktop/PycharmProjects/SpaTUn/datasets/5_airsim/5_airsim1/'
+
+df0 = pd.read_csv(fn_in_dir + 'formatted_data0.csv').to_numpy()
+df1 = pd.read_csv(fn_in_dir + 'formatted_data1.csv').to_numpy()
+df2 = pd.read_csv(fn_in_dir + 'formatted_data2.csv').to_numpy()
+df3 = pd.read_csv(fn_in_dir + 'formatted_data3.csv').to_numpy()
+
+
+traj = np.concatenate((0*np.ones(df0.shape[0]), 1*np.ones(df1.shape[0]), 2*np.ones(df2.shape[0]), 3*np.ones(df3.shape[0])))
+txyzv3i = np.vstack((df0, df1, df2, df3))
+txyzv3i = np.hstack((txyzv3i, traj[:, None]))
+
+txyzv3i[:, 0] = 0.0
+txyzv3i[:, 3] *= -6
+txyzv3i[:, 1:4] /= 100
+
+if mode == 'vel':
+    np.savetxt(fn_out_dir + '5_airsim1_vel.csv', txyzv3i, delimiter=',', header='t, X, Y, Z, V_X, V_Y, V_Z, traj_id', comments='')
+elif mode == 'pos':
+    txyzv3i[:, 4:7] = txyzv3i[:, 1:4]
+    np.savetxt(fn_out_dir + '5_airsim1_pos.csv', txyzv3i, delimiter=',', header='t, X, Y, Z, X, Y, Z, traj_id', comments='')
+
+
+print(mode, 'array size', txyzv3i.shape)
+print("min: {}".format(txyzv3i.min(axis=0)))
+print("max: {}".format(txyzv3i.max(axis=0)))
+print("mean: {}".format(txyzv3i.mean(axis=0)))
+
+v0 = pptk.viewer(txyzv3i[:, 1:4], txyzv3i[:, 4], txyzv3i[:, 5], txyzv3i[:, 6], txyzv3i[:, 7])
+v0.color_map('jet')
+#v0.set(point_size=0.01)
\ No newline at end of file
diff --git a/utils_data_preprocessing/6_jfk_process.py b/utils_data_preprocessing/6_jfk_process.py
new file mode 100644
index 0000000..e34435e
--- /dev/null
+++ b/utils_data_preprocessing/6_jfk_process.py
@@ -0,0 +1,137 @@
+import numpy as np
+import pickle
+import pymap3d as pm
+import matplotlib.pyplot as pl
+import pptk
+import sys
+
+# Settings
+mode = 'vel' # vel, pos, chris
+fn_in_dir = '/home/ransalu/Desktop/PycharmProjects/JFK_dataset/'
+fn_out_dir = '/home/ransalu/Desktop/PycharmProjects/SpaTUn/datasets/6_jfk/'
+
+# Load trajectories
+# The files contain trajectories extracted from flights within 30 NM of JFK airport. 
+# * jfk_trajs_set_1.pkl : 20120501 ~ 20120505
+# * jfk_trajs_set_2.pkl :
+
+traj_data = pickle.load(open(fn_in_dir+'jfk_trajs_set_1.pkl', 'rb'))
+traj_data = np.array(traj_data, dtype=object) # trajectories have different lengths
+print('total number of trajectories=', len(traj_data))
+print('first traj shape=', traj_data[0].shape) # shape of the first (sample) trajectory
+
+# Draw 2D plot
+M_TO_NM = 0.000539957
+M_TO_FT = 3.28084
+FT_TO_M = .3048
+
+airport_lat, airport_lon, airport_altitude = 40.63993, -73.77869, 12.7
+jfk_runways = [[[40.62202, -73.78558], [40.65051, -73.76332]],
+               [[40.62543, -73.77035], [40.64524, -73.75486]],
+               [[40.65776, -73.79025], [40.64372, -73.75928]],
+               [[40.64836, -73.81671], [40.62799, -73.77178]]] # 04L-22R, 04R-22L, 13L-31R, 13R-31L
+
+def plot_rwy(rwy_coords, color):
+    for r in rwy_coords:
+        a, b = r[0], r[1]
+        start_x, start_y, _ = pm.geodetic2enu(a[0], a[1], airport_altitude, airport_lat, airport_lon, airport_altitude)
+        end_x, end_y, _ = pm.geodetic2enu(b[0], b[1], airport_altitude, airport_lat, airport_lon, airport_altitude)
+        pl.plot([start_x*M_TO_NM, end_x*M_TO_NM], [start_y*M_TO_NM, end_y*M_TO_NM], '-', c=color, lw=2)
+
+
+traj_data_subset = traj_data[:100]
+for i, traj in enumerate(traj_data_subset):
+    t = traj[:, 0] # time-step
+    diff = t[1:] - t[:-1]
+    pos = traj[:, 1:4] # position [xEast(m), yNorth(m), zUp(m)]
+    pos[:, :2] /= 25 # TODO scaled
+    v = np.sqrt(np.sum((pos[1:, :] - pos[:-1, :])**2, axis=1)) / diff
+    vx = (pos[1:, 0] - pos[:-1, 0]) / diff
+    vy = (pos[1:, 1] - pos[:-1, 1]) / diff
+    vz = (pos[1:, 2] - pos[:-1, 2]) / diff
+    #print(v.min(), v.max(), v.mean())
+
+    if mode == 'vel' or mode == 'chris': # 'chris' uses the velocity layout before noise is added below
+        temp = np.hstack((t[1:, None], pos[1:, :], vx[:, None], vy[:, None], vz[:, None], i + 0*vx[:, None])) # velocity
+    elif mode == 'pos':
+        temp = np.hstack((t[1:, None], pos[1:, :], pos[1:, :], i + 0*vx[:, None])) # position
+    if i == 0:
+        txyzv3i = temp
+    else:
+        txyzv3i = np.vstack((txyzv3i, temp))
+
+# filtering area
+delta = 500
+cond1 = np.logical_and(txyzv3i[:, 1] <= delta, txyzv3i[:, 1] >= -delta)
+cond = np.logical_and(cond1, np.logical_and(txyzv3i[:, 2] <= delta, txyzv3i[:, 2] >= -delta))
+txyzv3i = txyzv3i[cond, :]
+txyzv3i[:, 1:4] /= 250 # TODO scaled to be in +-2
+txyzv3i[:, 0] = 0
+
+pl.figure(figsize=(14, 3))
+pl.subplot(131)
+pl.scatter(txyzv3i[:, 1], txyzv3i[:, 2], c=txyzv3i[:, 3], s=2, cmap='jet'); pl.colorbar(); pl.title('xy')
+pl.subplot(132)
+pl.scatter(txyzv3i[:, 1], txyzv3i[:, 3], c=txyzv3i[:, 2], s=2, cmap='jet'); pl.colorbar(); pl.title('xz')
+pl.subplot(133)
+pl.scatter(txyzv3i[:, 2], txyzv3i[:, 3], c=txyzv3i[:, 1], s=2, cmap='jet'); pl.colorbar(); pl.title('yz')
+pl.show()
+#sys.exit()
+
+np.set_printoptions(precision=2)
+print(mode, 'array size', txyzv3i.shape)
+print("min: {}".format(txyzv3i.min(axis=0)))
+print("max: {}".format(txyzv3i.max(axis=0)))
+print("mean: {}".format(txyzv3i.mean(axis=0)))
+v = pptk.viewer(txyzv3i[:, 1:4], txyzv3i[:, 4], txyzv3i[:, 5], txyzv3i[:, 6], txyzv3i[:, 7])
+v.color_map('jet')
+
+if mode == 'chris':
+    noise = np.random.normal(0, 2, size=txyzv3i.shape)
+    txyzv3i += noise
+    np.save(fn_out_dir + 'chris_3d_dataset.npy', txyzv3i)
+elif mode == 'pos':
+    fn_out = fn_out_dir + '6_jfk_partial1_pos.csv'
+    np.savetxt(fn_out, txyzv3i, delimiter=',', header='t, X, Y, Z, X, Y, Z, traj_id',
+               comments='')
+elif mode == 'vel':
+    fn_out = fn_out_dir + '6_jfk_partial1_vel.csv'
+    np.savetxt(fn_out, txyzv3i, delimiter=',', header='t, X, Y, Z, V_X, V_Y, V_Z, traj_id',
+               comments='')
+
+sys.exit()
+
+
+# plot whole set of 
trajectories
+pl.figure(figsize=(7, 7))
+pl.xlabel('East (NM)'); pl.ylabel('North (NM)')
+pl.xlim([-30, 30]); pl.ylim([-30, 30])
+
+for i, traj in enumerate(traj_data):
+    t = traj[:, 0] # time-step
+    pos = traj[:, 1:4] # position [xEast(m), yNorth(m), zUp(m)]
+
+    pl.plot(pos[:, 0]*M_TO_NM, pos[:, 1]*M_TO_NM, '--b', lw=0.1) # 2D plot
+plot_rwy(jfk_runways, color='r')
+pl.show()
+
+
+# plot subset of trajectories
+traj_data_subset = traj_data[:10]
+
+pl.figure(figsize=(7, 7))
+pl.xlabel('East (NM)'); pl.ylabel('North (NM)')
+pl.xlim([-30, 30]); pl.ylim([-30, 30])
+
+for i, traj in enumerate(traj_data_subset):
+    t = traj[:, 0] # time-step
+    pos = traj[:, 1:4] # position [xEast(m), yNorth(m), zUp(m)]
+
+    pl.plot(pos[:, 0]*M_TO_NM, pos[:, 1]*M_TO_NM, '--b', lw=0.5) # 2D plot
+plot_rwy(jfk_runways, color='r')
+pl.show()
+
+
+
+
+
diff --git a/utils_data_preprocessing/add_timesteps_to_velocity_data.py b/utils_data_preprocessing/add_timesteps_to_velocity_data.py
new file mode 100644
index 0000000..7b44cfa
--- /dev/null
+++ b/utils_data_preprocessing/add_timesteps_to_velocity_data.py
@@ -0,0 +1,29 @@
+import numpy as np
+
+DATA_PATH = "./datasets/toy_velocity/toy_velocity_1.csv"
+# DATA_PATH = "./datasets/velocity1/radar_carla_test1_frame_250.csv"
+
+def add_timesteps(data_path):
+    """Prepend a constant t column to a velocity CSV, in place (header included)."""
+    new_file_contents = ""
+
+    i = -1 # -1 marks the header row
+    with open(data_path, "r") as f:
+        for line in f:
+            if i < 0:
+                new_file_contents += "t," + line.replace(" ", "")
+            else:
+                # new_file_contents += "{},".format(i) + line.replace(" ", "")
+                new_file_contents += "{},".format(0) + line.replace(" ", "")
+            i += 1
+
+    with open(data_path, "w") as f:
+        f.write(new_file_contents)
+
+def get_header(data_path):
+    with open(data_path, "r") as f:
+        line = f.readline()
+        header = line.replace("\n", "").split(",")
+    return header
+
+if __name__ == "__main__":
+    add_timesteps(DATA_PATH)
diff --git a/utils_data_preprocessing/format_astyx_hires_data.py b/utils_data_preprocessing/format_astyx_hires_data.py
new file mode 100644
index 0000000..5fca875
--- /dev/null
+++ b/utils_data_preprocessing/format_astyx_hires_data.py
@@ -0,0 +1,108 @@
+import numpy as np
+import os
+from glob import glob
+from tqdm import tqdm
+
+# UNFORMATTED_DATA_FILE = "/home/khatch/Documents/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/radar_6455/000000.txt"
+# FORMATTED_DATA_PATH = "/home/khatch/Documents/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/formatted/000000.txt"
+# UNFORMATTED_DATA_FILE = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/radar_dataset_astyx_hires2019/dataset_astyx_hires2019/dataset_astyx_hires2019/radar_6455/000047.txt"
+# FORMATTED_DATA_PATH = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/formatted/000047.csv"
+# NORMALIZED_DATA_PATH = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/normalized/000047.csv"
+
+# UNFORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/radar_dataset_astyx_hires2019/dataset_astyx_hires2019/dataset_astyx_hires2019/radar_6455"
+# FORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/formatted"
+# NORMALIZED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/Bayesian-Dynamics/datasets/radar_dataset_astyx_hires2019/normalized"
+UNFORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/radar_dataset_astyx_hires2019/dataset_astyx_hires2019/dataset_astyx_hires2019/radar_6455"
+FORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/SpaTUn/datasets/kyle_ransalu/3_astyx/3_astyx1/formatted"
+NORMALIZED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/SpaTUn/datasets/kyle_ransalu/3_astyx/3_astyx1/normalized"
+
+
+def format_all_files(unformatted_data_dir, formatted_data_dir, normalized_data_dir):
+    unformatted_files = glob(os.path.join(unformatted_data_dir, "*.txt"))
+    for unformatted_file in tqdm(unformatted_files, total=len(unformatted_files), desc="Formatting"):
+        example_name = unformatted_file.split("/")[-1].split(".")[0]
+        formatted_data_path = os.path.join(formatted_data_dir, example_name + ".csv")
+        normalized_data_path = os.path.join(normalized_data_dir, example_name + ".csv")
+        format_file(unformatted_file, formatted_data_path, normalized_data_path)
+
+def format_file(unformatted_data_file, formatted_data_path, normalized_data_path):
+    new_file_contents = ""
+
+    i = -1 # -1 marks the header row
+    with open(unformatted_data_file, "r") as f:
+        next(f)
+        for line in f:
+            line = line.replace("\n", "")
+            line = line.split(" ")
+            if i < 0:
+                line = line[:-2]
+                line += ["V_X", "V_Y", "V_Z"]
+                line.insert(0, "t")
+            else:
+                line = line[:-1]
+                vr = line[-1] # the radial velocity column is reused for all three velocity components
+                line += [vr, vr]
+                line.insert(0, "0")
+
+            new_line = ",".join(line)
+            new_line += "\n"
+            new_file_contents += new_line
+            i += 1
+
+    with open(formatted_data_path, "w") as f:
+        f.write(new_file_contents)
+
+    # Normalize each column (except t) to [-1, 1]
+    X = np.loadtxt(formatted_data_path, delimiter=",", skiprows=1)
+    print("X.shape:", X.shape)
+    for col in range(1, X.shape[1]):
+        min_val = np.amin(X[:, col])
+        max_val = np.amax(X[:, col])
+        print("\nmin_val:", min_val)
+        print("max_val:", max_val)
+        X[:, col] -= min_val
+        X[:, col] /= (max_val - min_val)
+        X[:, col] *= 2
+        X[:, col] -= 1
+
+    for col in range(1, X.shape[1]):
+        min_val = np.amin(X[:, col])
+        max_val = np.amax(X[:, col])
+        print("\nmin_val:", min_val)
+        print("max_val:", max_val)
+
+    header = get_header(formatted_data_path)
+    np.savetxt(normalized_data_path, X, delimiter=",", header=header, comments="")
+
+
+# def normalize(val, max, min):
+#     val -= min
+#     val /= (max - min)
+#     val *= 2
+#     val -= 1
+#     return val
+#
+# def unnormalize(val, max, min):
+#     val += 1
+#     val /= 2
+#     val *= (max - min)
+#     val += min
+#     return val
+
+def get_header(data_path):
+    with open(data_path, "r") as f:
+        line = f.readline()
+        header = line.replace("\n", "")
+    return header
+
+if __name__ == "__main__":
+    format_all_files(UNFORMATTED_DATA_DIR, FORMATTED_DATA_DIR, NORMALIZED_DATA_DIR)
diff --git a/utils_data_preprocessing/format_nuscenes_data.py b/utils_data_preprocessing/format_nuscenes_data.py
new file mode 100644
index 0000000..00c7de7
--- /dev/null
+++ b/utils_data_preprocessing/format_nuscenes_data.py
@@ -0,0 +1,82 @@
+import numpy as np
+import os
+from glob import glob
+from tqdm import tqdm
+from nuscenes.utils.data_classes import RadarPointCloud
+
+PCD_DIRS = "../../v1.0-mini/samples/RADAR_FRONT"
+# FORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert 
Project/Bayesian-Dynamics/datasets/nuscenes/samples/formatted/RADAR_FRONT"
+# NORMALIZED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/Bayesian-Dynamics/datasets/nuscenes/samples/normalized/RADAR_FRONT"
+FORMATTED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/SpaTUn/datasets/kyle_ransalu/4_nuscenes/4_nuscenes1/samples/formatted/RADAR_FRONT"
+NORMALIZED_DATA_DIR = "/Users/khatch/Desktop/SISL/Bayesian Hilbert Project/SpaTUn/datasets/kyle_ransalu/4_nuscenes/4_nuscenes1/samples/normalized/RADAR_FRONT"
+
+def format_all_files(pcd_dirs, formatted_data_dir_path, normalized_data_dir_path):
+    radar_files = glob(os.path.join(pcd_dirs, "*.pcd"))
+    for radar_file in tqdm(radar_files, total=len(radar_files), desc="Formatting files"):
+        radar_name = get_name_from_radar_file(radar_file)
+        formatted_data_path = os.path.join(formatted_data_dir_path, radar_name + ".csv")
+        normalized_data_path = os.path.join(normalized_data_dir_path, radar_name + ".csv")
+        run_format(radar_file, formatted_data_path, normalized_data_path)
+    print("Done")
+
+
+def get_name_from_radar_file(radar_file):
+    return radar_file.split(".")[-2].split("/")[-1]
+
+
+def run_format(pcd_file, formatted_data_path, normalized_data_path):
+    pcd = RadarPointCloud.from_file(pcd_file)
+    radar_points = pcd.points.T
+    print("radar_points.shape:", radar_points.shape)
+
+    positions = radar_points[:, 0:3]
+    print("positions.shape:", positions.shape)
+    velocities = radar_points[:, 6:8]
+    print("velocities.shape:", velocities.shape)
+
+    # Columns: t (all zeros), X, Y, Z, V_X, V_Y, V_Z (V_Z is all zeros for the 2D radar velocities)
+    formatted_data = np.concatenate((np.zeros((radar_points.shape[0], 1)), positions, velocities, np.zeros((radar_points.shape[0], 1))), axis=1)
+    print("formatted_data.shape:", formatted_data.shape)
+
+    header = "t,X,Y,Z,V_X,V_Y,V_Z"
+
+    formatted_data_dir = "/" + os.path.join(*formatted_data_path.split("/")[:-1])
+    print("formatted_data_dir:", formatted_data_dir)
+    if not os.path.isdir(formatted_data_dir):
+        os.makedirs(formatted_data_dir)
+
+    np.savetxt(formatted_data_path, formatted_data, delimiter=",", header=header, comments="")
+
+    # Normalize each column (except t) to [-1, 1]
+    for col in range(1, formatted_data.shape[1]):
+        min_val = np.amin(formatted_data[:, col])
+        max_val = np.amax(formatted_data[:, col])
+        print("\nmin_val:", min_val)
+        print("max_val:", max_val)
+
+        if min_val != 0 or max_val != 0: # Leave columns with only 0s alone
+            formatted_data[:, col] -= min_val
+            formatted_data[:, col] /= (max_val - min_val)
+            formatted_data[:, col] *= 2
+            formatted_data[:, col] -= 1
+
+    for col in range(1, formatted_data.shape[1]):
+        min_val = np.amin(formatted_data[:, col])
+        max_val = np.amax(formatted_data[:, col])
+        print("\nmin_val:", min_val)
+        print("max_val:", max_val)
+
+
+    normalized_data_dir = "/" + os.path.join(*normalized_data_path.split("/")[:-1])
+    print("normalized_data_dir:", normalized_data_dir)
+    if not os.path.isdir(normalized_data_dir):
+        os.makedirs(normalized_data_dir)
+
+    np.savetxt(normalized_data_path, formatted_data, delimiter=",", header=header, comments="")
+
+
+if __name__ == "__main__":
+    format_all_files(PCD_DIRS, FORMATTED_DATA_DIR, NORMALIZED_DATA_DIR)
diff --git a/utils_data_preprocessing/normalize_data.py b/utils_data_preprocessing/normalize_data.py
new file mode 100644
index 0000000..cf01479
--- /dev/null
+++ b/utils_data_preprocessing/normalize_data.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pandas as pd
+
+def 
normalize_minmax(fname_in, fname_out, min_val=0, max_val=1, columns=None):
+    data = pd.read_csv(fname_in + '.csv', delimiter=',')
+
+    if columns is None:
+        x = data
+    else:
+        print(data.columns.tolist())
+        x = data[columns]
+
+    x_min = x.min(axis=0)
+    x_max = x.max(axis=0)
+
+    x_normalized = min_val + (x - x_min)/(x_max - x_min)*(max_val - min_val)
+
+    if columns is None:
+        data = x_normalized
+    else:
+        data[columns] = x_normalized
+
+    pd.DataFrame(data).to_csv(fname_out + '.csv', index=False)
+
+    return data
+
+if __name__ == "__main__":
+    # fname_in = "./datasets/kyle_ransalu/1_toy/1_toy1_vel_train"
+    # fname_out = fname_in + '_normalized'
+    # data = normalize_minmax(fname_in, fname_out, min_val=-1, max_val=1, columns=[' X', ' Y', ' Z'])
+    # print(data.min(axis=0))
+    # print(data.max(axis=0))
+
+    import os
+    from glob import glob
+    from tqdm import tqdm
+
+    FORMATTED_FILES_DIR = "./datasets/kyle_ransalu/3_astyx/3_astyx1/formatted"
+    NORMALIZED_FILES_DIR = "./datasets/kyle_ransalu/3_astyx/3_astyx1/normalized2"
+
+    if not os.path.isdir(NORMALIZED_FILES_DIR):
+        os.makedirs(NORMALIZED_FILES_DIR)
+
+    formatted_files = glob(os.path.join(FORMATTED_FILES_DIR, "*.csv"))
+    for formatted_file in tqdm(formatted_files, total=len(formatted_files)):
+        name = formatted_file.split("/")[-1].split(".")[0]
+        fname_in = os.path.join(FORMATTED_FILES_DIR, name)
+        fname_out = os.path.join(NORMALIZED_FILES_DIR, name)
+        data = normalize_minmax(fname_in, fname_out, min_val=-1, max_val=1, columns=['X', 'Y', 'Z'])
diff --git a/utils_data_preprocessing/temp_visualize_pptk.py b/utils_data_preprocessing/temp_visualize_pptk.py
new file mode 100755
index 0000000..e88d7b9
--- /dev/null
+++ b/utils_data_preprocessing/temp_visualize_pptk.py
@@ -0,0 +1,14 @@
+import os
+import numpy as np
+import pandas as pd
+import pptk
+
+dataset_path = os.getcwd() + '/datasets/ahmed/ransvid1_600.csv'
+fn_train = os.path.abspath(dataset_path)
+data = pd.read_csv(fn_train).to_numpy() # ['', 't', 'X', 'Y', 'Z', 'occupancy', 'sig_x', 'sig_y', 'sig_z']
+print(data)
+
+v = pptk.viewer(data[:, 2:5])
+v.attributes(data[:, 5])
+v.set(point_size=1)
+
diff --git a/utils_data_preprocessing/train_test_split.py b/utils_data_preprocessing/train_test_split.py
new file mode 100644
index 0000000..f533f46
--- /dev/null
+++ b/utils_data_preprocessing/train_test_split.py
@@ -0,0 +1,60 @@
+import pandas as pd
+import numpy as np
+import os
+
+def split(data, proportion):
+    """
+    :param data: data matrix
+    :param proportion: as a fraction, e.g. 0.2 for 20% test
+    :return: train set, test set
+    """
+    N = data.shape[0]
+    all_indx = np.arange(N)
+    test_indx = np.random.choice(all_indx, int(N*proportion), replace=False)
+    train_indx = np.in1d(all_indx, test_indx, invert=True) # boolean mask over all N rows
+    print(' Orig size={}, Train size={}, Test size={}'.format(data.shape[0], train_indx.sum(), test_indx.shape[0]))
+
+    return data[train_indx, :], data[test_indx, :]
+
+def split_n_save(fn, proportion=0.2):
+    """
+    :param fn: file name of the raw dataset
+    :param proportion: as a fraction, e.g. 
0.2 for 20% test
+    """
+    print(' Train-test {} splitting '.format(proportion) + fn + '.csv...')
+    data = pd.read_csv(fn+'.csv', delimiter=',')
+    train_set, test_set = split(data.values, proportion)
+
+    header = data.columns.values
+    header = ', '.join(header)
+    with open(fn + '_train.csv', 'w') as f_handle:
+        np.savetxt(f_handle, train_set, delimiter=',', header=header, comments='')
+    with open(fn + '_test.csv', 'w') as f_handle:
+        np.savetxt(f_handle, test_set, delimiter=',', header=header, comments='')
+
+    print(' Train-test datasets saved.')
+
+if __name__ == "__main__":
+    # files = \
+    # ['1_toy/1_toy1_vel',
+    #  '2_carla/2_carla1_frame_250',
+    #  '3_astyx/3_astyx1/normalized/000002',
+    #  '4_nuscenes/4_nuscenes1/samples/normalized/RADAR_FRONT/n008-2018-08-01-15-16-36-0400__RADAR_FRONT__1533151603555991',
+    #  '5_airsim/5_airsim1/5_airsim1_pos',
+    #  '5_airsim/5_airsim1/5_airsim1_vel',
+    #  '6_jfk/6_jfk_partial1_pos',
+    #  '6_jfk/6_jfk_partial1_vel'
+    # ]
+
+    # for f in files:
+    #     split_n_save('../datasets/kyle_ransalu/' + f, 0.2)
+
+
+    file_base = "3_astyx/3_astyx1/normalized/{:06d}"
+    file_idxs_start = 0
+    file_idx_stop = 9
+
+    for i in range(file_idxs_start, file_idx_stop + 1):
+        f = file_base.format(i)
+        print(f)
+        split_n_save('../datasets/kyle_ransalu/' + f, 0.2)
diff --git a/utils_filereader.py b/utils_filereader.py
new file mode 100644
index 0000000..7f48667
--- /dev/null
+++ b/utils_filereader.py
@@ -0,0 +1,137 @@
+import argparse
+import json
+import os
+import pandas as pd
+import time
+import torch
+import numpy as np
+
+# ==============================================================================
+# Utility Functions
+# ==============================================================================
+def format_config(args):
+    """
+    Formats default parameters from argparse so they are easily digested by the module
+    """
+    # default parameter to reduce min and max bounds of cell_max_min
+    delta = (args.area_max[0] - args.area_min[0])*0.03
+
+    fn_train = os.path.abspath(args.dataset_path)
+    cell_resolution = (
+        args.hinge_dist[0],
+        args.hinge_dist[1],
+        args.hinge_dist[2]
+    )
+    cell_max_min = [
+        args.area_min[0] + delta,
+        args.area_max[0] - delta,
+        args.area_min[1] + delta,
+        args.area_max[1] - delta,
+        args.area_min[2] + delta,
+        args.area_max[2] - delta
+    ]
+    return fn_train, cell_max_min, cell_resolution
+
+def get3DPartitions(args, cell_max_min, nPartx1, nPartx2, nPartx3):
+    """
+    @param cell_max_min: The size of the entire area
+    @param nPartx1: How many partitions along the longitude
+    @param nPartx2: How many partitions along the latitude
+    @param nPartx3: How many partitions along the altitude
+    @return: a list of all partitions
+    """
+    width = cell_max_min[1] - cell_max_min[0]
+    length = cell_max_min[3] - cell_max_min[2]
+    height = cell_max_min[5] - cell_max_min[4]
+
+    x_partition_size = width/nPartx1
+    y_partition_size = length/nPartx2
+    z_partition_size = height/nPartx3
+    cell_max_min_segs = []
+    for x in range(nPartx1):
+        for y in range(nPartx2):
+            for z in range(nPartx3):
+                seg_i = (
+                    cell_max_min[0] + x_partition_size*(x-args.partition_bleed), # Lower X
+                    cell_max_min[0] + x_partition_size*(x+1+args.partition_bleed), # Upper X
+                    cell_max_min[2] + y_partition_size*(y-args.partition_bleed), # Lower Y
+                    cell_max_min[2] + y_partition_size*(y+1+args.partition_bleed), # Upper Y
+                    cell_max_min[4] + z_partition_size*(z-args.partition_bleed), # Lower Z
+                    cell_max_min[4] + 
z_partition_size*(z+1+args.partition_bleed) # Upper Z
+                )
+                cell_max_min_segs.append(seg_i)
+    return cell_max_min_segs
+
+def read_frame(args, framei, fn_train, cell_max_min):
+    """
+    @params: framei (int) - the index of the current frame being read
+    @params: fn_train (str) - the path of the dataset frames being read
+    @params: cell_max_min (tuple of 6 ints) - bounding area observed
+
+    @returns: g (float32 tensor)
+    @returns: X (float32 tensor)
+    @returns: y_occupancy (float32 tensor)
+    @returns: sigma (float32 tensor)
+    @returns: cell_max_min_segments - partitioned frame data for frame i
+
+    Reads a single frame (framei) of the dataset defined by fn_train and
+    returns the LIDAR hit data corresponding to that frame and its partitions
+    """
+    print(' Reading ' + fn_train + '.csv...')
+    g = pd.read_csv(fn_train+'.csv', delimiter=',')
+    g = g.loc[g['t'] == framei].values[:, 2:]
+
+    g = torch.tensor(g, dtype=torch.float32)
+    X = g[:, :3]
+    y_occupancy = g[:, 3].reshape(-1, 1)
+    sigma = g[:, 4:]
+
+    # If there are no defaults, automatically set the bounding area from the data.
+    if cell_max_min[0] is None:
+        cell_max_min[0] = X[:, 0].min()
+    if cell_max_min[1] is None:
+        cell_max_min[1] = X[:, 0].max()
+    if cell_max_min[2] is None:
+        cell_max_min[2] = X[:, 1].min()
+    if cell_max_min[3] is None:
+        cell_max_min[3] = X[:, 1].max()
+    if cell_max_min[4] is None:
+        cell_max_min[4] = X[:, 2].min()
+    if cell_max_min[5] is None:
+        cell_max_min[5] = X[:, 2].max()
+
+    cell_max_min_segments = get3DPartitions(args, tuple(cell_max_min), args.num_partitions[0], args.num_partitions[1], args.num_partitions[2])
+    return g, X, y_occupancy, sigma, cell_max_min_segments
+
+def read_frame_velocity(args, framei, fn, cell_max_min):
+    print(' Reading ' + fn + '.csv...')
+    g = pd.read_csv(fn+'.csv', delimiter=',')
+    g = g.loc[np.isclose(g['t'], framei)].values[:, 1:]
+
+    g = torch.tensor(g, dtype=torch.float32)
+    X = g[:, :3]
+    y_vx = g[:, 3].reshape(-1, 1)
+    y_vy = g[:, 4].reshape(-1, 1)
+    y_vz = g[:, 5].reshape(-1, 1)
+
+    # If there are no defaults, automatically set the bounding area from the data. 
+    if cell_max_min[0] is None:
+        cell_max_min[0] = X[:, 0].min()
+    if cell_max_min[1] is None:
+        cell_max_min[1] = X[:, 0].max()
+    if cell_max_min[2] is None:
+        cell_max_min[2] = X[:, 1].min()
+    if cell_max_min[3] is None:
+        cell_max_min[3] = X[:, 1].max()
+    if cell_max_min[4] is None:
+        cell_max_min[4] = X[:, 2].min()
+    if cell_max_min[5] is None:
+        cell_max_min[5] = X[:, 2].max()
+
+    cell_max_min_segments = get3DPartitions(args, tuple(cell_max_min), args.num_partitions[0], args.num_partitions[1], args.num_partitions[2])
+    return X, y_vx, y_vy, y_vz, cell_max_min_segments
diff --git a/utils_metrics.py b/utils_metrics.py
new file mode 100644
index 0000000..529fe05
--- /dev/null
+++ b/utils_metrics.py
@@ -0,0 +1,89 @@
+import numpy as np
+import csv
+import sys
+import os
+from sklearn import metrics
+from datetime import datetime
+
+def read_txy_csv(fn):
+    data = readCsvFile(fn)
+    Xtest = data[:, :3]
+    Ytest = data[:, 3][:, np.newaxis]
+    return Xtest, Ytest
+
+
+def readCsvFile(fileName):
+    reader = csv.reader(open(fileName, 'r'))
+    dataList = []
+    for row in reader:
+        dataList.append([float(elem) for elem in row])
+    return np.array(dataList)
+
+def entropy(x):
+    x[x < sys.float_info.min] = sys.float_info.min
+    return -1*np.sum(x*np.log2(x))
+
+def cross_entropy(act, pred):
+    # negative log-loss, as in sklearn
+    epsilon = 1e-15
+    pred = np.maximum(epsilon, pred)
+    pred = np.minimum(1-epsilon, pred)
+    ll = act*np.log(pred) + (1-act)*np.log(1-pred)
+    return -ll
+
+def mean_standardized_log_loss(true_labels, predicted_mean, predicted_var):
+    """
+    :param true_labels:
+    :param predicted_mean:
+    :param predicted_var:
+    :return: ~0 for simple methods, negative for better methods (eq. 2.34, GPML book)
+    """
+    predicted_var = predicted_var + 10e6*np.finfo(float).eps # to avoid /0 and log(0)
+    msll = np.average(0.5*np.log(2*np.pi*predicted_var) + ((predicted_mean - true_labels)**2)/(2*predicted_var))
+    msll *= -1
+    return msll
+
+def calc_scores_velocity(mdl_name, query_type, true, predicted, predicted_var=None, train_time=-1, query_time=-1, save_report=True, notes=''):
+    fn = mdl_name + '.csv'
+
+    rmse = np.sqrt(metrics.mean_squared_error(true, predicted))
+    msll = mean_standardized_log_loss(true, predicted, predicted_var)
+
+    print(' Metrics: RMSE={:.3f}, MSLL={:.3f}, train_time={:.3f}, query_time={:.3f}'.format(rmse, msll, train_time, query_time))
+    if save_report is True:
+        if os.path.isfile(fn): # If the file already exists, skip the header
+            header = ''
+        else:
+            header = 'Time, Tested on, RMSE, MSLL, Train time, Query time, Notes'
+        with open(fn, 'ab') as f_handle:
+            np.savetxt(f_handle, np.array([[datetime.now(), query_type, rmse, msll, train_time, query_time, notes]]), \
+                       delimiter=',', fmt='%s, %s, %.3f, %.3f, %.3f, %.3f, %s', header=header, comments='')
+
+def calc_scores_occupancy(mdl_name, true, predicted, predicted_var=None, time_taken=-11, N_points=0, do_return=False,
+                          save_report=True):
+    # TODO: double check
+    fn = mdl_name + '.csv'
+
+    predicted_binarized = np.int_(predicted >= 0.5)
+    accuracy = np.round(metrics.accuracy_score(true.ravel(), predicted_binarized.ravel()), 3)
+
+    auc = np.round(metrics.roc_auc_score(true.ravel(), predicted.ravel()), 3)
+
+    nll = np.round(metrics.log_loss(true.ravel(), predicted.ravel()), 3)
+
+    if predicted_var is not None:
+        neg_smse = np.round(mean_standardized_log_loss(true, predicted.ravel(), predicted_var.ravel()), 3)
+    else:
+        neg_smse = -11
+
+    print(mdl_name + ': accuracy={}, auc={}, nll={}, smse={}, 
time_taken={}'.format(accuracy, auc, nll, neg_smse,
+                          time_taken))
+    # print(metrics.confusion_matrix(true.ravel(), predicted_binarized.ravel()))
+    if save_report is True:
+        with open(fn, 'ab') as f_handle:
+            np.savetxt(f_handle, np.array([[accuracy, auc, nll, neg_smse, time_taken, N_points]]), delimiter=',',
+                       fmt="%.3f")
+    if do_return:
+        return accuracy, auc, nll
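+
+# Minimal usage sketch (synthetic numbers, not taken from the pipeline; when
+# save_report is True, calc_scores_velocity appends a row to <mdl_name>.csv):
+#   y_true = np.array([[0.0], [1.0], [2.0]])
+#   y_pred = np.array([[0.1], [0.9], [2.2]])
+#   y_var = np.full_like(y_pred, 0.05)
+#   calc_scores_velocity('reports/demo', 'test-set', y_true, y_pred, y_var,
+#                        train_time=1.0, query_time=0.1, save_report=False)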