full upload so as not to lose anything important

Jan Kowalczyk
2025-03-14 18:02:23 +01:00
parent 35fcfb7d5a
commit b824ff7482
33 changed files with 3539 additions and 353 deletions


@@ -0,0 +1,243 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import numpy as np
from pointcloudset import Dataset
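# pointcloudset's Dataset (used below) loads a ROS bag file and yields one PointCloud per lidar frame of the requested topic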
# define data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_missing_points")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# create the output folders if they do not exist yet
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
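# output layout: every run writes into its own timestamped folder, latest/ mirrors the most recent run, and archive/ collects all timestamped runs (see the copy/move logic at the end of the script)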
data_resolution = 32 * 2048
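# the sensor (presumably a 32-beam Ouster, given the /ouster/points topic) scans 32 x 2048 = 65,536 points per full revolution; "missing points" below means 65,536 minus the number of returned points in a scan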
normal_experiment_paths, anomaly_experiment_paths = [], []
# find all bag files and split them into two groups by name (experiments with "smoke" in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# sort anomaly and normal experiments by filesize, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
# plot histograms of how many points are missing per pointcloud, for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
# helper that counts the missing points per scan for a list of experiments (used for both normal and anomalous)
def find_missing_points(experiment_paths):
missing_points = []
for dataset in (
Dataset.from_file(experiment_path, topic="/ouster/points")
for experiment_path in experiment_paths
):
missing_points_per_pc = []
for pc in dataset:
missing_points_per_pc.append(data_resolution - pc.data.shape[0])
missing_points.append(missing_points_per_pc)
# FIXME temporary break to test code on only one experiment
# break
return missing_points
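# illustrative use of the helper on a single experiment (kept commented out so it does not trigger an extra pass over a bag file):
# example_counts = find_missing_points([normal_experiment_paths[0]])[0]
# print(f"{len(example_counts)} scans, up to {np.max(example_counts)} missing points per scan")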
# check if the data has already been calculated and saved to a pickle file
if (output_path / "missing_points.pkl").exists():
with open(output_path / "missing_points.pkl", "rb") as file:
missing_points_normal, missing_points_anomaly = pickle.load(file)
else:
missing_points_normal = find_missing_points(normal_experiment_paths)
missing_points_anomaly = find_missing_points(anomaly_experiment_paths)
# for faster subsequent runs, save the data to a pickle file
with open(output_path / "missing_points.pkl", "wb") as file:
pickle.dump(
(missing_points_normal, missing_points_anomaly),
file,
protocol=pickle.HIGHEST_PROTOCOL,
)
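# note: the cache is never invalidated automatically; delete missing_points.pkl after the input bag files change to force a recomputation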
# combine all missing points into one list for each type of experiment
missing_points_normal = np.concatenate(missing_points_normal)
missing_points_anomaly = np.concatenate(missing_points_anomaly)
# create histogram of missing points for normal and anomaly experiments
plt.figure(figsize=(10, 5))
plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Number of Pointclouds")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points.png")
plt.clf()
# plotting curves would be an alternative to histograms; here we instead create a version with the axes swapped:
# number of missing points on the y axis, number of pointclouds on the x axis
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=100,
alpha=0.5,
label="Normal Experiments",
orientation="horizontal",
)
plt.hist(
missing_points_anomaly,
bins=100,
alpha=0.5,
label="Anomaly Experiments",
orientation="horizontal",
)
plt.title(title)
plt.xlabel("Number of Pointclouds")
plt.ylabel("Number of Missing Points")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_alternative.png")
# find the minimum and maximum over both categories so that both histograms share the same limits
min_missing = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
max_missing = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])
# create a shared bins array spanning those values
bins = np.linspace(min_missing, max_missing, 100)
# the two groups contain different numbers of pointclouds, so also plot a density version (each histogram normalized to unit area) using the shared bins
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=bins,
alpha=0.5,
label="Normal Experiments",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments",
density=True,
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density.png")
# create another density version that plots the fraction of measurements missing from a scan instead of the absolute count (a complete scan has 32*2048 points)
bins = np.linspace(0, 1, 100)
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal / data_resolution,
bins=bins,
alpha=0.5,
label="Normal Experiments (No Artifical Smoke)",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly / data_resolution,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments (With Artifical Smoke)",
density=True,
)
plt.title(title)
plt.xlabel("Percentage of Missing Lidar Measurements")
plt.ylabel("Density")
# display the x axis as percentages
plt.gca().xaxis.set_major_formatter(PercentFormatter(xmax=1))
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density_percentage.png")
# matplotlib's hist does not directly support scaling each histogram to the same peak height, so we do it manually with numpy
num_bins = 100
bin_lims = np.linspace(0, 40000, num_bins + 1)
bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
bin_widths = bin_lims[1:] - bin_lims[:-1]
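# 100 equal-width bins spanning 0 to 40,000 missing points; bin_centers and bin_widths feed the plt.bar calls below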
# calculate the histogram for normal and anomaly experiments
normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
# normalize the histograms to the same scale
normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
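# this scales each histogram so its tallest bin equals 1 (peak-normalized), unlike the density plots above, which normalize the area under each histogram to 1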
# plot the normalized histograms
plt.clf()
plt.figure(figsize=(10, 5))
plt.bar(
bin_centers,
normal_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Normal Experiments",
)
plt.bar(
bin_centers,
anomaly_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Anomaly Experiments",
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Normalized Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_normalized.png")
# plot histogram of missing points for normal and anomaly experiments
plot_data_points(
normal_experiment_paths,
anomaly_experiment_paths,
"Missing Lidar Measurements per Scan",
)
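# the call above writes five png variants (raw counts, swapped axes, density, percentage density, peak-normalized) into the timestamped output folder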
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
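# shutil.move puts the timestamped folder inside archive_folder_path because the destination is an existing directory, so archive/ accumulates one subfolder per run while latest/ holds a flat copy of the newest results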