full upload so as not to lose anything important

Jan Kowalczyk
2025-03-14 18:02:23 +01:00
parent 35fcfb7d5a
commit b824ff7482
33 changed files with 3539 additions and 353 deletions


@@ -0,0 +1,243 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import numpy as np
from pointcloudset import Dataset
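# pointcloudset's Dataset (used below) loads a ROS bag file and yields one PointCloud per lidar frame of the requested topic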
# define data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_missing_points")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# create the output folders if they do not exist yet
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
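# output layout: every run writes into its own timestamped folder, latest/ mirrors the most recent run, and archive/ collects all timestamped runs (see the copy/move logic at the end of the script)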
data_resolution = 32 * 2048
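# the sensor (presumably a 32-beam Ouster, given the /ouster/points topic) scans 32 x 2048 = 65,536 points per full revolution; "missing points" below means 65,536 minus the number of returned points in a scan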
normal_experiment_paths, anomaly_experiment_paths = [], []
# find all bag files and split them into two groups by name (experiments with "smoke" in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# sort anomaly and normal experiments by filesize, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
# plot histograms of how many points are missing per pointcloud, for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
# helper that counts the missing points per scan for a list of experiments (used for both normal and anomalous)
def find_missing_points(experiment_paths):
missing_points = []
for dataset in (
Dataset.from_file(experiment_path, topic="/ouster/points")
for experiment_path in experiment_paths
):
missing_points_per_pc = []
for pc in dataset:
missing_points_per_pc.append(data_resolution - pc.data.shape[0])
missing_points.append(missing_points_per_pc)
# FIXME temporary break to test code on only one experiment
# break
return missing_points
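# illustrative use of the helper on a single experiment (kept commented out so it does not trigger an extra pass over a bag file):
# example_counts = find_missing_points([normal_experiment_paths[0]])[0]
# print(f"{len(example_counts)} scans, up to {np.max(example_counts)} missing points per scan")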
# check if the data has already been calculated and saved to a pickle file
if (output_path / "missing_points.pkl").exists():
with open(output_path / "missing_points.pkl", "rb") as file:
missing_points_normal, missing_points_anomaly = pickle.load(file)
else:
missing_points_normal = find_missing_points(normal_experiment_paths)
missing_points_anomaly = find_missing_points(anomaly_experiment_paths)
# for faster subsequent runs, save the data to a pickle file
with open(output_path / "missing_points.pkl", "wb") as file:
pickle.dump(
(missing_points_normal, missing_points_anomaly),
file,
protocol=pickle.HIGHEST_PROTOCOL,
)
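# note: the cache is never invalidated automatically; delete missing_points.pkl after the input bag files change to force a recomputation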
# combine all missing points into one list for each type of experiment
missing_points_normal = np.concatenate(missing_points_normal)
missing_points_anomaly = np.concatenate(missing_points_anomaly)
# create histogram of missing points for normal and anomaly experiments
plt.figure(figsize=(10, 5))
plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Number of Pointclouds")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points.png")
plt.clf()
# plotting curves would be an alternative to histograms; here we instead create a version with the axes swapped:
# number of missing points on the y axis, number of pointclouds on the x axis
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=100,
alpha=0.5,
label="Normal Experiments",
orientation="horizontal",
)
plt.hist(
missing_points_anomaly,
bins=100,
alpha=0.5,
label="Anomaly Experiments",
orientation="horizontal",
)
plt.title(title)
plt.xlabel("Number of Pointclouds")
plt.ylabel("Number of Missing Points")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_alternative.png")
# find the minimum and maximum over both categories so that both histograms share the same limits
min_missing = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
max_missing = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])
# create a shared bins array spanning those values
bins = np.linspace(min_missing, max_missing, 100)
# the two groups contain different numbers of pointclouds, so also plot a density version (each histogram normalized to unit area) using the shared bins
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=bins,
alpha=0.5,
label="Normal Experiments",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments",
density=True,
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density.png")
# create another density version that plots the fraction of measurements missing from a scan instead of the absolute count (a complete scan has 32*2048 points)
bins = np.linspace(0, 1, 100)
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal / data_resolution,
bins=bins,
alpha=0.5,
label="Normal Experiments (No Artifical Smoke)",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly / data_resolution,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments (With Artifical Smoke)",
density=True,
)
plt.title(title)
plt.xlabel("Percentage of Missing Lidar Measurements")
plt.ylabel("Density")
# display the x axis as percentages
plt.gca().xaxis.set_major_formatter(PercentFormatter(xmax=1))
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density_percentage.png")
# matplotlib's hist does not directly support scaling each histogram to the same peak height, so we do it manually with numpy
num_bins = 100
bin_lims = np.linspace(0, 40000, num_bins + 1)
bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
bin_widths = bin_lims[1:] - bin_lims[:-1]
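# 100 equal-width bins spanning 0 to 40,000 missing points; bin_centers and bin_widths feed the plt.bar calls below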
# calculate the histogram for normal and anomaly experiments
normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
# normalize the histograms to the same scale
normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
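# this scales each histogram so its tallest bin equals 1 (peak-normalized), unlike the density plots above, which normalize the area under each histogram to 1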
# plot the normalized histograms
plt.clf()
plt.figure(figsize=(10, 5))
plt.bar(
bin_centers,
normal_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Normal Experiments",
)
plt.bar(
bin_centers,
anomaly_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Anomaly Experiments",
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Normalized Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_normalized.png")
# plot histogram of missing points for normal and anomaly experiments
plot_data_points(
normal_experiment_paths,
anomaly_experiment_paths,
"Missing Lidar Measurements per Scan",
)
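# the call above writes five png variants (raw counts, swapped axes, density, percentage density, peak-normalized) into the timestamped output folder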
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
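# shutil.move puts the timestamped folder inside archive_folder_path because the destination is an existing directory, so archive/ accumulates one subfolder per run while latest/ holds a flat copy of the newest results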