full upload so as not to lose anything important

Jan Kowalczyk
2025-03-14 18:02:23 +01:00
parent 35fcfb7d5a
commit b824ff7482
33 changed files with 3539 additions and 353 deletions

View File

@@ -0,0 +1,179 @@
# this script loads the numpy array files containing the lidar frames and counts the number of frames in each file
# the number of frames is then printed to the console per file, together with an aggregated sum of frames in files
# whose names contain the word "smoke", a sum for files that do not, and an overall sum of all frames
# We also plot a pie chart of the distribution of data points in normal and anomalous experiments
import shutil
from datetime import datetime
from pathlib import Path
import numpy as np
from tabulate import tabulate
# define data path containing the numpy array files and output path for the plots
data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_count_lidar_frames")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# if output does not exist, create it
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
# find all numpy array files and sort them correctly by name
normal_experiment_paths, anomaly_experiment_paths = [], []
for npy_file_path in data_path.iterdir():
if npy_file_path.suffix != ".npy":
continue
if "smoke" in npy_file_path.name:
anomaly_experiment_paths.append(npy_file_path)
else:
normal_experiment_paths.append(npy_file_path)
# function that counts the number of frames in one experiment
def count_frames(npy_file_path):
frames = np.load(npy_file_path).shape[0]
return frames
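# note: np.load reads the whole array just to get its first dimension; for large
# files, np.load(npy_file_path, mmap_mode="r").shape[0] would return the frame
# count without pulling the data into memory (optional optimization)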
# we want to print the numbers of frames in a table, so we first gather all the data in two dictionaries
normal_experiment_frames = {
npy_file_path.stem: count_frames(npy_file_path)
for npy_file_path in normal_experiment_paths
}
anomaly_experiment_frames = {
npy_file_path.stem: count_frames(npy_file_path)
for npy_file_path in anomaly_experiment_paths
}
# prepare data for tabulate
normal_experiment_table = [
(experiment, frames) for experiment, frames in normal_experiment_frames.items()
]
anomaly_experiment_table = [
(experiment, frames) for experiment, frames in anomaly_experiment_frames.items()
]
# sort the tables by experiment name
normal_experiment_table.sort(key=lambda x: x[0])
anomaly_experiment_table.sort(key=lambda x: x[0])
# add the sum of all frames to the tables
normal_experiment_table.append(("Sum", sum(normal_experiment_frames.values())))
anomaly_experiment_table.append(("Sum", sum(anomaly_experiment_frames.values())))
# print the number of frames in each file using tabulate
print("Normal experiments:")
print(
tabulate(normal_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid")
)
# print the smallest, largest, mean and median time of the normal experiments assuming 10 frames per second
normal_experiment_frames_values = list(normal_experiment_frames.values())
print(
f"Smallest time: {min(normal_experiment_frames_values) / 10} seconds, Largest time: {max(normal_experiment_frames_values) / 10} seconds, Mean time: {np.mean(normal_experiment_frames_values) / 10} seconds, Median time: {np.median(normal_experiment_frames_values) / 10} seconds"
)
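# note: the 10 frames-per-second figure assumes the lidar was recorded at its
# typical 10 Hz rotation rate; halve the reported durations if it ran at 20 Hz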
print("Anomaly experiments:")
print(
tabulate(
anomaly_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid"
)
)
# print the smallest, largest, mean and median time of the anomalous experiments assuming 10 frames per second
anomaly_experiment_frames_values = list(anomaly_experiment_frames.values())
print(
f"Smallest time: {min(anomaly_experiment_frames_values) / 10} seconds, Largest time: {max(anomaly_experiment_frames_values) / 10} seconds, Mean time: {np.mean(anomaly_experiment_frames_values) / 10} seconds, Median time: {np.median(anomaly_experiment_frames_values) / 10} seconds"
)
# print the sum of all frames in all experiments
total_frames = sum(normal_experiment_frames.values()) + sum(
anomaly_experiment_frames.values()
)
print(f"Total frames in all (normal and anmoaly) experiments: {total_frames} frames")
# print the sum of normal and anomalous experiments as percentage of the total frames
print(
f"Percentage of normal experiments: {sum(normal_experiment_frames.values()) / total_frames * 100}%"
)
print(
f"Percentage of anomaly experiments: {sum(anomaly_experiment_frames.values()) / total_frames * 100}%"
)
# define function to plot pie chart of the distribution of data points in normal and anomalous experiments
def plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames):
import matplotlib.pyplot as plt
# we want to plot the sum of all frames in normal and anomaly experiments as total values and also percentages
total_normal_frames = sum(normal_experiment_frames.values())
total_anomaly_frames = sum(anomaly_experiment_frames.values())
total_frames = total_normal_frames + total_anomaly_frames
# prepare data for pie chart
labels = [
"Normal Lidar Frames\nNon-Degraded Pointclouds",
"Anomalous Lidar Frames\nDegraded Pointclouds",
]
sizes = [total_normal_frames, total_anomaly_frames]
explode = (0.1, 0) # explode the normal slice
# define an autopct function that shows percentage and total number of frames per slice
def make_autopct(pct):
return f"{pct:.1f}%\n({int(pct * total_frames / 100)} frames)"
# plot pie chart without shadow and with custom autopct
fig1, ax1 = plt.subplots()
# set a figure size of 10x5
fig1.set_size_inches(10, 5)
ax1.pie(sizes, explode=explode, labels=labels, autopct=make_autopct, shadow=False)
ax1.axis("equal") # Equal aspect ratio ensures that pie is drawn as a circle.
# display the total number of frames in the center of the pie chart (adjusted vertically)
plt.text(
0,
0.2,
f"Total:\n{total_frames} frames",
fontsize=12,
ha="center",
va="center",
color="black",
)
plt.title(
"Distribution of Normal and Anomalous\nPointclouds in all Experiments (Lidar Frames)"
)
plt.tight_layout()
# save the plot
plt.savefig(output_datetime_path / "data_points_pie.png")
plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames)
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

View File

@@ -0,0 +1,243 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from pointcloudset import Dataset
# define data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_missing_points")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# if output does not exist, create it
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
data_resolution = 32 * 2048
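# 32 vertical channels x 2048 azimuth steps per revolution; a complete scan of the
# Ouster lidar assumed for these recordings therefore contains 65536 measurements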
normal_experiment_paths, anomaly_experiment_paths = [], []
# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# sort anomaly and normal experiments by filesize, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
# function that plots histograms of how many points are missing per pointcloud for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
# function that finds the number of missing points in list of experiments (used for both normal and anomalous)
def find_missing_points(experiment_paths):
missing_points = []
for dataset in (
Dataset.from_file(experiment_path, topic="/ouster/points")
for experiment_path in experiment_paths
):
missing_points_per_pc = []
for pc in dataset:
missing_points_per_pc.append(data_resolution - pc.data.shape[0])
missing_points.append(missing_points_per_pc)
# FIXME temporary break to test code on only one experiment
# break
return missing_points
# check if the data has already been calculated and saved to a pickle file
if (output_path / "missing_points.pkl").exists():
with open(output_path / "missing_points.pkl", "rb") as file:
missing_points_normal, missing_points_anomaly = pickle.load(file)
else:
missing_points_normal = find_missing_points(normal_experiment_paths)
missing_points_anomaly = find_missing_points(anomaly_experiment_paths)
# for faster subsequent runs, save the data to a pickle file
with open(output_path / "missing_points.pkl", "wb") as file:
pickle.dump(
(missing_points_normal, missing_points_anomaly),
file,
protocol=pickle.HIGHEST_PROTOCOL,
)
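# note: the cache file is not keyed to the list of experiments, so delete
# missing_points.pkl by hand whenever the set of bag files changes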
# combine all missing points into one list for each type of experiment
missing_points_normal = np.concatenate(missing_points_normal)
missing_points_anomaly = np.concatenate(missing_points_anomaly)
# create histogram of missing points for normal and anomaly experiments
plt.figure(figsize=(10, 5))
plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Number of Pointclouds")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points.png")
plt.clf()
# create an alternative version with missing points on the y axis and number of pointclouds on the x axis
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=100,
alpha=0.5,
label="Normal Experiments",
orientation="horizontal",
)
plt.hist(
missing_points_anomaly,
bins=100,
alpha=0.5,
label="Anomaly Experiments",
orientation="horizontal",
)
plt.title(title)
plt.xlabel("Number of Pointclouds")
plt.ylabel("Number of Missing Points")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_alternative.png")
# find min and max of both categories so we can set the same limits for both plots
min_missing = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
max_missing = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])
# create bins array with min and max values
bins = np.linspace(min_missing, max_missing, 100)
# since the two histograms (normal and anomalous) contain different numbers of pointclouds, plot a density version as well (each histogram's area is normalized to 1)
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal,
bins=bins,
alpha=0.5,
label="Normal Experiments",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments",
density=True,
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density.png")
# create another density version which does not plot number of missing points but percentage of measurements that are missing (total number of points is 32*2048)
bins = np.linspace(0, 1, 100)
plt.clf()
plt.figure(figsize=(10, 5))
plt.hist(
missing_points_normal / data_resolution,
bins=bins,
alpha=0.5,
label="Normal Experiments (No Artifical Smoke)",
color="blue",
density=True,
)
plt.hist(
missing_points_anomaly / data_resolution,
bins=bins,
alpha=0.5,
color="red",
label="Anomaly Experiments (With Artifical Smoke)",
density=True,
)
plt.title(title)
plt.xlabel("Percentage of Missing Lidar Measurements")
plt.ylabel("Density")
# display the x axis as percentages
plt.gca().set_xticklabels(
["{:.0f}%".format(x * 100) for x in plt.gca().get_xticks()]
)
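# a sketch of an alternative that avoids the fixed-tick-label warning on newer matplotlib:
# from matplotlib.ticker import PercentFormatter
# plt.gca().xaxis.set_major_formatter(PercentFormatter(xmax=1.0))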
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_density_percentage.png")
# matplotlib does not directly support scaling two histograms to the same peak height, so we do it manually using numpy
num_bins = 100
bin_lims = np.linspace(0, 40000, num_bins + 1)
bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
bin_widths = bin_lims[1:] - bin_lims[:-1]
# calculate the histogram for normal and anomaly experiments
normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
# normalize the histograms to the same scale
normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
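# dividing each histogram by its own peak count makes the two curves comparable in
# shape, at the cost of no longer representing absolute pointcloud counts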
# plot the normalized histograms
plt.clf()
plt.figure(figsize=(10, 5))
plt.bar(
bin_centers,
normal_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Normal Experiments",
)
plt.bar(
bin_centers,
anomaly_hist_normalized,
width=bin_widths,
align="center",
alpha=0.5,
label="Anomaly Experiments",
)
plt.title(title)
plt.xlabel("Number of Missing Points")
plt.ylabel("Normalized Density")
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "missing_points_normalized.png")
# plot histogram of missing points for normal and anomaly experiments
plot_data_points(
normal_experiment_paths,
anomaly_experiment_paths,
"Missing Lidar Measurements per Scan",
)
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

View File

@@ -0,0 +1,220 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from pointcloudset import Dataset
# define data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_particles_near_sensor")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# if output does not exist, create it
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
normal_experiment_paths, anomaly_experiment_paths = [], []
# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# print out the names of the normal and anomaly experiments that we found
print("Normal experiments:")
for path in normal_experiment_paths:
print(path.name)
print("\nAnomaly experiments:")
for path in anomaly_experiment_paths:
print(path.name)
# sort anomaly and normal experiments by filesize, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
# function that plots, for each range threshold, how many measurements fall below that range for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
# function that finds the number of measurements with a range below the given threshold in a list of experiments (used for both normal and anomalous)
def find_particles_near_sensor(experiment_paths, range_threshold):
particles_near_sensor = []
for dataset in (
Dataset.from_file(experiment_path, topic="/ouster/points")
for experiment_path in experiment_paths
):
particles_near_sensor_per_pc = []
for pc in dataset:
particles_near_sensor_per_pc.append(
pc.data[pc.data["range"] < range_threshold].shape[0]
)
particles_near_sensor.append(particles_near_sensor_per_pc)
return particles_near_sensor
range_thresholds = [500, 750, 1000, 1250, 1500]
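# the thresholds are expressed in the unit of the "range" field of the pointcloud
# data (millimetres for the Ouster driver assumed here), so 500 corresponds to 0.5 m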
for rt in range_thresholds:
print(f"Processing range threshold {rt}...")
# check if the data has already been calculated and saved to a pickle file
if (output_path / f"particles_near_sensor_counts_{rt}.pkl").exists():
with open(
output_path / f"particles_near_sensor_counts_{rt}.pkl", "rb"
) as file:
particles_near_sensor_normal, particles_near_sensor_anomaly = (
pickle.load(file)
)
else:
particles_near_sensor_normal = find_particles_near_sensor(
normal_experiment_paths,
rt,
)
particles_near_sensor_anomaly = find_particles_near_sensor(
anomaly_experiment_paths,
rt,
)
# for faster subsequent runs, save the data to a pickle file
with open(output_path / f"particles_near_sensor_counts_{rt}.pkl", "wb") as file:
pickle.dump(
(particles_near_sensor_normal, particles_near_sensor_anomaly),
file,
protocol=pickle.HIGHEST_PROTOCOL,
)
# combine all counts of how many particles are close to the sensor into one list for each type of experiment
particles_near_sensor_normal = np.concatenate(particles_near_sensor_normal)
particles_near_sensor_anomaly = np.concatenate(particles_near_sensor_anomaly)
# find min and max of both categories so we can set the same limits for both plots
min_count = np.min(
[
np.min(particles_near_sensor_normal),
np.min(particles_near_sensor_anomaly),
]
)
max_count = np.max(
[
np.max(particles_near_sensor_normal),
np.max(particles_near_sensor_anomaly),
]
)
# create bins array with min and max values
bins = np.linspace(min_count, max_count, 100)
# since the two histograms (normal and anomalous) have different scales, normalize their amplitude and plot a density version as well
# commented out since boxplot is more informative
# plt.clf()
# plt.figure(figsize=(10, 5))
# plt.hist(
# particles_near_sensor_normal,
# bins=bins,
# alpha=0.5,
# label="Normal Experiments (No Artifical Smoke)",
# color="blue",
# density=True,
# )
# plt.hist(
# particles_near_sensor_anomaly,
# bins=bins,
# alpha=0.5,
# color="red",
# label="Anomaly Experiments (With Artifical Smoke)",
# density=True,
# )
# plt.title(title)
# plt.xlabel("Number of Particles Near Sensor")
# plt.ylabel("Density")
# plt.legend()
# plt.tight_layout()
# plt.savefig(output_datetime_path / f"particles_near_sensor_density_{rt}.png")
# alternatively create a box plot to show the distribution of the data
# instead of plotting the frequency of particles near sensor we'll plot the percentage of points (compared to the total number of points in the pointcloud)
data_resolution = 32 * 2048
plt.clf()
plt.figure(figsize=(10, 5))
plt.boxplot(
[
particles_near_sensor_normal / data_resolution,
particles_near_sensor_anomaly / data_resolution,
],
tick_labels=[
"Normal Experiments (No Artifical Smoke)",
"Anomaly Experiments (With Artifical Smoke)",
],
)
# format the y axis labels as percentages
plt.gca().set_yticklabels(
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
)
plt.title("Particles Closer than 0.5m to the Sensor")
plt.ylabel("Percentage of measurements closer than 0.5m")
plt.tight_layout()
plt.savefig(output_datetime_path / f"particles_near_sensor_boxplot_{rt}.png")
# we create the same boxplot but limit the y-axis to 5% to better see the distribution of the data
plt.clf()
plt.figure(figsize=(10, 5))
plt.boxplot(
[
particles_near_sensor_normal / data_resolution,
particles_near_sensor_anomaly / data_resolution,
],
tick_labels=[
"Normal Experiments (No Artifical Smoke)",
"Anomaly Experiments (With Artifical Smoke)",
],
)
# format the y axis labels as percentages
plt.gca().set_yticklabels(
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
)
plt.title("Particles Closer than 0.5m to the Sensor")
plt.ylabel("Percentage of measurements closer than 0.5m")
plt.ylim(0, 0.05)
plt.tight_layout()
plt.savefig(
output_datetime_path / f"particles_near_sensor_boxplot_zoomed_{rt}.png"
)
# plot boxplots of how many measurements have a range below each threshold for both normal and anomaly experiments
plot_data_points(
normal_experiment_paths,
anomaly_experiment_paths,
"Density of Number of Particles Near Sensor",
)
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

View File

@@ -0,0 +1,188 @@
import argparse
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colormaps
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Colormap, ListedColormap
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from PIL import Image  # Pillow, used to save the high-precision grayscale TIFFs
def get_colormap_with_special_missing_color(
colormap_name: str, missing_data_color: str = "black", reverse: bool = False
) -> Colormap:
colormap = (
colormaps[colormap_name] if not reverse else colormaps[f"{colormap_name}_r"]
)
colormap.set_bad(missing_data_color)
colormap.set_over("white")
return colormap
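# "bad" is the color used for NaN entries (missing lidar returns) and "over" is the
# color used for values above vmax, so both show up distinctly in the projection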
# --- Setup output folders (similar to data_missing_points.py) ---
# Change the output path as needed
output_path = Path("/home/fedex/mt/plots/data_2d_projections")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_datetime_path = output_path / datetime_folder_name
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
for folder in (
output_path,
output_datetime_path,
latest_folder_path,
archive_folder_path,
):
folder.mkdir(exist_ok=True, parents=True)
# --- Parse command-line arguments ---
parser = argparse.ArgumentParser(
description="Plot two 2D projections (from .npy files) in vertical subplots"
)
parser.add_argument(
"--input1",
type=Path,
default=Path(
"/home/fedex/mt/data/subter/new_projection/1_loop_closure_illuminated_2023-01-23.npy"
),
help="Path to first .npy file containing 2D projection data",
)
parser.add_argument(
"--input2",
type=Path,
default=Path(
"/home/fedex/mt/data/subter/new_projection/3_smoke_human_walking_2023-01-23.npy"
),
help="Path to second .npy file containing 2D projection data",
)
parser.add_argument(
"--frame1",
type=int,
default=955,
help="Frame index to plot from first file (0-indexed)",
)
parser.add_argument(
"--frame2",
type=int,
default=242,
help="Frame index to plot from second file (0-indexed)",
)
parser.add_argument(
"--colormap",
default="viridis",
type=str,
help="Name of matplotlib colormap to use",
)
parser.add_argument(
"--missing-data-color",
default="black",
type=str,
help="Color to use for missing data in projection",
)
parser.add_argument(
"--reverse-colormap",
action="store_true",
help="Reverse the colormap if specified",
)
args = parser.parse_args()
# --- Load the numpy projection data from the provided files ---
# Each file is assumed to be a 3D array: (num_frames, height, width)
proj_data1 = np.load(args.input1)
proj_data2 = np.load(args.input2)
# Choose the desired frames
try:
frame1 = proj_data1[args.frame1]
except IndexError:
raise ValueError(f"Frame index {args.frame1} out of range for file: {args.input1}")
try:
frame2 = proj_data2[args.frame2]
except IndexError:
raise ValueError(f"Frame index {args.frame2} out of range for file: {args.input2}")
# Use a fixed shared value range for both frames so their colors are directly comparable
# (the commented expressions show the data-driven alternative, ignoring NaNs)
global_vmin = 0  # min(np.nanmin(frame1), np.nanmin(frame2))
global_vmax = 0.8  # max(np.nanmax(frame1), np.nanmax(frame2))
# Create colormap using the utility (to mimic create_2d_projection)
cmap = get_colormap_with_special_missing_color(
args.colormap, args.missing_data_color, args.reverse_colormap
)
# --- Create a figure with 2 vertical subplots ---
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
for ax, frame, title in zip(
(ax1, ax2),
(frame1, frame2),
(
"Projection of Lidar Frame without Degradation",
"Projection of Lidar Frame with Degradation (Artifical Smoke)",
),
):
im = ax.imshow(frame, cmap=cmap, aspect="auto", vmin=global_vmin, vmax=global_vmax)
ax.set_title(title)
ax.axis("off")
# Adjust layout to fit margins for a paper
plt.tight_layout(rect=[0, 0.05, 1, 1])
# Add a shared colorbar for the colormap next to the subplots
cbar = fig.colorbar(im, ax=[ax1, ax2], orientation="vertical", fraction=0.05)
cbar.set_label("Normalized Range")
# Add a separate colorbar for NaN values
sm = ScalarMappable(cmap=ListedColormap([cmap.get_bad(), cmap.get_over()]))
divider = make_axes_locatable(cbar.ax)
nan_ax = divider.append_axes(
"bottom", size="15%", pad="3%", aspect=3, anchor=cbar.ax.get_anchor()
)
nan_ax.grid(visible=False, which="both", axis="both")
nan_cbar = fig.colorbar(sm, cax=nan_ax, orientation="vertical")
nan_cbar.set_ticks([0.3, 0.7])
nan_cbar.set_ticklabels(["NaN", f"> {global_vmax}"])
nan_cbar.ax.tick_params(length=0)
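# the small secondary bar encodes the colormap's two special colors: the "bad"
# color used for NaN pixels and the "over" color used for values above global_vmax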
# Save the combined plot
output_file = output_datetime_path / "data_2d_projections.png"
plt.savefig(output_file, dpi=300, bbox_inches="tight", pad_inches=0.1)
plt.close()
print(f"Plot saved to: {output_file}")
# --- Create grayscale images (high precision) from the numpy frames using Pillow ---
# Convert NaN values to 0 and ensure the array is in float32 for 32-bit precision.
for degradation_status, frame_number, frame in (
("normal", args.frame1, frame1),
("smoke", args.frame2, frame2),
):
frame_gray = np.nan_to_num(frame, nan=0).astype(np.float32)
gray_image = Image.fromarray(frame_gray, mode="F")
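# mode "F" keeps the full 32-bit float precision in the TIFF; the image can be read
# back losslessly with e.g. np.array(Image.open(path)) (sketch, path is the saved file)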
gray_output_file = (
output_datetime_path
/ f"frame_{frame_number}_grayscale_{degradation_status}.tiff"
)
gray_image.save(gray_output_file)
print(f"Grayscale image saved to: {gray_output_file}")
# --- Handle folder structure: update latest folder and archive the output folder ---
# Delete current latest folder and recreate it
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
# Copy contents of the current output datetime folder to latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# Copy this python script to both output datetime folder and latest folder for preservation
script_path = Path(__file__)
shutil.copy2(script_path, output_datetime_path)
shutil.copy2(script_path, latest_folder_path)
# Move the output datetime folder to the archive folder
shutil.move(output_datetime_path, archive_folder_path)
print(f"Output archived to: {archive_folder_path}")

View File

@@ -0,0 +1,160 @@
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# define data path containing the npy output files from the DeepSAD inference runs
all_data_path = Path(
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/all_infer/inference"
)
output_path = Path("/home/fedex/mt/plots/deepsad_reduced_latent_space")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
# create required output directories if they do not exist
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
normal_experiment_paths = []
anomaly_experiment_paths = []
# locate and sort the npy files (experiment outputs) based on file size
for file_path in all_data_path.iterdir():
if file_path.suffix != ".npy":
continue
# check if the file name contains "output" to ensure it's an experiment output file
if "output" not in file_path.stem:
continue
if "smoke" in file_path.name:
anomaly_experiment_paths.append(file_path)
else:
normal_experiment_paths.append(file_path)
print("Normal experiments:")
for path in normal_experiment_paths:
print(path.name)
print("\nAnomaly experiments:")
for path in anomaly_experiment_paths:
print(path.name)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
def load_latent_space_data(experiment_paths):
"""
Load latent space data from npy files and return a single numpy array.
Modify this function if your file structure is different.
"""
data_list = []
for path in experiment_paths:
latent_data = np.load(path)
data_list.append(latent_data)
return np.vstack(data_list)
def reduce_dimensionality(data, n_components=50):
"""
Reduce the dimensionality of the data using PCA.
This function can be re-used by TSNE or other methods for an initial reduction.
"""
pca = PCA(n_components=n_components, random_state=42)
return pca.fit_transform(data)
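# reducing to ~50 principal components first is a common way to denoise the latent
# vectors and speed up t-SNE; it is not required if the latent space is already small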
def plot_tsne_latent_space(normal_data, anomaly_data, title="TSNE of Latent Space"):
"""
Plot the TSNE representation of the latent space.
This function first applies a PCA-based dimensionality reduction for efficiency.
"""
# Combine normal and anomaly data
combined_data = np.vstack((normal_data, anomaly_data))
# Initial dimensionality reduction with PCA
reduced_data = reduce_dimensionality(combined_data, n_components=50)
# Apply TSNE transformation on the PCA-reduced data
tsne = TSNE(n_components=2, random_state=42)
tsne_results = tsne.fit_transform(reduced_data)
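# t-SNE runs with sklearn's default perplexity (30); fixing random_state makes the
# embedding reproducible, but distances between clusters are not directly meaningful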
# Split the TSNE results back into normal and anomaly arrays
tsne_normal = tsne_results[: len(normal_data)]
tsne_anomaly = tsne_results[len(normal_data) :]
# Plotting TSNE results
plt.clf()
plt.figure(figsize=(10, 5))
plt.scatter(
tsne_anomaly[:, 0], tsne_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
)
plt.scatter(
tsne_normal[:, 0], tsne_normal[:, 1], label="Normal", alpha=0.6, marker="o"
)
plt.title(title)
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "tsne_latent_space_plot.png")
def plot_pca_scatter(normal_data, anomaly_data, title="PCA Scatter Plot"):
"""
Plot a 2-dimensional scatterplot of the latent space using PCA.
This is useful for visualization and can be easily extended.
"""
# Combine normal and anomaly data
combined_data = np.vstack((normal_data, anomaly_data))
pca = PCA(n_components=2, random_state=42)
pca_results = pca.fit_transform(combined_data)
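# optionally, pca.explained_variance_ratio_ could be printed here to see how much
# of the latent variance the 2-D projection actually retains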
# Split the PCA results back into normal and anomaly arrays
pca_normal = pca_results[: len(normal_data)]
pca_anomaly = pca_results[len(normal_data) :]
# Plotting PCA scatter results
plt.clf()
plt.figure(figsize=(10, 5))
plt.scatter(
pca_anomaly[:, 0], pca_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
)
plt.scatter(
pca_normal[:, 0], pca_normal[:, 1], label="Normal", alpha=0.6, marker="o"
)
plt.title(title)
plt.legend()
plt.tight_layout()
plt.savefig(output_datetime_path / "pca_latent_space_plot.png")
# load latent space data for both normal and anomalous experiments
normal_data = load_latent_space_data(normal_experiment_paths)
anomaly_data = load_latent_space_data(anomaly_experiment_paths)
# call the plotting functions to generate the latent space visualizations
plot_tsne_latent_space(normal_data, anomaly_data)
plot_pca_scatter(normal_data, anomaly_data)
# update the 'latest' results folder: delete previous and copy current outputs
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this script to the output folder and to the latest folder to preserve the used code
script_path = Path(__file__)
shutil.copy2(script_path, output_datetime_path)
shutil.copy2(script_path, latest_folder_path)
# move the output date folder to the archive folder for record keeping
shutil.move(output_datetime_path, archive_folder_path)