Files
mt/tools/plot_scripts/deepsad_tsne_latent_space.py
2025-03-14 18:02:23 +01:00

161 lines
5.5 KiB
Python

import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Directory holding the .npy output files produced by your method.
all_data_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/all_infer/inference"
)

# Output layout: one timestamped folder per run, plus 'latest' and 'archive'.
output_path = Path("/home/fedex/mt/plots/deepsad_reduced_latent_space")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# Make sure every output directory exists before anything is written.
for required_dir in (
    output_path,
    output_datetime_path,
    latest_folder_path,
    archive_folder_path,
):
    required_dir.mkdir(exist_ok=True, parents=True)

normal_experiment_paths = []
anomaly_experiment_paths = []

# Collect experiment outputs: only .npy files whose stem contains "output".
# Files with "smoke" in their name are treated as anomaly experiments,
# everything else as normal experiments.
for candidate in all_data_path.iterdir():
    is_experiment_output = candidate.suffix == ".npy" and "output" in candidate.stem
    if not is_experiment_output:
        continue
    bucket = (
        anomaly_experiment_paths
        if "smoke" in candidate.name
        else normal_experiment_paths
    )
    bucket.append(candidate)

# Report what was found (in discovery order, i.e. before sorting).
print("Normal experiments:")
for experiment in normal_experiment_paths:
    print(experiment.name)
print("\nAnomaly experiments:")
for experiment in anomaly_experiment_paths:
    print(experiment.name)

# Order each group by on-disk file size, smallest first.
for experiment_group in (normal_experiment_paths, anomaly_experiment_paths):
    experiment_group.sort(key=lambda entry: entry.stat().st_size)
def load_latent_space_data(experiment_paths):
    """
    Load latent space data from npy files and return a single numpy array.

    Every file is read with ``np.load`` and the loaded arrays are stacked
    row-wise with ``np.vstack`` in the order given by ``experiment_paths``.
    Modify this function if your file structure is different.

    Args:
        experiment_paths: Iterable of paths to ``.npy`` files.

    Returns:
        One array containing the stacked contents of all files.

    Raises:
        ValueError: If ``experiment_paths`` is empty. ``np.vstack`` may also
            raise if the loaded arrays cannot be stacked together.
    """
    # Materialize once so generators can be checked for emptiness and reused.
    experiment_paths = list(experiment_paths)
    if not experiment_paths:
        # np.vstack([]) would fail with an opaque "need at least one array"
        # message; say explicitly what went wrong instead.
        raise ValueError(
            "No latent space files to load: experiment_paths is empty."
        )
    return np.vstack([np.load(path) for path in experiment_paths])
def reduce_dimensionality(data, n_components=50):
    """
    Reduce the dimensionality of the data using PCA.

    This function can be re-used by TSNE or other methods for an initial
    reduction.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        n_components: Requested number of principal components. An integer
            value is capped at what the data supports (the smaller of the
            number of samples and the number of features), so small or
            low-dimensional inputs do not make PCA raise. Non-integer values
            are passed to PCA unchanged.

    Returns:
        The PCA-transformed data as returned by ``PCA.fit_transform``.
    """
    # PCA rejects an integer n_components larger than min(n_samples,
    # n_features); clamp so the default of 50 also works on smaller inputs.
    if isinstance(n_components, (int, np.integer)):
        n_components = min((n_components, *np.shape(data)))
    pca = PCA(n_components=n_components, random_state=42)
    return pca.fit_transform(data)
def plot_tsne_latent_space(normal_data, anomaly_data, title="TSNE of Latent Space"):
    """
    Plot the TSNE representation of the latent space.

    Normal and anomaly samples are stacked, pre-reduced with PCA for
    efficiency, embedded into two dimensions with TSNE, and drawn as a
    scatter plot that is saved as ``tsne_latent_space_plot.png`` inside
    ``output_datetime_path``.

    Args:
        normal_data: Array of latent vectors from normal experiments.
        anomaly_data: Array of latent vectors from anomaly experiments.
        title: Title shown above the plot.
    """
    # Combine normal and anomaly data so both share one embedding
    combined_data = np.vstack((normal_data, anomaly_data))
    normal_count = len(normal_data)

    # Initial dimensionality reduction with PCA; never ask for more
    # components than the data has samples or features, which PCA rejects.
    pca_components = min(50, *combined_data.shape)
    reduced_data = reduce_dimensionality(combined_data, n_components=pca_components)

    # Apply TSNE transformation on the PCA-reduced data
    tsne = TSNE(n_components=2, random_state=42)
    tsne_results = tsne.fit_transform(reduced_data)

    # Split the TSNE results back into normal and anomaly arrays
    tsne_normal = tsne_results[:normal_count]
    tsne_anomaly = tsne_results[normal_count:]

    # Draw on an explicit figure and always close it afterwards, so no stray
    # or leftover pyplot figures accumulate across calls.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Anomalies are drawn first, so normal points are rendered on top.
        ax.scatter(
            tsne_anomaly[:, 0], tsne_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
        )
        ax.scatter(
            tsne_normal[:, 0], tsne_normal[:, 1], label="Normal", alpha=0.6, marker="o"
        )
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_datetime_path / "tsne_latent_space_plot.png")
    finally:
        plt.close(fig)
def plot_pca_scatter(normal_data, anomaly_data, title="PCA Scatter Plot"):
    """
    Plot a 2-dimensional scatterplot of the latent space using PCA.

    Normal and anomaly samples are stacked, projected onto their first two
    principal components, and drawn as a scatter plot that is saved as
    ``pca_latent_space_plot.png`` inside ``output_datetime_path``.

    Args:
        normal_data: Array of latent vectors from normal experiments.
        anomaly_data: Array of latent vectors from anomaly experiments.
        title: Title shown above the plot.
    """
    # Combine normal and anomaly data so both share one projection
    combined_data = np.vstack((normal_data, anomaly_data))
    normal_count = len(normal_data)

    pca = PCA(n_components=2, random_state=42)
    pca_results = pca.fit_transform(combined_data)

    # Split the PCA results back into normal and anomaly arrays
    pca_normal = pca_results[:normal_count]
    pca_anomaly = pca_results[normal_count:]

    # Draw on an explicit figure and always close it afterwards, so no stray
    # or leftover pyplot figures accumulate across calls.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Anomalies are drawn first, so normal points are rendered on top.
        ax.scatter(
            pca_anomaly[:, 0], pca_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
        )
        ax.scatter(
            pca_normal[:, 0], pca_normal[:, 1], label="Normal", alpha=0.6, marker="o"
        )
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_datetime_path / "pca_latent_space_plot.png")
    finally:
        plt.close(fig)
# Stack the per-experiment latent vectors into one array per class.
normal_data = load_latent_space_data(normal_experiment_paths)
anomaly_data = load_latent_space_data(anomaly_experiment_paths)

# Render both visualizations into the timestamped output folder.
plot_tsne_latent_space(normal_data, anomaly_data)
plot_pca_scatter(normal_data, anomaly_data)

# Rebuild 'latest' from scratch so it only mirrors the current run's outputs.
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for produced_file in output_datetime_path.iterdir():
    shutil.copy2(produced_file, latest_folder_path)

# Keep a copy of this script next to the results (timestamped folder first,
# then 'latest') so the code that produced them is preserved.
script_path = Path(__file__)
for destination in (output_datetime_path, latest_folder_path):
    shutil.copy2(script_path, destination)

# Finally, file the timestamped folder under 'archive' for record keeping.
shutil.move(output_datetime_path, archive_folder_path)