import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# define data path containing the npy output files from your method
all_data_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/all_infer/inference"
)
output_path = Path("/home/fedex/mt/plots/deepsad_reduced_latent_space")

datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# create required output directories if they do not exist
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

normal_experiment_paths = []
anomaly_experiment_paths = []

# locate and sort the npy files (experiment outputs) based on file size
for file_path in all_data_path.iterdir():
    if file_path.suffix != ".npy":
        continue
    # check if the file name contains "output" to ensure it's an experiment output file
    if "output" not in file_path.stem:
        continue
    if "smoke" in file_path.name:
        anomaly_experiment_paths.append(file_path)
    else:
        normal_experiment_paths.append(file_path)

print("Normal experiments:")
for path in normal_experiment_paths:
    print(path.name)

print("\nAnomaly experiments:")
for path in anomaly_experiment_paths:
    print(path.name)

normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)


def load_latent_space_data(experiment_paths):
    """
    Load latent space data from npy files and return a single numpy array.
    Modify this function if your file structure is different.
    """
    data_list = []
    for path in experiment_paths:
        latent_data = np.load(path)
        data_list.append(latent_data)
    return np.vstack(data_list)


def reduce_dimensionality(data, n_components=50):
    """
    Reduce the dimensionality of the data using PCA.
    This function can be re-used by TSNE or other methods for an initial reduction.
    """
    pca = PCA(n_components=n_components, random_state=42)
    return pca.fit_transform(data)


def plot_tsne_latent_space(normal_data, anomaly_data, title="TSNE of Latent Space"):
    """
    Plot the TSNE representation of the latent space.
    This function first applies a PCA-based dimensionality reduction for efficiency.
    """
    # Hardcoded variables to choose every nth normal sample and mth anomaly sample
    n = 10  # Change this value to select every nth normal sample
    m = 2  # Change this value to select every mth anomaly sample

    # Select every nth normal sample and mth anomaly sample
    normal_data = normal_data[::n]
    anomaly_data = anomaly_data[::m]

    # Combine normal and anomaly data
    combined_data = np.vstack((normal_data, anomaly_data))

    # Initial dimensionality reduction with PCA
    reduced_data = reduce_dimensionality(combined_data, n_components=100)

    # Apply TSNE transformation on the PCA-reduced data
    tsne = TSNE(n_components=2, random_state=42)
    tsne_results = tsne.fit_transform(reduced_data)

    # Split the TSNE results back into normal and anomaly arrays
    tsne_normal = tsne_results[: len(normal_data)]
    tsne_anomaly = tsne_results[len(normal_data) :]

    # Plotting TSNE results
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.scatter(
        tsne_anomaly[:, 0], tsne_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
    )
    plt.scatter(
        tsne_normal[:, 0], tsne_normal[:, 1], label="Normal", alpha=0.6, marker="o"
    )
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "tsne_latent_space_plot.png")


def plot_pca_scatter(normal_data, anomaly_data, title="PCA Scatter Plot"):
    """
    Plot a 2-dimensional scatterplot of the latent space using PCA.
    This is useful for visualization and can be easily extended.
    """
    # Combine normal and anomaly data
    combined_data = np.vstack((normal_data, anomaly_data))

    pca = PCA(n_components=2, random_state=42)
    pca_results = pca.fit_transform(combined_data)

    # Split the PCA results back into normal and anomaly arrays
    pca_normal = pca_results[: len(normal_data)]
    pca_anomaly = pca_results[len(normal_data) :]

    # Plotting PCA scatter results
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.scatter(
        pca_anomaly[:, 0], pca_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
    )
    plt.scatter(
        pca_normal[:, 0], pca_normal[:, 1], label="Normal", alpha=0.6, marker="o"
    )
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "pca_latent_space_plot.png")


# load latent space data for both normal and anomalous experiments
normal_data = load_latent_space_data(normal_experiment_paths)
anomaly_data = load_latent_space_data(anomaly_experiment_paths)

# call your plotting functions
plot_tsne_latent_space(normal_data, anomaly_data)
plot_pca_scatter(normal_data, anomaly_data)

# update the 'latest' results folder: delete previous and copy current outputs
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this script to the output folder and to the latest folder to preserve the used code
script_path = Path(__file__)
shutil.copy2(script_path, output_datetime_path)
shutil.copy2(script_path, latest_folder_path)

# move the output date folder to the archive folder for record keeping
shutil.move(output_datetime_path, archive_folder_path)