From b48412cf983390b9c6c89925a9255f58ec11b93e Mon Sep 17 00:00:00 2001 From: Jan Kowalczyk Date: Tue, 1 Jul 2025 14:46:26 +0200 Subject: [PATCH] thesis commit --- thesis/Main.tex | 78 ++++++++++++++++++++++++++++++++--------------- thesis/flake.lock | 10 +++--- thesis/flake.nix | 2 +- 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/thesis/Main.tex b/thesis/Main.tex index 9b76ce3..2ade45d 100755 --- a/thesis/Main.tex +++ b/thesis/Main.tex @@ -71,7 +71,7 @@ \DeclareRobustCommand{\threadtodo}[4]{% \todo[inline, - %\todo[disable, + % \todo[disable, backgroundcolor=red!20, bordercolor=red!50, textcolor=black!80, @@ -209,7 +209,7 @@ {emotional rescue missions, explain why data may be bad, state core research question} {what has and hasn't been done $\rightarrow$ Scope of Research} -Autonomous robots have gained more and more prevailance in search and rescue missions due to not endangering another human being and still being able to fulfil the difficult tasks of navigating hazardous environments like collapsed structures, identifying and locating victims and assessing the environment's safety for human rescue teams. To understand the environment, robots employ multiple sensor systems such as lidar, radar, ToF, ultrasound, optical cameras or infrared cameras of which lidar is the most prominently used due to its accuracy. The robots use the sensors' data to map their environments, navigate their surroundings and make decisions like which paths to prioritize. Many of the aforementioned algorithms are deep learning-based algorithms which are trained on large amounts of data whose characteristics are learned by the models. +Autonomous robots have become more and more prevalent in search and rescue (SAR) missions because they do not endanger another human being while still being able to fulfil the difficult tasks of navigating hazardous environments like collapsed structures, identifying and locating victims and assessing the environment's safety for human rescue teams. To understand the environment, robots employ multiple sensor systems such as lidar, radar, ToF, ultrasound, optical cameras or infrared cameras, of which lidar is the most prominently used due to its accuracy. The robots use the sensors' data to map their environments, navigate their surroundings and make decisions like which paths to prioritize. Many of these mapping, navigation and decision-making algorithms are deep learning-based and are trained on large amounts of data whose characteristics the models learn. Environments of search and rescue situations provide challenging conditions for the sensor systems to produce reliable data. One of the most prominent examples is aerosol particles from smoke and dust, which can obstruct the view and lead sensors to produce erroneous data. If such degraded data was not present in the robots' algorithms' training data, these errors may lead to unexpected outputs and potentially endanger the robot or even human rescue targets. This is especially important for autonomous robots whose decisions are entirely based on their sensor data without any human intervention. To safeguard against these problems, robots need a way to assess the trustworthiness of their sensor systems' data.
@@ -702,15 +702,15 @@ Based on the previously discussed requirements and the challenges of obtaining r \renewcommand{\arraystretch}{1.25} \rowcolors{2}{gray!08}{white} \scriptsize - \begin{tabular}{cp{3.5cm}p{4cm}p{5.5cm}} - \textbf{\#} & \textbf{Sensor} & \textbf{Recorded Data} & \textbf{Key Specs} \\ - 1 & \sensorcell{Spinning 3-D LiDAR}{Ouster OS1-32} & 3-D cloud, reflectivity & 10 Hz, 32 ch, 360° × 42.4°, $\leq$ 120 m \rule{0pt}{2.6ex} \\ - 2 & \sensorcell{mm-wave RADAR (×4)}{TI IWR6843AoP} & 4 × 60° RADAR point clouds & 30 Hz, 60 GHz, 9 m max, 0.05 m res. \\ - 3 & \sensorcell{Solid-state LiDAR}{Velodyne Velarray M1600} & Forward LiDAR cloud & 10 Hz, 160 ch, 120° × 32°, 0.1–30 m \\ - 4 & \sensorcell{RGB-D / stereo cam}{Luxonis OAK-D Pro} & RGB image, depth map, point cloud & 15 fps, 75 mm baseline, active IR 930 nm \\ - 5 & \sensorcell{LED flood-light}{RS PRO WL28R} & Illumination for stereo cam & 7 W, 650 lm (no data stream) \\ - 6 & \sensorcell{IMU}{Pixhawk 2.1 Cube Orange} & Accel, gyro, mag, baro & 190 Hz, 9-DoF, vibration-damped \\ - 7 & \sensorcell{On-board PC}{Intel NUC i7} & Time-synced logging & Quad-core i7, 16 GB RAM, 500 GB SSD \\ + \begin{tabular}{cp{4cm}p{4.5cm}p{5.5cm}} + \textbf{\#} & \textbf{Sensor} & \textbf{Recorded Data} & \textbf{Key Specs} \\ + 1 & \sensorcell{Spinning 3-D LiDAR}{Ouster OS1-32} & 3-D cloud, reflectivity & 10 Hz, 32 ch, 360° × 42.4°, $\leq$ 120 m \rule{0pt}{2.6ex} \\ + 2 & \sensorcell{mm-wave RADAR (×4)}{TI IWR6843AoP} & 4 × 60° RADAR point clouds & 30 Hz, 60 GHz, 9 m max, 0.05 m res. \\ + 3 & \sensorcell{Solid-state LiDAR}{Velodyne Velarray M1600} & Forward LiDAR cloud & 10 Hz, 160 ch, 120° × 32°, 0.1–30 m \\ + 4 & \sensorcell{RGB-D / stereo cam}{Luxonis OAK-D Pro} & stereo b/w images, depth map & 15 fps, 75 mm baseline, active IR 930 nm \\ + 5 & \sensorcell{LED flood-light}{RS PRO WL28R} & Illumination for stereo cam & 7 W, 650 lm (no data stream) \\ + 6 & \sensorcell{IMU}{Pixhawk 2.1 Cube Orange} & Accel, gyro, mag, baro & 190 Hz, 9-DoF, vibration-damped \\ + 7 & \sensorcell{On-board PC}{Intel NUC i7} & Time-synced logging & Quad-core i7, 16 GB RAM, 500 GB SSD \\ \end{tabular} \end{table} @@ -866,32 +866,62 @@ Figure~\ref{fig:data_projections} displays two examples of LiDAR point cloud pro %We discussed the requirements to data labels in section~\ref{sec:data}, where we mentioned the challenges but also importance of correctly labeled data, especially for evaluation. Since to our knowledege no public dataset with objective labels regarding dataset degradation of lidar data in subterranean environments is available and the dataset chosen for evaluation in this thesis \cite{subter} does not contain any explicit data or measurements about the dedata degradation, we had to choose a method of how we would label the data ourselves for evaluation. After considering multiple avenues, we decided to simply label all point clouds created during experiments with artifical smoke present as anomalies and all point clouds from other experiments as normal data. -The remaining challenge, was labeling a large enough portion of the dataset in a reasonably accurate manner, whose difficulties and general approach we described in section~\ref{sec:data_req}. Since, to our knowledge, neither our chosen dataset nor any other publicly available dataset provide objective labels for LiDAR data degradation in the SAR domain, we had to define our own labeling approach. 
With objective measures of degradation unavailable, we explored alternative labeling methods—such as using statistical properties like the number of missing measurements per point cloud or the higher incidence of erroneous measurements near the sensor we described in section~\ref{sec:data_req}. Ultimately, we were concerned that these statistical approaches might lead the method to simply mimic the statistical evaluation rather than to quantify degradation in a generalized and robust manner. After considering these options, we decided to label all point clouds from experiments with artificial smoke as anomalies, while point clouds from experiments without smoke were labeled as normal data. This labeling strategy—based on the presence or absence of smoke—is fundamentally an environmental indicator, independent of the intrinsic data properties recorded during the experiments. +The remaining challenge was labeling a large enough portion of the dataset in a reasonably accurate manner, a task whose difficulties and general approach we described in section~\ref{sec:data_req}. Since, to our knowledge, neither our chosen dataset nor any other publicly available dataset provides objective labels for LiDAR data degradation in the SAR domain, we had to define our own labeling approach. With objective measures of degradation unavailable, we explored alternative labeling methods, such as using the data's statistical properties like the number of missing measurements per point cloud or the higher incidence of erroneous measurements near the sensor, which we described in section~\ref{sec:data_req}. Ultimately, we were concerned that these statistical approaches might lead the method to simply mimic the statistical evaluation rather than to quantify degradation in a generalized and robust manner. After considering these options, we decided to label all point clouds from experiments with artificial smoke as anomalies, while point clouds from experiments without smoke were labeled as normal data. This labeling strategy, based on the presence or absence of smoke, is fundamentally an environmental indicator, independent of the intrinsic data properties recorded during the experiments. %\todo[inline, color=green!40]{this simple labeling method is quite flawed since we do not label based on the actual degradation of the scan (not by some kind of threshold of analog measurement threshold, statistical info about scan) since (TODO FIXME) this would result in training which only learns this given metric (example missing measurement points) which would make this methodology useless since we could simply use that same measurement as an more simple way to quantify the scan's degradation. } %This simplistic approach has both Advantages and disadvantages. The approach is simple to implement and provides a clear and straightforward distinction between normal and anomalous data. As a negative, there are clearly point clouds without subjective degradation present in the experiments with added degradation, which-using this method-get labeled as anomalies even though for actual trainging and evaluation purposes they should not be labeleld as such.
Since we do not have an objective measure available, we looked into other ways to label the data such as statistical data about missing measurements per point cloud or the aforementioned phenomenon of more erroneous measurements up close to the sensor in degraded environments, but we feared that any statistical property of the data or any combination of them would only result in the method learning to replicate those statistical evaluations rather than to actually quantify the degradation in a generalized way. The classification of wether smoke was present during an experiment or not is different here in that it is not dependent on the data but is rather an expression of the environment itself, during the recording of the data. -This simplistic labeling approach has both advantages and disadvantages. On the positive side, it is easy to implement and creates a clear distinction between normal and anomalous data. However, its simplicity is also its drawback: some point clouds from experiments with artificial smoke do not exhibit perceptible degradation, yet they are still labeled as anomalies. The reason for this, is that during the three non-static anomalous experiments the sensor platform starts recording in a tunnel roughly 20 meters from the smoke machine location. It then approaches the smoke machine, navigates the room for some time and then leaves the room, distancing itself from the smoke machine once again. Since the artificical smoke's density is far larger near the machine it originates from, the beginning and end of the anomalous experiments capture point clouds which do not exhibit perceptible degradation. +The simplicity of this labeling approach has both advantages and disadvantages. On the positive side, it is easy to implement and creates a clear distinction between normal and anomalous data. However, its simplicity is also its drawback: some point clouds from experiments with artificial smoke do not exhibit perceptible degradation, yet they are still labeled as anomalies. The reason for this is that during the three non-static anomalous experiments the sensor platform starts recording in a tunnel roughly 20 meters from the smoke machine's location. It starts by approaching the smoke machine, navigates close to the machine for some time and then leaves its perimeter once again. Since the artificial smoke's density is far higher near the machine it originates from, the time the sensor platform spent close to it produced highly degraded point clouds, whereas the beginning and end of each anomalous experiment captured point clouds which are subjectively not degraded and appear similar to ones from the normal experiments. This effect is clearly illustrated by the degradation indicators introduced earlier, namely the proportion of missing points and the number of erroneous points close to the sensor per point cloud, as shown in figure~\ref{fig:data_anomalies_timeline} and sketched in the listing below. -Afraid that the incorrectly labeled data may negatively impact the training of DeepSAD, we chose to manually remove the anomalous labels from the beginning and end of each anomalous experiment, resulting in \dots.
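+To make these two indicators concrete, the following minimal sketch shows one way they could be computed per scan. It assumes, purely for illustration, that a scan is available as a NumPy range image in meters with missing returns encoded as a range of zero, and it reuses the 50\,cm near-sensor threshold from figure~\ref{fig:data_anomalies_timeline}; the actual extraction from the dataset's recordings may differ.
+
+\begin{minted}{python}
+import numpy as np
+
+def degradation_indicators(ranges: np.ndarray, near_threshold_m: float = 0.5):
+    """Per-scan degradation indicators (illustrative assumptions).
+
+    ranges: range image of one LiDAR scan in meters, e.g. channels x azimuth
+            bins, where a value of 0.0 marks a missing return.
+    Returns the fraction of missing returns and the number of returns
+    closer to the sensor than `near_threshold_m`.
+    """
+    total = ranges.size
+    missing = np.count_nonzero(ranges == 0.0)
+    near = np.count_nonzero((ranges > 0.0) & (ranges < near_threshold_m))
+    return missing / total, near
+\end{minted}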
-\todo[inline]{only strongly degraded samples, how many vs earlier, all normal experiments were kept labeled since no smoke was ever present, even though general dust in subt environment may be present in a few samples} -\todo[inline]{this allowed us to safely use the manually labeld anomalies for semi-supervised training, but we found that for evaluation the manually labeled samples were so degraded that multiple methods were able to create a perfect classifier - making evaluation on this not really meaningful. so we also kept experiment based labeling, with the knowledge that there may be a cap of how well any method may perform on this data, since it contains x percent more samples for which it is unclear if they should be anomalies - some definitely should not } +\fig{data_anomalies_timeline}{figures/data_combined_anomalies_timeline.png}{Missing points and points with a measured range smaller than 50\,cm per point cloud over a normalized timeline of the individual experiments. This illustrates the rise, plateau and fall of degradation intensity during the anomalous experiments, owing to the spatial proximity to the degradation source (smoke machine). One of the normal experiments (without artificial smoke) is included as a baseline.} + +Concerned that the incorrectly labeled data might negatively impact DeepSAD's semi-supervised training, we chose to manually remove the anomalous labels from the beginning and end of the anomalous experiments for training purposes. This refinement gave us more confidence in the training signal but reduced the number of labeled anomalies. For evaluation, we therefore report results under both schemes: + +\begin{enumerate} + \item \textbf{Experiment-based labels:} All scans from anomalous experiments are marked anomalous, including border cases, yielding conservative performance metrics that reflect real-world label noise. + \item \textbf{Manually-refined labels:} Only unequivocally degraded scans are marked anomalous, producing near-ideal separation in many cases. +\end{enumerate} + +By evaluating and comparing both labeling schemes (sketched in the listing below), we hope to provide a more thorough performance investigation than either scheme alone would allow. + +%. This resulted in less labeled anomalies which are subjectively easy to assign a binary label to, due to the high degree of degradation present in them. Figure~\ref{fig:data_anomalies_timeline} also alludes to an issue, we faced when using the experiment-based labeling approach during evaluation, namely that the normal experiment which we included as a baseline, shows indication of similar degrees of degradation to the beginning and end states of the anomalous experiments. From this we figured, that while still subjective, an unknown amount of samples will most likely be incorrectly labeled, resulting in a lowered ceiling for performance metrics when using them for evaluation. For this reason we decided to utilize both, the experiment-based and the manually selected labels for evaluation, to achieve a more complete understanding of the compared methods' performance. While we can be quite certain that the manually labeled samples were correctly labeled, the strong level of data degradation present in them led to perfect classifiers in our training, which alone is not very useful when gauging the methods performance. On the other hand, the experiment-based labels resulted in lower performance metrics than initially expected, but which we can explain due to the incorrectly labeled samples, we highlighted in this section.
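+The following minimal sketch illustrates how the two schemes could assign per-scan labels. The scan metadata structure, the 0/1 label convention and the fixed cut-off fractions standing in for our manual selection are all hypothetical simplifications; only the underlying rule (smoke experiment vs.\ no smoke, with the borders of each anomalous experiment relabeled as normal for training) reflects the procedure described above.
+
+\begin{minted}{python}
+from dataclasses import dataclass
+
+@dataclass
+class Scan:
+    # Hypothetical per-scan metadata; the dataset's actual interface may differ.
+    experiment: str      # experiment identifier
+    smoke_present: bool  # True if artificial smoke was used in this experiment
+    rel_time: float      # position within the experiment timeline, 0.0 .. 1.0
+
+def experiment_based_label(scan: Scan) -> int:
+    """Scheme 1: every scan from a smoke experiment is labeled anomalous (1)."""
+    return 1 if scan.smoke_present else 0
+
+def manually_refined_label(scan: Scan, keep: tuple = (0.2, 0.8)) -> int:
+    """Scheme 2 (illustrative): keep the anomalous label only for the middle
+    part of a smoke experiment, where the platform is close to the smoke
+    machine; the cut-off fractions merely stand in for the manual selection."""
+    if not scan.smoke_present:
+        return 0
+    lo, hi = keep
+    return 1 if lo <= scan.rel_time <= hi else 0
+\end{minted}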
%\todo[inline]{TODO maybe evaluate based on different thresholds? missing datapoints, number of detected outliers, number of particles in phantom circle around sensor?} %\todo[inline]{maybe also mention that we considered labeling using output of down-the-pipeline algorithm (e.g., SLAM) and how it performs/how confident it is and retrospectively label the quality of the data based on that} \newchapter{experimental_setup}{Experimental Setup} -\newsection{autoencoder_architecture}{Deep SAD Autoencoder Architecture} -\newsection{data_setup}{Training/Evaluation Data Distribution} -\todo[inline]{which data was used how in training/evaluation} -\todo[inline]{explain concept of global/local application for global-/window quantifiction} -\newsection{evaluation_metrics}{Evaluation Metrics} -\todo[inline]{k-fold evaluation, ROC, generalization (evaluation on other datasets?)} +\threadtodo +{introduce experimental setup, give overview of what will be covered} +{motivation, bg, method and data is know and understood, how was it used} +{codebase, hardware description overview of training setup, details of deepsad setup} +{overview of chapter given $\rightarrow$ give sequential setup overview} -\newsection{hyperparameters}{Hyperparameters} -\todo[inline]{vary hyperparameters (no labeled anomalies vs some), specific training on local windows (only z-axis difference?), window size?} +\todo[inline]{codebase} + +\newsection{setup_overview}{General Description} + \todo[inline]{starting from deepsad codebase} + \todo[inline]{data preprocessed (2d projections, normalized range)} + \todo[inline]{k-fold data loading, training, testing} + \todo[inline]{deepsad + baselines = isoforest, ocsvm (deepsad ae, dim reduction)} + \todo[inline]{roc, prc, inference} + + +\newsection{setup_deepsad}{DeepSAD Description} + \todo[inline]{architectures, visualization, receptive field (explanation, images, x/y resolution)} + \todo[inline]{hyperparameters, LR, eta, epochs, latent space size (hyper param search), semi labels} + +% \newsection{autoencoder_architecture}{Deep SAD Autoencoder Architecture} +% \newsection{data_setup}{Training/Evaluation Data Distribution} +% \todo[inline]{which data was used how in training/evaluation} +% \todo[inline]{explain concept of global/local application for global-/window quantifiction} +% +% \newsection{evaluation_metrics}{Evaluation Metrics} +% \todo[inline]{k-fold evaluation, ROC, generalization (evaluation on other datasets?)} +% +% \newsection{hyperparameters}{Hyperparameters} +% \todo[inline]{vary hyperparameters (no labeled anomalies vs some), specific training on local windows (only z-axis difference?), window size?} \newchapter{results_discussion}{Results and Discussion} \newsection{results}{Results} diff --git a/thesis/flake.lock b/thesis/flake.lock index c81087e..0581679 100644 --- a/thesis/flake.lock +++ b/thesis/flake.lock @@ -5,11 +5,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", "owner": "numtide", "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", "type": "github" }, "original": { @@ -21,8 +21,8 @@ "nixpkgs": { "locked": { "lastModified": 0, - "narHash": "sha256-IAoYyYnED7P8zrBFMnmp7ydaJfwTnwcnqxUElC1I26Y=", - "path": "/nix/store/4cpakzyvfw1rmm9v5i3387x6jd2h1v86-source", + "narHash": 
"sha256-GfpyMzxwkfgRVN0cTGQSkTC0OHhEkv3Jf6Tcjm//qZ0=", + "path": "/nix/store/kcmmd6alr3lx56vkf72503h3pxgf6iv4-source", "type": "path" }, "original": { diff --git a/thesis/flake.nix b/thesis/flake.nix index 3558bc9..e20016e 100644 --- a/thesis/flake.nix +++ b/thesis/flake.nix @@ -18,7 +18,7 @@ latex-packages = with pkgs; [ texlive.combined.scheme-full which - python39Packages.pygments + python310Packages.pygments ]; dev-packages = with pkgs; [