% **************************************************************************************************
% ** SPSC Report and Thesis Template
% **************************************************************************************************
%
% ***** Authors *****
% Daniel Arnitz, Paul Meissner, Stefan Petrik, Dietmar Malli, Johanna Rock
% Signal Processing and Speech Communication Laboratory (SPSC)
% Graz University of Technology (TU Graz), Austria
%
% ***** Changelog *****
% 0.1   2010-01-25   extracted from report template by Daniel Arnitz (not ready yet)
% 0.2   2010-02-08   added thesis titlepage and modified layout (not ready yet)
% 0.3   2010-02-18   added TUG logo and statutory declaration
% 0.4   2010-02-18   moved the information fields below \input{./base/packages} (encoding...)
% 0.5   2010-03-02   added \ShortTitle to fix problems with long thesis titles
%                    added \ThesisType (makes the template suitable for MSc, BSc, PhD, ... Thesis)
% 0.6   2010-06-05   added pagestyle and pagenumbering after frontmatter, packages has now type
% 0.7   2010-09      \Advisors -> \Assessors, inserted frontmatter for thesis
% 0.8   2010-11      added examples
% 0.9   2011-04      \Twosided now {true,false}, scrbook for thesis (\front-, \main-, \backmatter)
%                    added \SpecialNote for titlepage (funding, etc.), added type "homework"
% 0.10  2011-10-18   fixed two typos in \bibliographystyle{} (bug reported by Michael Tauch)
% 0.11  2011-11-09   fixed/modified preamble (bug reported by Michael Tauch)
% 0.12  2012-07-20   added ./base/opt_macros to deal with optional macros
% 0.13  2012-07-27   added \PaperSize
% 0.14  2017-11-03   Fixed thispagestyle issue
%                    Implemented automatic setting of correct page number after switching from
%                    roman numbering back to normal numbering
%                    Implemented \DraftText hack
%                    Moved makeindex from external programm to newer stuff (package...)
%                    Made confidential dependent from \DraftText
%                    Made OptDraftMode and DisplayContentBoxes dependet from \DraftText
%                    Included some syntax formatting definitions
%                    Fixed wrong usage of scrbook class and \emptydoublepage mess... One should
%                    NOT need to adjust/tweak the layout by hand. That's what latex is for...
%                    Replaced bibtex with more modern biblatex (utf8 support in bibliography...)
%                    Added \printbibliography, \listoffigures, \listoftables and 
%                    \printglossary[type=\acronymtype]
%                    Renewed and extended Introduction/Usage
% 0.15  2018-03-20   Homework and report now compileable again. Fixed a missing if.
% 0.16	2018-08-08	 fixed/modified title according to official TUG template
% 0.17	2018-08-09	 updated placeholder commands for new title page
%
% ***** Todo *****
%
% **************************************************************************************************
% basic setup
\newcommand{\DocumentType}{thesis} % "thesis" / "report" / "homework"
\newcommand{\DocumentLanguage}{en} % "en" / "de"
\newcommand{\PaperSize}{a4paper} % "a4paper" / "letterpaper"
\newcommand{\Twosided}{true} % "true" / "false" (=Duplex...)
\newcommand{\FramedLinks}{false} %"true" / "false"

% **************************************************************************************************
% template setup -- do not change these unless you know what you are doing!
\input{./base/documentclass_\DocumentType}
\input{./base/packages}
\input{./base/layout_\DocumentType}
\input{./base/macros}

% **************************************************************************************************

% uncomment to get watermarks:
% \usepackage[first,bottom,light,draft]{draftcopy}
% \draftcopyName{ENTWURF}{160}
 
\usepackage{xcolor}
\usepackage[colorinlistoftodos]{todonotes}

\DeclareRobustCommand{\threadtodo}[4]{%
  \todo[inline,
        backgroundcolor=red!20,
        bordercolor=red!50,
        textcolor=black!80,
        size=\small,
        caption={Common Thread Note}]{%
        \textbf{Goal:} #1 \newline
        \textbf{Context:} #2 \newline
        \textbf{Method:} #3 \newline
        \textbf{Transition:} #4
  }%
}

% correct bad hyphenation
\hyphenation{}

% switches
\newboolean{OptDraftMode}
\newboolean{DisplayContentBoxes}
% \setboolean{OptDraftMode}{true} % optional draft mode for pixel graphics (speed up generation; add \OptDraft to options)

\ifthenelse{\boolean{OptDraftMode}}
{
	\setboolean{DisplayContentBoxes}{true}
}
{
	\setboolean{DisplayContentBoxes}{false}
}


% **************************************************************************************************
% information fields

% general
\newcommand{\DocumentTitle}{Lidar Degradation Quantification for Robot Navigation in Hazy Environments}
%\newcommand{\DocumentSubtitle}{}
\newcommand{\ShortTitle}{} % used in headers (keep short!)
% for thesis: Firstname Surename, current university degree (e.g. BSc)
% for report, homework: Firstname Surename, Mat.Nr.
\newcommand{\DocumentAuthor}{Jan Kowalczyk}
\newcommand{\DocumentPlace}{Graz}

% FOR THESIS ONLY
% used for the title page and statutory declaration
% one out of: "bachelor's thesis" / "Bachelorarbeit" /
%			  "master's thesis" / "Masterarbeit" /
%			  "diploma thesis" / "Diplomarbeit" /
%			  "doctoral thesis" / "Dissertation"
% ATTENTION: use correct language! Otherwise statutory declaration is faulty.
\newcommand{\ThesisTitle}{master's thesis}
\newcommand{\Institute}{Signal Processing and Speech Communication Laboratory}
\newcommand{\OrganizationsAdditional}{in cooperation with \\[0.2cm] \par Virtual Vehicle Research GmbH \\ Graz, Austria \\[2.0cm] \par} 
\newcommand{\Supervisors}{Univ.-Prof. Dipl.-Ing. Dr.mont Franz Pernkopf} % Supervisor 1 \\ Supervisor 2 ...
\newcommand{\SpecialNote}{}

% FOR REPORT ONLY
%revision numbers
\newcommand{\RevPrefix}{alpha~}
\newcommand{\RevLarge}{1}
\newcommand{\RevSmall}{0}

% confidential? (can of course also be used for other messages/notes)
\newcommand{\ConfidNote}{\ifthenelse{\boolean{OptDraftMode}}{
		\textbf{DRAFT}, \today,
]}{
		%\textbf{CONFIDENTIAL}
}}

\DeclareMathAlphabet\mathbfcal{OMS}{cmsy}{b}{n}
\newcommand*\wc{{\mkern 2mu\cdot\mkern 2mu}}

\input{./base/opt_macros}

% variable for page numbering
\newcounter{mypageno}
% **************************************************************************************************
\begin{document}
% **************************************************************************************************
\input{./base/syntax_formatting}

% for thesis: switch to frontmatter (Roman numbering, etc.)
\ifthenelse{\equal{\DocumentType}{thesis}}
{
	\frontmatter \pagestyle{plain} \pagenumbering{Roman}
}{}

%title
\input{./base/titlepage_\DocumentType}

% for thesis: abstract, kurzfassung, affidavit and statutory declaration
\ifthenelse{\equal{\DocumentType}{thesis}}
{
	\emptydoublepage
	\addcontentsline{toc}{chapter}{Statutory Declaration}
	\input{./base/declaration_\DocumentLanguage}
	\emptydoublepage
	\input{thesis_preamble/acknowledgements}
	\emptydoublepage
	\input{thesis_preamble/abstract}
	\emptydoublepage
	\input{thesis_preamble/kurzfassung}
	\emptydoublepage
}{}

\tableofcontents

\ifthenelse{\equal{\DocumentType}{thesis}}
{
	\emptydoublepage
	\setcounter{mypageno}{\value{page}}
	\mainmatter \pagestyle{scrheadings} \pagenumbering{arabic}
	\setcounter{page}{\value{mypageno}}
}


% **************************************************************************************************
% mainmatter (=content)

\newchapter{introduction}{Introduction}
%\todo[inline, color=green!40]{its a master thesis where we try to know how trustworthy the sensor data for robot navigation is}
%\newsection{Motivation and Problem Statement}{motivation}
%\todo[inline]{lidar and its role in robot navigation. discuss sensor degradation and its effects on navigation.}
\threadtodo
{\textit{"What should the reader know after reading this section?"}}
{\textit{"Why is that of interest to the reader at this point?"}}
{\textit{"How am I achieving the stated goal?"}}
{\textit{"How does this lead to the next question or section?"}}

\threadtodo
{Create interest in topic, introduce main goal of thesis, summarize results}
{Reader only has abstract as context, need to create interest at beginning}
{emotional rescue missions, explain why data may be bad, state core research question}
{what has and hasn't been done $\rightarrow$ Scope of Research}

Autonomous robots have gained more and more prevailance in search and rescue missions due to not endangering another human being and still being able to fulfil the difficult tasks of navigating hazardous environments like collapsed structures, identifying and locating victims and assessing the environment's safety for human rescue teams. To understand the environment, robots employ multiple sensor systems such as lidar, radar, ToF, ultrasound, optical cameras or infrared cameras of which lidar is the most prominently used due to its accuracy. The robots use the sensors' data to map their environments, navigate their surroundings and make decisions like which paths to prioritize. Many of the aforementioned algorithms are deep learning-based algorithms which are trained on large amounts of data whose characteristics are learned by the models.

Environments of search and rescue situations provide challenging conditions for the sensor systems to produce reliable data. One of the most promiment examples are aerosol particles from smoke and dust which can obstruct the view and lead sensors to produce erroneous data. If such degraded data was not present in the robots' algorithms' training data these errors may lead to unexpected outputs and potentially endanger the robot or even human rescue targets. This is especially important for autonomous robots whose decisions are entirely based on their sensor data without any human intervention. To safeguard against these problems, robots need a way to assess the trustworthiness of their sensor systems' data.


For remote controlled robots a human operator can make these decisions but many search and rescue missions do not allow remote control due to environment factors, such as radio signal attenuation or the search area's size and therefore demand autonomous robots. Therefore, during the design for such robots we arrive at the following critical question:

\begin{quote} Can autonomous robots quantify the reliability of lidar sensor data in hazardous environments to make more informed decisions? \end{quote}

In this thesis we aim to answer this question by assessing a deep learning-based anomaly detection method and its performance when quantifying the sensor data's degradation. The employed algorithm is a semi-supervised anomaly detection algorithm which uses manually labeled training data to improve its performance over unsupervised methods. We show how much the introduction of these labeled samples improves the methods performance. The models output is an anomaly score which quantifies the data reliability and can be used by algorithms that rely on the sensor data. These reliant algorithms may decide to for example slow down the robot to collect more data, choose alternative routes, signal for help or rely more heavily on other sensor's input data.

\todo[inline]{discuss results (we showed X)}

%\todo[inline, color=green!40]{autonomous robots have many sensors for understanding the world around them, especially visual sensors (lidar, radar, ToF, ultrasound, optical cameras, infrared cameras), they use that data for navigation mapping, SLAM algorithms, and decision making. these are often deep learning algorithms, oftentimes only trained on good data}
%\todo[inline, color=green!40]{difficult environments for sensors to produce good data quality (earthquakes, rescue robots), produced data may be unreliable, we don't know how trustworthy that data is (no quantification, confidence), since all navigation and decision making is based on input data, this makes the whole pipeline untrustworthy/problematic}
%\todo[inline, color=green!40]{contribution/idea of this thesis is to calculate a confidence score which describes how trustworthy input data is. algorithms further down the pipeline (slam, navigation, decision) can use this to make more informed decisions - examples: collect more data by reducing speed, find alternative routes, signal for help, do not attempt navigation, more heavily weight input from other sensors}

\newsection{scope_research}{Scope of Research}

\threadtodo
{clearly state what has and hasn't been researched + explanation why}
{from intro its clear what thesis wants to achieve, now we explain how we do that}
{state limit on data domain, sensors, output of method + reasoning for decisions}
{clear what we want to achieve $\rightarrow$ how is thesis structured to show this work}
%\todo[inline]{output is score, thresholding (yes/no), maybe confidence in sensor/data? NOT how this score is used in navigation/other decisions further down the line}
%\todo[inline]{Sensor degradation due to dust/smoke not rain/fog/...}
%\todo[inline, color=green!40]{we look at domain of rescue robots which save buried people after earthquakes, or in dangerous conditions (after fires, collapsed buildings) which means we are mostly working with indoors or subterranean environments which oftentimes are polluted by smoke and a lot of dust, ideally works for any kind of sensor data degradation but we only explore this domain}

%In this thesis we limit the domain of our research to that of autonomous rescue robots and their unique challenges. The degradation of sensor data in this domain appears to mainly stem from airborne particles, which we evaluate. Other kinds of degradation like from adverse weather effects, specific material properties, irrationally moving structures like leaves from trees and others are out of scope of our research due to the low likelihood of them occuring in the rescue scenarios of autonomous robots. While our approach does not specifically exclude these types of degradation and actually a case can be built for the employed method allowing for quantifying any of these and more kinds of degradation, we do not explicitely look at them or evaluate any of them other than airborne particles.

In this thesis, we focus our research on the unique challenges faced by autonomous rescue robots, specifically the degradation of sensor data caused by airborne particles. While degradation in sensor data can also arise from adverse weather, material properties, or dynamic elements such as moving leaves, these factors are considered less relevant to the rescue scenarios targeted by our study and are therefore excluded. Although our method is versatile enough to quantify various types of degradation, our evaluation is limited to degradation from airborne particles, as this is the most prevalent issue in the operational environments of autonomous rescue robots.

%\todo[inline, color=green!40]{mostly use lidar (state of the art) since they are very accurate in 3d mapping environments, so we focus on quantifying how trustworthy the lidar data is by itself. we do not look at other sensor data (tof, ultrasound, optical)}

%While computer vision systems of robots oftentimes include a multitude of sensor types like time of flight cameras, IR cameras, ultrasound sensors and others, we found that for autonomous robots in rescue missions mapping and navigation challenges are so hard that they require and mostly rely on lidar sensor data which is very accurate, high resolution and allow for mapping the whole surroundings due to their high field of view which oftentimes contains the whole 360° horizontal fov and a quite large vertical fov as well. additionally the cost of these lidar sensors has plummeted over the last decades due to their use in autonomous driving, drones and robotics as well as advancements in manufacturing like the utilisation of microeletromechanical systems which makes their proliferation for these types of vision problems near universal. for these reasons we limit our research to data produced by lidar sensors, namely the pointclouds of measurements in a coordinate system they produce. oftentimes sensor data is fused to achieve better accuracy and higher confidence in data but firstly examining these sensor fusion data would most likely increase the initial complexity of the research too much and secondly it would limit our research to platforms which utilise all the sensors we included instead of being able to quantify the sensor degradation by the lidar data itself.

While robotic computer vision systems often incorporate a variety of sensors—such as time-of-flight cameras, infrared cameras, and ultrasound sensors—we found that autonomous rescue robots primarily depend on LiDAR data for mapping and navigation. LiDAR sensors offer high accuracy, high resolution, and an extensive field of view (often a full 360° horizontally and a substantial vertical coverage), which are essential for constructing comprehensive environmental maps in challenging scenarios. Furthermore, the cost of LiDAR sensors has decreased significantly in recent decades, driven by their widespread adoption in autonomous driving, drones, and robotics, as well as manufacturing advancements like microelectromechanical systems (MEMS). For these reasons, our research is focused exclusively on LiDAR sensor data—specifically, the point clouds generated within a defined coordinate system. Although sensor fusion techniques are commonly used to enhance data accuracy and confidence, incorporating fused data would not only add significant complexity to our study but also limit our analysis to platforms equipped with all the sensor types involved. Consequently, we concentrate on quantifying sensor degradation solely through LiDAR data.

%\todo[inline, color=green!40]{intended output is confidence score which simply means higher score = worse data quality, lower score = trustworthy data. this score can be interpreted by algorithms in pipeline. we do not look at how this is implemented in the algorithms, no binary classifier but analog value, if this is wished followup algorithm has to decide (example by threshold or other methods)}

%The output of the method utilized by us and in our experiments is an analog score which relates to the confidence in the data and inversely to the data degradation. We do not look at how such a score might be utilized but can see many applications like simple thresholding and depending on the outcome deciding not to proceed in a certain direction or a direct usage by for example tying the robots speed to its confidence in the data, if necessary slowing down and collecting more data before progressing. The output score is independent of the time dimension and just a snapshot of each lidar scans degradation. In reality many lidars produce multiple scans per second, which would allow for including the time series of data and the scores they produce into an analysis as well such as for example a running average of the score or more complex statistical analysis. We do not investigate the differences between lidar scans which were taken with a small time delta, only at single snapshots of data in time.

The method we employ produces an analog score that reflects the confidence in the sensor data, with lower confidence indicating higher degradation. Although we do not investigate the direct applications of this score, potential uses include simple thresholding to decide whether to proceed with a given action as well as dynamically adjusting the robot's speed based on data quality to collect additional data when confidence is low. Importantly, this output score is a snapshot for each LiDAR scan and does not incorporate temporal information. While many LiDAR sensors capture multiple scans per second—enabling the possibility of time-series analyses such as running averages or more advanced statistical evaluations—we focus solely on individual scans without examining the differences between successive scans.

\newsection{thesis_structure}{Structure of the Thesis}

\threadtodo
{explain how structure will guide reader from zero knowledge to answer of research question}
{since reader knows what we want to show, an outlook over content is a nice transition}
{state structure of thesis and explain why specific background is necessary for next section}
{reader knows what to expect $\rightarrow$ necessary background info and related work}

\todo[inline]{brief overview of thesis structure}
\todo[inline, color=green!40]{in section x we discuss anomaly detection, semi-supervised learning since such an algorithm was used as the chosen method,  we also discuss how lidar works and the data it produces. then in we discuss in detail the chosen method Deep SAD in section X, in section 4 we discuss the traing and evaluation data, in sec 5 we describe our setup for training and evaluation (whole pipeline). results are presented and discussed in section 6. section 7 contains a conclusion and discusses future work}

\newchapter{background}{Background and Related Work}

%\todo[inline, color=green!40]{in this section we will discuss necessary background knowledge for our chosen method and the sensor data we work with. related work exists mostly from autonomous driving which does not include subter data and mostly looks at precipitation as source of degradation, we modeled after one such paper and try to adapt the same method for the domain of rescue robots, this method is a semi-supervised deep learning approach to anomaly detection which we describe in more detail in sections 2.1 and 2.2. in the last subsection 2.3 we discuss lidar sensors and the data they produce}

%As the domain of robotics and embedded systems often does, this thesis constitutes quite  broad interdisciplinary challenge of various fields of study. As we will see in this chapter, anomaly detection-the methodology we posed our degradation quantification problem as-has roots in statistical analysis and finds utility in many domains. As is the case for many fields of study, there has been success in incorporating learning based techniques-especially deep learning-into it to better or more efficiently solve problems anchored in interpretation of large data amounts. The very nature of anomalies often times makes their form and structure unpredictable, which lends itself to unsupervised learning techniques-ones where the training data is not assigned labels beforehand, since you cannot label what you cannot expect. These unsupervised techniques can oftentimes be improved by utilizing a small but impactful number of labeled training data, which results in semi-supervised methods. The method we evaluate for our task-Deep SAD-is a not only a semi-supervised deep learning approach but also employs an autoencoder in its architecture, a type of neural network architecture which has found widespread use in many deep learning applications over the last few years due to its feature extraction capability which solely relies on unlabeleld data. Its approach typically lends itself especially well to complex data for which feature extraction of conventional manual methods is hard to achieve, like the lidar data we are working with in this thesis. Lidar sensors measure the range from the sensor to the next reflective object for many angles simultaneously by projecting a laser in a specified direction and measuring the time it takes a reflected ray to return to the sensor. From the output angles of the rays and the measured travel time the sensor can construct a point cloud which is oftentimes dense enough to map out the sensors surroundings. In this chapter we will discuss these necessary technologies, give an overview of their history, use-cases and describe how it will be utilized in this thesis.

\threadtodo
{explain which background knowledge is necessary and why + mention related work}
{reader learns what he needs to know and which related work exists}
{state which background subsections will follow + why and mention related work}
{necessity of knowledge and order of subsections are explained $\rightarrow$ essential background}

This thesis tackles a broad, interdisciplinary challenge at the intersection of robotics, embedded systems, and data science. In this chapter, we introduce the background of anomaly detection—the framework we use to formulate our degradation quantification problem. Anomaly detection has its roots in statistical analysis and has been successfully applied in various domains. Recently, the incorporation of learning-based techniques, particularly deep learning, has enabled more efficient and effective analysis of large datasets.

Because anomalies are, by nature, unpredictable in form and structure, unsupervised learning methods are often preferred since they do not require pre-assigned labels—a significant advantage when dealing with unforeseen data patterns. However, these methods can be further refined through the integration of a small amount of labeled data, giving rise to semi-supervised approaches. The method evaluated in this thesis, DeepSAD, is a semi-supervised deep learning approach that also leverages an autoencoder architecture. Autoencoders have gained widespread adoption in deep learning for their ability to extract features from unlabeled data, which is particularly useful for handling complex data types such as LiDAR scans.

LiDAR sensors function by projecting lasers in multiple directions simultaneously, measuring the time it takes for each reflected ray to return. Using the angles and travel times, the sensor constructs a point cloud that is often dense enough to accurately map its surroundings. In the following sections, we will delve into these technologies, review their historical development and use cases, and describe how they are employed in this thesis.

\todo[inline, color=green!40]{mention related work + transition to anomaly detection}


\newsection{anomaly_detection}{Anomaly Detection}

\threadtodo
{explain AD in general, allude to DeepSAD which was core method here}
{problem is formulated as AD problem, so reader needs to understand AD}
{give overview of AD goals, categories and challenges. explain we use DeepSAD}
{lots of data but few anomalies + hard to label $\rightarrow$ semi-supervised learning}

Anomaly detection refers to the process of detecting unexpected patterns of data, outliers which deviate significantly from the majority of data which is implicitly defined as normal by its prevalence. In classic statistical analysis these techniques have been studied as early as the 19th century~\cite{anomaly_detection_history}. Since then, a multitude of methods and use-cases for them have been proposed and studied. Examples of applications include healthcare, where computer vision algorithms are used to detect anomalies in medical images for diagnostics and early detection of diseases~\cite{anomaly_detection_medical}, detection of fraud in decentralized financial systems based on block-chain technology~\cite{anomaly_detection_defi} as well as fault detection in industrial machinery using acoustic sound data~\cite{anomaly_detection_manufacturing}.

Figure~\ref{fig:anomaly_detection_overview} depicts a simple but illustrative example of data which can be classified as either normal or anomalous and shows the problem anomaly detection methods try to generally solve. A successful anomaly detection method would somehow learn to differentiate normal from anomalous data, for example by learning the boundaries around the available normal data and classifying it as either normal or anomalous based on its location inside or outside of those boundaries. Another possible approach could calculate an analog value which correlates with the likelihood of an sample being anomalous, for example by using the sample's distance from the closest normal data cluster's center.


\begin{figure}
	\begin{center}
		\includegraphics[width=0.5\textwidth]{figures/anomaly_detection_overview}
	\end{center}
	\caption{An illustrative example of anomalous and normal data containing 2-dimensional data with clusters of normal data $N_1$ and $N_2$ as well as two single anomalies $o_1$ and $o_2$ and a cluster of anomalies $O_3$. Reproduced from~\cite{anomaly_detection_survey}}\label{fig:anomaly_detection_overview}
\end{figure}

By their very nature anomalies are rare occurences and oftentimes unpredictable in nature, which makes it hard to define all possible anomalies in any system. It also makes it very challenging to create an algorithm which is capable of detecting anomalies which may have never occured before and may not have been known to exist during the creation of the detection algorithm. There are multiple possible approaches taken by anomaly detection algorithms to achieve this feat.

\citeauthor{anomaly_detection_survey} categorize anomaly detection algorithms in~\cite{anomaly_detection_survey} into six distinct categories based on the techniques used:

\begin{enumerate}
	\item \textbf{Classification Based} - Using classification techniques such as SVMs, neural networks to classify samples as either normal or anomalous based on labeled training data. Alternatively, if not enough labeled training data is available a one-class classification algorithm can be used which assumes all training samples to be normal and then learns a boundary around the normal samples to differentiate them from anomalous samples which lie outside the learnt boundary.
	\item \textbf{Clustering Based} - Using clustering techniques such as K-Means clustering, DBSCAN to cluster normal data together with the assumption that anomalies do not belong to the cluster, are an appreciable distance from the clusters center or belong to smaller different clusters than the normal data.
	\item \textbf{Nearest Neighbor Based} - Similar to clustering based, these techniques assume normal data is more closely clustered than anomalies and therefore judge samples based on either the distance to their $k^{th}$ nearest neighbor or on the density of their local neighborhood.
	\item \textbf{Statistical} - Using statistical techniques to fit a statistical model of the normal behaviour to the data and determining if samples are anomalous based on their likelihood of fitting into the statistical model.
	\item \textbf{Information Theoretic} - Using information theoretic measures to determine iregularities in the data's information content which are assumed to be caused by anomalies.
	\item \textbf{Spectral} - Using dimensionality reduction techniques like PCA to embed the data into a lower dimensional subspace where normal data appears significantly different from anomalous data. Spectral techniques may also be used as a pre-processing step followed by any other anomaly detection algorithm in the lower dimensional subspace.
\end{enumerate}


In this thesis we used an anomaly detection method, namely Deep Semi-Supervised Anomaly Detection~\cite{deepsad} to model our problem -how to quantify the degradation of lidar sensor data- as an anomaly detection problem. We do this by classifying good quality data as normal and degraded data as anomalous and rely on a method which can express each samples likelihood of being anomalous as an analog anomaly score, which enables us to interpret it as the datas degradation quantification value.

Chapter~\ref{chp:deepsad} describes DeepSAD in more detail, which shows that it is a clustering based approach with a spectral pre-processing component, in that it first uses a neural network to reduce the inputs dimensionality while simultaneously clustering normal data closely around a given centroid and then gives an anomaly score by calculating the geometric distance between a single data sample and the aforementioned cluster centroid in the lower dimensional subspace. Since our data is high dimensional it makes sense to use a spectral method to reduce the datas dimensionality and an approach which results in an analog value rather than a binary classification is useful for our use-case since we want to quantify not only classify the data degradation.

%\todo[inline, color=green!40]{data availability leading into semi-supervised learning algorithms}

As already shortly mentioned at the beginning of this section, anomaly detection methods and their usage are oftentimes challenged by the limited availability of anomalous data, owing to the very nature of anomalies which are rare occurences. Oftentimes the intended use-case is to even find unknown anomalies in a given dataset which have not yet been identified. In addition, it can be challenging to classify anomalies correctly for complex data, since the very definition of an anomaly is dependent on many factors such as the type of data, the intended use-case or even how the data evolves over time. For these reasons most types of anomaly detection approaches limit their reliance on anomalous data during training and many of them do not differentiate between normal and anomalous data at all. DeepSAD is a semi-supervised method which is characterized by using a mixture of labeled and unlabeled data.


% strategies of anomaly detection algorithnms according to x include classification, neirest neighbor, clustering, spectral, information theoretic, statistical

%\todo[inline, color=green!40]{cite exists since X and has been used to find anomalous data in many domains and works with all kinds of data types/structures (visual, audio, numbers). examples healthcare (computer vision diagnostics, early detection), financial anomalies (credit card fraud, maybe other example), security/safety video cameras (public, traffic, factories).}
%\todo[inline, color=green!40]{the goal of these algorithms is to differentiate between normal and anomalous data by finding statistically relevant information which separates the two, since these methods learn how normal data typically is distributed they do not have to have prior knowledge of the types of all anomalies, therefore can potentially detect unseen, unclassified anomalies as well. main challenges when implementing are that its difficult to cleanly separate normal from anormal data}
%\todo[inline, color=green!40]{typically no or very little labeled data is available and oftentimes the kinds of possible anomalies are unknown and therefore its not possible to label all of them. due to these circumstances anomaly detection methods oftentimes do not rely on labeled data but on the fact that normal circumstances make up the majority of training data (quasi per defintion)}
%\todo[inline, color=green!40]{figure example shows 2d data but anomaly detection methods work with any kind of dimensionality/shape. shows two clusters of normal data with clear boundaries and outside examples of outliers (anomalous data two single points and one cluster), anomaly detection methods learn to draw these boundaries from the training data given to them which can then be used to judge if unseen data is normal or anormal}
%\todo[inline, color=green!40]{as discussed in motivation, and same as in reference paper (rain autonomous driving) we model our problem as an anomaly detection problem where we define that good quality sensor data is normal data and degraded sensor data (in our case due to dust/smoke) is defined as an anomaly. this allows us to quantify the degradation of data by using the anomaly detection method to check how likely new data is an anomaly}
\iffalse
	Anomaly detection algorithms are designed to detect or quantify the likelihood of a pattern in data deviating significantly from a well-defined expected norm. Deviations such as these are classified as anomalies or outliers and often signify critical or actionable information.
	\begin{figure}
		\begin{center}
			\includegraphics[width=0.5\textwidth]{figures/anomaly_detection_overview}
		\end{center}
		\caption{An example of a 2-dimensional data set with anomalies. Reproduced from~\cite{Chandola2009AnomalyDA}}\label{fig:anomaly_detection_overview}
	\end{figure}

	\todo[inline]{Figure example normal data boundaries, single outliers o1, o2, cluster of outliers o3. difficult to define boundaries so that all normal data inside and anomalies outside }
\fi

\newsection{semi_supervised}{Semi-Supervised Learning Algorithms}

\threadtodo
{Give machine learning overview, focus on semi-supervised (what \& why)}
{used method is semi-supervised ML algorithm, reader needs to understand}
{explain what ML is, how the different approaches work, why to use semi-supervised}
{autoencoder special case (un-/self-supervised) used in DeepSAD $\rightarrow$ explain autoencoder}

Machine learning defines types of algorithms capable of learning from existing data  on previously unseen data without being explicitely programmed to do so~\cite{machine_learning_first_definition}. They are oftentimes categorized by the underlying technique employed, by the type of task they are trained to achieve or by the feedback provided to the algorithm during training. The last categorization typically includes supervised learning, unsupervised learning and reinforcement learning.

For supervised learning each data sample is augmented by including a label which depicts the ideal output of the algorithm for the given sample. During the learning step these algorithms can compare their generated output with the one provided by an expert to improve their performance. Such labels are typically either a categorical target or a continuous target which are most commonly used for classification and regression tasks respectively.

Unsupervised learning algorithms use raw data without a target label that can be used during the learning process. These types of algorithms are often used to identify underlying patterns in data which may be hard to discover using classical data analysis due to large data size or high data complexity. Common use cases include clustering the data into two or more clusters which can be differentiated from each other according to some predesignated criteria and dimensionality reduction tasks which transform high-dimensional data into a lower-dimensional subspace while retaining meaningful information of the original data.

The third category -reinforcement learning- takes a more interactive approach to learning in that it provides the algorithm with an environment and an interpreter of the environment's state which can be used during learning to explore new actions and their impact on the environment's state. The interpreter can then provide a reward or punishment to the algorithm based on the outcome of its actions. To improve the algorithms capability it will try to maximize the rewards received from the interpreter while still introducing some randomness to attempt the exploration of different ideas. Reinforcement learning is usually used for cases where an algorithm has to make sequences of decisions in complex environments such as for autonomous driving tasks.

Semi-Supervised learning algorithms are -as the name implies- an inbetween category of supervised and unsupervised algorithms in that they use a mixture of labeled and unlabeled data. Typically vastly more unlabeled data is used during training of such algorithms than labeled data, oftentimes due to the effort and expertise required to label large quantities of data correctly for supervised training methods. The target tasks of semi-supervised methods can come from both the domains of supervised and unsupervised algorithms. For classification tasks which are typically achieved using supervised learning the additional unsupervised data is added during training with the hope to achieve a better outcome than when training only with the supervised portion of the data. In contrast for typical unsupervised learning tasks such as clustering algorithms, the addition of labeled samples can help guide the learning algorithm to improve performance over fully unsupervised training.


%\todo[inline, color=green!40]{our chosen method Deep SAD is a semi-supervised deep learning method whose workings will be discussed in more detail in secion X}
\todo[inline, color=green!40]{DeepSAD is semi-supervised, autoencoder in pre-training is interesting case since its un-/self-supervised. explained in more detail next}

\newsection{autoencoder}{Autoencoder}

\threadtodo
{Explain how autoencoders work and what they are used for}
{autoencoder used in deepSAD}
{explain basic idea, unfixed architecture, infomax, mention usecases, dimension reduction}
{dimensionality reduction, useful for high dim data $\rightarrow$ pointclouds from lidar}

\todo[inline]{autoencoder explanation}
\todo[inline, color=green!40]{autoencoders are a neural network architecture archetype (words) whose training target is to reproduce the input data itself - hence the name. the architecture is most commonly a mirrored one consisting of an encoder which transforms input data into a hyperspace represantation in a latent space and a decoder which transforms the latent space into the same data format as the input data (phrasing), this method typically results in the encoder learning to extract the most robust and critical information of the data and the (todo maybe something about the decoder + citation for both). it is used in many domains translations, LLMs, something with images (search example + citations)}
\todo[inline, color=green!40]{typical encoder decoder mirrored figure}
\todo[inline, color=green!40]{explain figure}
\todo[inline, color=green!40]{our chosen method Deep SAD uses an autoencoder to translate input data into a latent space, in which it can more easily differentiate between normal and anomalous data}

\newsection{lidar_related_work}{Lidar - Light Detection and Ranging}

\threadtodo
{Explain how lidars work and what data they produce}
{understand why data is degraded, and how data looks}
{explain how radar/lidar works, usecases, output = pointclouds, what errors}
{lidar used in automotive $\rightarrow$ related work - rain degradation}

\todo[inline]{related work in lidar}
\todo[inline, color=green!40]{the older more commonly known radar works by sending out an electromagnetic wave in the radiofrequency and detecting the time it takes to return (if it returns at all) signalling a reflective object in the path of the radiowave. lidar works on the same principle but sends out a lightray produced by a laser (citation needed) and measuring the time it takes for the ray to return to the sensor. since the speed of light is constant in air the system can calculate the distance between the sensor and the measured point. modern lidar systems send out multiple, often millions of measurement rays per second which results in a three dimensional point cloud, constructed from the information in which direction the ray was cast and the distance that was measured}
\todo[inline, color=green!40]{lidar is used in most domains reliant on accurate 3d representations of the world like autonomous driving, robot navigation, (+ maybe quickly look up two other domains), its main advantage is high measurement accuracy, precision (use correct term), and high resolution (possible due to single point measurements instead of cones like radar, ToF, Ultrasonic) which enables more detailed mappings of the environment}
\todo[inline, color=green!40]{due to point precision, lidar is sensitive to noise/degradation of airborne particles, which may produce early returns, deflections, errrors of light rays, this results in noise in the 3d point cloud and possibly missing data of the measurement behind the aerosol particle.}
\todo[inline, color=green!40]{because of the given advantages of lidar it is most commonly used nowadays on robot platforms for environment mapping and navigiation - so we chose to demonstrate our method based on degraded data collected by a lidar sensor as discussed in more dtail in section (data section)}

\newsection{related_work}{Related Work}

\threadtodo
{What other research has been done on this topic}
{reader knows all background, what is starting point of research}
{talk about rain degradation paper from automotive, cleaning pointclouds?}
{Rain paper successful with DeepSAD $\rightarrow$ what is DeepSAD}


\newchapter{deepsad}{Deep SAD: Semi-Supervised Anomaly Detection}

%In this chapter we explore the method \emph{Deep Semi-Supervised Anomaly Detection}~\cite{deepsad} which we employed during our experiments to quanitfy the degradation of lidar scans caused by artifically introduced water vapor from a theater smoke machine. The same approach of modeling a degradation quantification problem as an anomaly detection task was succesfully used in \cite{degradation_quantification_rain} to quantify the degradation caused to lidar scans by bad weather conditions such as rain, fog and snow for autonomous driving tasks. Deep SAD is characterized by it being a deep-learning approach to anomaly detection which enables it to learn more complex anomalous data patterns than more classic statistical approaches and its capability of employing hand-labeled data samples-both normal and anomalous-during its training step to better teach the model to differentiate between know anomalies and normal data than if only an unsupervised approach was used which basically just learns the most common patterns in the implicitely more common normal data and to differentiate anything from that.

\threadtodo
{Introduce DeepSAD, how and why do we use it}
{let reader know why they need to know about Deepsad in detail}
{explain use-case, similar use-case worked, allude to core features}
{interest/curiosity created $\rightarrow$ wants to learn about DeepSAD}

In this chapter, we explore the method \emph{Deep Semi-Supervised Anomaly Detection} (Deep SAD)~\cite{deepsad}, which we employ to quantify the degradation of LiDAR scans caused by airborne particles in the form of artificially introduced water vapor from a theater smoke machine. A similar approach—modeling degradation quantification as an anomaly detection task—was successfully applied in \cite{degradation_quantification_rain} to assess the impact of adverse weather conditions on LiDAR data for autonomous driving applications. Deep SAD leverages deep learning to capture complex anomalous patterns that classical statistical methods might miss. Furthermore, by incorporating a limited amount of hand-labeled data (both normal and anomalous), it can more effectively differentiate between known anomalies and normal data compared to purely unsupervised methods, which typically learn only the most prevalent patterns in the dataset~\cite{deepsad}.


%Deep Semi-Supervised Anomaly Detection~\cite{deepsad} is a deep-learning based anomaly detection method whose performance in regards to sensor degradation quantification we explore in this thesis. It is a semi-supervised method which allows the introduction of manually labeled samples in addition to the unlabeled training data to improve the algorithm's performance over its unsupervised predecessor Deep One-Class Classification~\cite{deepsvdd}. The working principle of the method is to encode the input data onto a latent space and train the network to cluster normal data close together while anomalies get mapped further away in that latent space. 
%\todo[inline, color=green!40]{Deep SAD is a semi-supervised anomaly detection method proposed in cite, which is based on an unsupervised method (Deep SVDD) and additionally allows for providing some labeled data which is used during the training phase to improve the method's performance}
\newsection{algorithm_description}{Algorithm Description}
%\todo[inline]{explain deepsad in detail}

%Deep SAD is a typical clustering based anomaly detection technique which is described in \cite{anomaly_detection_survey} to generally have a two step approach to anomaly detection. First a clustering algorithm is used to cluster data closely together around a centroid and secondly the distances from data to that centroid is calculated and interpreted as an anomaly score. This general idea can also be found in the definition of the Deep SAD algorithm, which uses the encoder part of an autoencoder architecture which is trained to cluster data around a centroid in the latent space of its output. The datas geometric distance to that centroid in the latent space is defined as an anomaly score. Deep SAD is a semi-supervised training based method which can work completely unsupervised (no labeled data available) in which case it falls back to its predecessor method Deep SVDD but additionally allows the introduction of labeleld data samples during training to more accurately map known normal samples near the centroid and known anomalous samples further away from it.

\threadtodo
{give general overview about how it works}
{overview helps reader understand method, then go into detail}
{how clustering AD generally works, how it does in DeepSAD}
{since the reader knows the general idea $\rightarrow$ what is the step-by-step?}

\todo[inline]{remove categorization? its a bit complicated since it looks like clustering, also has spectral and can maybe even be interpreted to be information theoretic?}

Deep SAD is an anomaly detection algorithm that belongs to the category of clustering-based methods, which according to~\cite{anomaly_detection_survey} typically follow a two-step approach. First, a clustering algorithm groups data points around a centroid; then, the distances of individual data points from this centroid are calculated and used as an anomaly score. In addition to that, DeepSAD also utilizes a spectral component by mapping the input data onto a lower-dimensional space, which enables it to detect anomalies in high-dimensional complex data types. In Deep SAD, these concepts are implemented by employing a neural network, which is jointly trained to map data into a latent space and to minimize the volume of an data-encompassing hypersphere whose center is the aforementioned centroid. The geometric distance in the latent space to the hypersphere center is used as the anomaly score, where a larger distance between data and centroid corresponds to a higher probability of a sample being anomalous. This is achieved by shrinking the data-encompassing hypersphere during training, proportionally to all training data, of which is required that there is significantly more normal than anomalous data present. The outcome of this approach is that normal data gets clustered more closely around the centroid, while anomalies appear further away from it as can be seen in the toy example depicted in figure~\ref{fig:deep_svdd_transformation}. 

\fig{deep_svdd_transformation}{figures/deep_svdd_transformation}{DeepSAD teaches a neural network to transform data into a latent space and minimize the volume of an data-encompassing hypersphere centered around a predetermined centroid $\textbf{c}$. \\Reproduced from~\cite{deepsvdd}.}

\threadtodo
{first step is pre-training, why does it use an autoencoder?}
{go chronologically over how the algorithm works. starting at pre-training}
{pre-training is autoencoder, self-supervised, dimensionality reduction}
{pre-training done $\rightarrow$ how are the pre-training results used?}

DeepSAD requires a pre-training step, during which an autoencoder is trained on all available training data. One of DeepSAD's goals is to map input data onto a lower dimensional latent space, in which the separation between normal and anomalous data can be achieved. To this end DeepSAD and its predecessor Deep SVDD make use of the autoencoder's reconstruction goal, whose successful training ensures confidence in that the encoder architecture is suitable for extracting the input datas' most prominent information to the latent space inbetween the encoder and decoder. DeepSAD goes on to use just the encoder as its main network architecture, discarding the decoder at this step, since reconstruction of the input is unnecessary. 

%The results of the pre-training are used twofold. Firstly the encoders' weights at the end of pre-training can be used to initialize Deep SAD's weights for the main training step which aligns with the aforementioned Infomax principle, since we can assume the autoencoder maximized the shared information between the input and the latent space represenation. Secondly an initial forward-pass is run on the encoder network for all available training data samples and the results' mean position in the latent space is used to define the hypersphere center $\mathbf{c}$ which according to~\cite{deepsad} allows for faster convergence during the main training step than randomly chosen centroids. An alternative method of initializing the hypersphere center could be to use only labeled normal examples for the forward-pass, so not to pollute $\mathbf{c}$'s position with anomalous samples, which would only be possible if sufficient labeled normal samples are available. From this point on, the hypersphere center $\mathbf{c}$ stays fixed and does not change, which is necessary since it being a free optimization variable could lead to a trivial hypersphere collapse solution if the network was trained fully unsupervised.

\threadtodo
{what is pre-training output used for, how is centroid calculated and why}
{reader knows about pre-training, what are next steps and how is it used}
{pre-training weights used to init main network, c is mean of forward pass, collapse}
{network built and initialized, centroid fixed $\rightarrow$ start main training}

The pre-training results are used in two more key ways. First, the encoder weights obtained from the autoencoder pre-training initialize DeepSAD’s network for the main training phase. Second, we perform an initial forward pass through the encoder on all training samples, and the mean of these latent representations is set as the hypersphere center, $\mathbf{c}$. According to \cite{deepsad}, this initialization method leads to faster convergence during the main training phase compared to using a randomly selected centroid. An alternative would be to compute $\mathbf{c}$ using only the labeled normal examples, which would prevent the center from being influenced by anomalous samples; however, this requires a sufficient number of labeled normal samples. Once defined, the hypersphere center $\mathbf{c}$ remains fixed, as allowing it to be optimized freely could in the unsupervised case lead to a hypersphere collapse-a trivial solution where the network learns to map all inputs directly onto the centroid $\mathbf{c}$.

\threadtodo
{how does the main training work, what data is used, what is the optimization target}
{main training is next step since all preconditions are met}
{main training is SGD backpropagation, minimizing volume, un-/labeled data used}
{network is trained $\rightarrow$ how does one use it for AD?}

In the main training step, DeepSAD's network is trained using SGD backpropagation. The unlabeled training data is used with the goal to minimize an data-encompassing hypersphere. Since one of the pre-conditions of training was the significant prevelance of normal data over anomalies in the training set, normal samples collectively cluster more tightly around the centroid, while the rarer anomalous samples do not contribute as significantly to the optimization, resulting in them staying further from the hypersphere center. The labeled data includes binary class labels signifying their status as either normal or anomalous samples. Labeled anomalies are pushed away from the center by defining their optimization target as maximizing the distance between them and $\mathbf{c}$. Labeled normal samples are treated similar to unlabeled samples with the difference that DeepSAD includes a hyperparameter capable of controling the proportion with which labeled and unlabeled data contribute to the overall optimization. The resulting network has learned to map normal data samples closer to $\mathbf{c}$ in the latent space and anomalies further away.

\todo[inline]{maybe pseudocode algorithm block?}

\threadtodo
{how to use the trained network?}
{since we finished training, we need to know how to utilize it}
{forward pass, calculate distance from c =  anomaly score, analog, unknown magnitude}
{General knowledge of the algorithm achieved $\rightarrow$ go into more detail}

To infer if a previously unknown data sample is normal or anomalous, the sample is fed in a forward-pass through the fully trained network. During inference, the centroid $\mathbf{c}$ needs to be known, to calculate the geometric distance of the samples latent representation to $\mathbf{c}$. This distance is tantamount to an anomaly score, which correlates with the likelihood of the sample being anomalous. Due to differences in input data type, training success and latent space dimensionality, the anomaly score's magnitude has to be judged on an individual basis for each trained network. This means, scores produced by one network that signify normal data, may very well clearly indicate an anomaly for another network. The geometric distance between two points in space is a scalar analog value, therefore post-processing of the score is necessary to achieve a binary classification of normal and anomalous if desired.

\newsection{algorithm_details}{Algorithm Details and Hyperparameters}
%\todo[inline]{backpropagation optimization formula, hyperaparameters explanation}

%As a pre-training step an autoencoder architecture is trained and its weights are used to initialize its encoder part before training of the method itself begins. \citeauthor{deepsad} argue in~\cite{deepsad} that this pre-training step which was already present in~\cite{deepsvdd}, allows them to not only interpret the method in geometric terms as minimum volume estimation but also in probalistic terms as entropy minimization over the latent distribution, since the autoencoding objective implicitely maximizes the mutual information between the data and its latent space represenation. This insight-that the method follows the Infomax principle with the additional objective of the latent distribution having mininmal entropy-allowed \citeauthor{deepsad} to introduce an additional term in Deep SAD's - over Deep SVDD's objective, which encorporates labeled data to better model the nature of normal and anomalous data. They show that Deep SAD's objective can be interpreted as normal data's distribution in the latent space being modeled to have low entropy and anomalous data's distribution in that latent space being modeled as having high entropy, which they argue captures the nature of the difference between normal and anomalous data by interpreting anomalies ``as being generated from an infinite mixture of distributions that are different from normal data distribution''~\cite{deepsad}.

Since Deep SAD is heavily based on its predecessor Deep SVDD it is helpful to first understand Deep SVDD's optimization objective, so we start with explaining it here.  For input space $\mathcal{X} \subseteq \mathbb{R}^D$, output space $\mathcal{Z} \subseteq \mathbb{R}^d$ and a neural network $\phi(\wc; \mathcal{W}) : \mathcal{X} \to \mathcal{Z}$ where $\mathcal{W}$ depicts the neural networks' weights with $L$ layers $\{\mathbf{W}_1, \dots, \mathbf{W}_L\}$, $n$ the number of unlabeled training samples $\{\mathbf{x}_1, \dots, \mathbf{x}_n\}$, $\mathbf{c}$ the center of the hypersphere in the latent space, Deep SVDD teaches the neural network to cluster normal data closely together in the latent space by defining its optimization objective as seen in~\ref{eq:deepsvdd_optimization_objective}.

\begin{equation}
	\label{eq:deepsvdd_optimization_objective}
	\min_{\mathcal{W}} \quad
	\frac{1}{n} \sum_{i=1}^{n}\|\phi(\mathbf{x}_{i};\mathcal{W})-\mathbf{c}\|^{2}
	+\frac{\lambda}{2}\sum_{\ell=1}^{L}\|\mathbf{W}^{\ell}\|_{F}^{2}.
\end{equation}

As can be seen from \ref{eq:deepsvdd_optimization_objective}, Deep SVDD is an unsupervised method which does not rely on labeled data to train the network to differentiate between normal and anomalous data. The first term of the optimization objective depicts the shrinking of the data-encompassing hypersphere around the given center $\mathbf{c}$. For each data sample $\{\mathbf{x}_1, \dots, \mathbf{x}_n\}$, its geometric distance to $\mathbf{c}$ in the latent space produced by the neural network $\phi(\wc; \mathcal{W})$ is minimized proportionally to the amount of data samples $n$. The second term is a standard L2 regularization term which prevents overfitting with hyperparameter $\lambda > 0$  and $\|\wc\|_F$ denoting the Frobenius norm.

\citeauthor{deepsad} argue in \cite{deepsad} that the pre-training step employing an autoencoder—originally introduced in \cite{deepsvdd}—not only allows a geometric interpretation of the method as minimum volume estimation i.e., the shrinking of the data encompassing hypersphere but also a probabilistic one as entropy minimization over the latent distribution. The autoencoding objective during pre-training implicitly maximizes the mutual information between the data and its latent representation, aligning the approach with the Infomax principle while encouraging a latent space with minimal entropy. This insight enabled \citeauthor{deepsad} to introduce an additional term in DeepSAD’s objective, beyond that of its predecessor Deep SVDD~\cite{deepsvdd}, which incorporates labeled data to better capture the characteristics of normal and anomalous data. They demonstrate that DeepSAD’s objective effectively models the latent distribution of normal data as having low entropy, while that of anomalous data is characterized by higher entropy. In this framework, anomalies are interpreted as being generated from an infinite mixture of distributions that differ from the normal data distribution.

The introduction of the aforementioned term in Deep SAD's objective allows it to learn in a semi-supervised way, though it can operate in a fully unsupervised mode—effectively reverting to its predecessor, Deep SVDD~\cite{deepsvdd}—when no labeled data are available. However, it also allows for the incorporation of labeled samples during training. This additional supervision helps the model better position known normal samples near the hypersphere center and push known anomalies farther away, thereby enhancing its ability to differentiate between normal and anomalous data.


From this it is easy to understand Deep SAD's optimization objective seen in \ref{eq:deepsad_optimization_objective} which additionally defines $m$ number of labeled data samples $\{(\mathbf{\tilde{x}}_1, \tilde{y}_1), \dots, (\mathbf{\tilde{x}}_m, \tilde{y}_1)\} \in \mathcal{X} \times \mathcal{Y}$ and $\mathcal{Y} = \{-1,+1\}$ for which $\tilde{y} = +1$ denotes normal and $\tilde{y} = -1$ anomalous samples as well as a new hyperparameter $\eta > 0$ which can be used to balance the strength with which labeled and unlabeled samples contribute to the training.

\begin{equation}
	\label{eq:deepsad_optimization_objective}
	\min_{\mathcal{W}} \quad
	\frac{1}{n+m} \sum_{i=1}^{n}\|\phi(\mathbf{x}_{i};\mathcal{W})-\mathbf{c}\|^{2}
	+\frac{\eta}{n+m}\sum_{j=1}^{m}\left(\|\phi(\tilde{\mathbf{x}}_{j};\mathcal{W})-\mathbf{c}\|^{2}\right)^{\tilde{y}_{j}}
	+\frac{\lambda}{2}\sum_{\ell=1}^{L}\|\mathbf{W}^{\ell}\|_{F}^{2}.
\end{equation}

The first term of \ref{eq:deepsad_optimization_objective} stays mostly the same, only atributing for the introduced $m$ labeled datasamples in its proportionality. The second term is newly introduced to incorporate the labeled data samples with the strength of hyperparameter $\eta$, by depending on each data samples label $\tilde{y}$ either minimizing or maximizing the distance from the samples latent represenation to $\mathbf{c}$. The third term, is kept identical compared to Deep SVDD as standard L2 regularization. It can also be observed that in case of $m = 0$ labeled samples, Deep SAD falls back to the same optimization objective of Deep SVDD and can therefore be used in a completely unsupervised fashion as well.


\newsubsubsectionNoTOC{Hyperparameters}

The neural network architecture of DeepSAD is not fixed but rather dependent on the datatype the algorithm is supposed to operate on. This is due to the way it employs an autoencoder for pre-training and the encoder part of the network for its main training step. This makes the adaption of an autoencoder architecture suitable to the specific application necessary but also allows for flexibility in choosing a fitting architecture depending on the application's requirements. For this reason the specific architecture employed, may be considered an hyperparameter of the Deep SAD algorithm. During the pre-training step-as is typical for autoencoders-no labels are necessary since the optimization objective of autoencoders is generally to reproduce the input, as is indicated by the architecture's name.
\todo[inline]{Talk about choosing the correct architecture (give example receptive fields for image data from object detection?)}

%\todo[inline, color=green!40]{Core idea of the algorithm is to learn a transformation to map input data into a latent space where normal data clusters close together and anomalous data gets mapped further away. to achieve this the methods first includes a pretraining step of an auto-encoder to extract the most relevant information, second it fixes a hypersphere center in the auto-encoders latent space as a target point for normal data and third it traings the network to map normal data closer to that hypersphere center. Fourth The resulting network can map new data into this latent space and interpret its distance from the hypersphere center as an anomaly score which is larger the more anomalous the datapoint is}
%\todo[inline, color=green!40]{explanation pre-training step: architecture of the autoencoder is dependent on the input data shape, but any data shape is generally permissible. for the autoencoder we do not need any labels since the optimization target is always the input itself. the latent space dimensionality can be chosen based on the input datas complexity (search citations). generally a higher dimensional latent space has more learning capacity but tends to overfit more easily (find cite). the pre-training step is used to find weights for the encoder which genereally extract robust and critical data from the input because TODO read deepsad paper (cite deepsad). as training data typically all data (normal and anomalous) is used during this step.}
%\todo[inline, color=green!40]{explanation hypersphere center step: an additional positive ramification of the pretraining is that the mean of all pre-training's latent spaces can be used as the hypersphere target around which normal data is supposed to cluster. this is advantageous because it allows the main training to converge faster than choosing a random point in the latent space as hypersphere center. from this point onward the center C is fixed for the main training and inference and does not change anymore.}
%\todo[inline, color=green!40]{explanation training step: during the main training step the method starts with the pre-trained weights of the encoder but removes the decoder from the architecture since it optimizes the output in the latent space and does not need to reproduce the input data format. it does so by minimizing the geometric distance of each input data's latent space represenation to the previously defined hypersphere center c. Due to normal data being more common in the inputs this results in normal data clustering closely to C and anormal data being pushed away from it. additionally during this step the labeled data is used to more correctly map normal and anormal data}
%\todo[inline, color=green!40]{explanation inference step: with the trained network we can transform new input data into the latent space and calculate its distance from the hypersphere center which will be smaller the more confident the network is in the data being normal and larger the more likely the data is anomalous. This output score is an analog value dependent on multiple factors like the latent space dimensionality, encoder architecture and ??? and has to be interpreted further to be used (for example thresholding)}

%\todo[inline, color=green!40]{in formula X we see the optimization target of the algorithm. explain in one paragraph the variables in the optimization formula}
%\todo[inline, color=green!40]{explain the three terms (unlabeled, labeled, regularization)}

\newsection{advantages_limitations}{Advantages and Limitations}
\todo[inline]{semi supervised, learns normality by amount of data (no labeling/ground truth required), very few labels for better training to specific situation}

\newchapter{data_preprocessing}{Data and Preprocessing}
%\todo[inline, color=green!40]{good data important for learning based methods and for evaluation. in this chapter we talk about the requirements we have for our data and the difficulties that come with them and will then give some information about the dataset that was used as well as how the data was preprocessed for the experiments (sec 4.2)}

%Fortunately situations like earthquakes, structural failures and other circumstances where rescue robots need to be employed are uncommon occurences. When such an operation is conducted, the main focus lies on the fast and safe rescue of any survivors from the hazardous environment, therefore it makes sense that data collection is not a priority. Paired with the rare occurences this leads to a lack of publicly available data of such situations. To improve any method, a large enough, diversified and high quality dataset is always necessary to provide a comprehensive evaluation. Additionally, in this work we evaluate a training based method, which increases the requirements on the data manifold, which makes it all the more complex to find a suitable dataset. In this chapter we will state the requirements we defined for the data, talk about the dataset that was chosen for this task, including some statistics and points of interest, as well as how it was preprocessed for the training and evaluation of the methods.

Situations such as earthquakes, structural failures, and other emergencies that require rescue robots are fortunately rare. When these operations do occur, the primary focus is on the rapid and safe rescue of survivors rather than on data collection. Consequently, there is a scarcity of publicly available data from such scenarios. To improve any method, however, a large, diverse, and high-quality dataset is essential for comprehensive evaluation. This challenge is further compounded in our work, as we evaluate a training-based approach that imposes even higher requirements on the data to enable training, making it difficult to find a suitable dataset.

In this chapter, we outline the specific requirements we established for the data, describe the dataset selected for this task—including key statistics and notable features—and explain the preprocessing steps applied for training and evaluating the methods.


\newsection{data}{Data}


%\todo[inline]{describe data sources, limitations}
%\todo[inline]{screenshots of camera/3d data?}
%\todo[inline]{difficulties: no ground truth, different lidar sensors/settings, different data shapes, available metadata, ...}

%\todo[inline, color=green!40]{we require lidar sensor data that was collected in a domain as closely related to our target domain (rescue robots indoors, cave-ins, ) as possible which also includes some kind of appreciable degradation for which we have some kind of labeling possibility. ideally the degradation should be from smoke/dust/aerosol particles. most data should be without degradation (since we require more normal than anormal data to train the method as described in X) but we need enough anormal data so we can confidently evaluate the methods performance}

%Our main requirement for the data was for it to be as closely related to the target domain of rescue operations as possible. Since autonomous robots get largely used in situations where a structural failures occured we require of the data to be subterranean. This provides the additional benefit, that data from this domain oftentimes already has some amount of airborne particles like dust due to limited ventilation and oftentimes exposed rock, which is to be expected to also be present in rescue situations. The second and by far more limiting requirement on the data, was that there has to be appreciable degradation due to airborne particles as would occur during a fire from smoke. The type of data has to at least include lidar but for better understanding other types of visual data e.g., visual camera images would be benefical. The amount of data has to be sufficient for training the learning based methods while containing mostly good quality data without degradation, since the semi-supervised method implicitely requires a larger amount of normal than anomalous training for successful training. Nonetheless, the number of anomalous data samples has to be large enough that a comprehensive evaluation of the methods' performance is possible.
\newsubsubsectionNoTOC{Requirements}

Our primary requirement for the dataset was that it closely reflects the target domain of rescue operations. Because autonomous robots are predominantly deployed in scenarios involving structural failures, the data should be taken from subterranean environments. This setting not only aligns with the operational context but also inherently includes a larger than normal amount of airborne particles (e.g., dust) from limited ventilation and exposed rock surfaces, which is typically encountered during rescue missions.

A second, more challenging requirement is that the dataset must exhibit significant degradation due to airborne particles, as would be expected in scenarios involving smoke from fires. The dataset should at minimum include LiDAR data, and ideally also incorporate other visual modalities (e.g., camera images) to provide a more comprehensive understanding of the environment.

Additionally, the dataset must be sufficiently large for training learning-based methods. Since the semi-supervised approach we utilize relies on a predominance of normal data over anomalous data, it is critical that the dataset predominantly consists of high-quality, degradation-free samples. At the same time, there must be enough anomalous samples to allow for a thorough evaluation of the method’s performance.

\newsubsubsectionNoTOC{Labeling Challenges}

%\todo[inline, color=green!40]{labeling is an especially problematic topic since ideally we would want an analog value which corresponds with the amount of smoke present for evaluation. for training we only require the possibility to provide labels in the form of normal or anormal targets (binary classification) and these labels do not have to be present for all data, only for some of the data (since semi-supervised only uses some labeled data as discussed in X)}

%To evaluate how proficiently any method can quantify the degradation of lidar data we require some kind of degradation label per scan. Ideally we would want an analog value per scan which somehow correlates to the degradation, but even a binary label of either degraded or not degraded would be useful. To find out which options are available for this task, we first have to figure out what degradation means in the context of lidar scans and especially the point clouds in which they result. Lidar sensors combine multiple range measurements which are executed near simultaneously into a point cloud whose reference point is the sensor location at the time of measurement. Ideally for each attempted measurement during a scan one point is produced, albeit in reality there are many factors why a fraction of the measurements cannot be completed and therefore there will be missing points even in good conditions. Additionally, there are also measurements which result in an incorrect range, like for example when an aerosol particle is hit by the measurement ray and a smaller range than was intended to be measured (to the next solid object) was returned. The sum of missing and erroneous measurements makes up the degradation, although it can be alleged that the term also includes the type or structure of errors or missing points and the resulting difficulties when further utilizing the resulting point cloud. For example, if aerosol particles are dense enough in a small portion of the frame, they could produce a point cloud where the particles are interpreted as a solid object even though the amount of erroneous measurements is smaller than for another scan where aerosol particles are evenly distributed around the sensor. In the latter case the erroneous measurements may be identified by outlier detection algorithms and after removal do not hinder further processing of the point cloud. For these reasons it is not simple to define data degradation for lidar scans. 

%Another option would be to try to find an objective measurement of degradation. As the degradation in our use case mostly stems from airborne particles, it stands to reason that measuring the amount of them would enable us to label each frame with an analog score which correlates to the amount of degradation. This approach turns out to be difficult to implement in real life, since sensors capable of measuring the amount and size of airborne particles typically do so at the location of the sensor while the lidar sensor also sends measurement rays into all geometries visible to it. This localized measurement could be useful if the aerosol particle distribution is uniform enough but would not allow the system to anticipate degradation in other parts of the point cloud. We are not aware of any public dataset fit for our requirements which also includes data on aerosol particle density and size.

To evaluate how effectively a method can quantify LiDAR data degradation, we require a degradation label for each scan. Ideally, each scan would be assigned an analog value that correlates with the degree of degradation, but even a binary label—indicating whether a scan is degraded or not—would be useful.

Before identifying available options for labeling, it is essential to define what “degradation” means in the context of LiDAR scans and the resulting point clouds. LiDAR sensors combine multiple range measurements, taken nearly simultaneously, into a single point cloud with the sensor’s location as the reference point. In an ideal scenario, each measurement produces one point; however, in practice, various factors cause some measurements to be incomplete, resulting in missing points even under good conditions. Additionally, some measurements may return incorrect ranges. For example, when a measurement ray strikes an aerosol particle, it may register a shorter range than the distance to the next solid object. The combined effect of missing and erroneous measurements constitutes degradation. One could also argue that degradation includes the type or structure of errors and missing points, which in turn affects how the point cloud can be further processed. For instance, if aerosol particles are densely concentrated in a small region, they might be interpreted as a solid object which could indicate a high level of degradation, even if the overall number of erroneous measurements is lower when compared to a scan where aerosol particles are evenly distributed. In the latter case, outlier detection algorithms might easily remove the erroneous points, minimizing their impact on subsequent processing. Thus, defining data degradation for LiDAR scans is not straightforward.

An alternative approach would be to establish an objective measurement of degradation. Since the degradation in our use case primarily arises from airborne particles, one might assume that directly measuring their concentration would allow us to assign an analog score that correlates with degradation. However, this approach is challenging to implement in practice. Sensors that measure airborne particle concentration and size typically do so only at the sensor’s immediate location, whereas the LiDAR emits measurement rays that traverse a wide field of view. This localized measurement might be sufficient if the aerosol distribution is uniform, but it does not capture variations in degradation across the entire point cloud. To our knowledge, no public dataset exists that meets our requirements while also including detailed data on aerosol particle density and size.

%For training purposes we generally do not require labels since the semi-supervised method may fall back to a unsupervised one if no labels are provided. To improve the method's performance it is possible to provide binary labels i.e., normal and anomalous-correlating to non-degraded and degraded respectively-but the amount of the provided training labels does not have to be large and can be handlabelled as is typical for semi-supervised methods, since they often work on mostly unlabeled data which is difficult or even impossible to fully label.

For training, explicit labels are generally not required because the semi-supervised method we employ can operate in an unsupervised manner when labels are absent. However, incorporating binary labels—normal for non-degraded and anomalous for degraded conditions—can enhance the method's performance. Importantly, only a small number of labels is needed, and these can be hand-labeled, which is typical in semi-supervised learning where the majority of the data remains unlabeled due to the difficulty or impracticality of fully annotating the dataset.


%\todo[inline, color=green!40]{We chose to evaulate the method on the dataset "Multimodal Dataset from Harsh Sub-Terranean Environment with Aerosol Particles for Frontier Exploration"~\cite{alexander_kyuroson_2023_7913307} which is a public dataset collected by X in a sub-terranean environment and includes data from multiple sensors on a moving sensor platform as well as experiments where sensor data is explicitely degraded by aerosol particles produced by a smoke machine.}

\newsubsubsectionNoTOC{Chosen Dataset}

%\todo[inline, color=green!40]{list sensors on the platform}
%Based on the previously discussed requirements and labeling difficulties we decided to train and evaluate the methods on \emph{Multimodal Dataset from Harsh Sub-Terranean Environment with Aerosol Particles for Frontier Exploration}~\cite{subter}. The dataset is comprised of data from multiple sensors on a moving sensor platform which was driven through tunnels and rooms in a subterranean setting. What makes it especially fitting for our use case is that during some of the experiments, an artifical smoke machine was employed to simulate aerosol particles. 
%The sensors employed during capture of the dataset include:
Based on the previously discussed requirements and the challenges of obtaining reliable labels, we selected the \emph{Multimodal Dataset from Harsh Sub-Terranean Environment with Aerosol Particles for Frontier Exploration}~\cite{subter} for training and evaluation. This dataset comprises multimodal sensor data collected from a moving platform navigating tunnels and rooms in a subterranean environment. Notably, some experiments incorporated an artificial smoke machine to simulate aerosol particles, making the dataset particularly well-suited to our use case. The sensors used during data capture include:\todo[inline, color=green!40]{refer to sketch with numbers}

\begin{itemize}
	\item Lidar - Ouster OS1-32
	\item mmWave RADARs - 4 IWR6843AoP ES2.0 based radar models
	\item Lidar - Velodyne Velarray M1600
	\item IR-enabled RBG-D Camera - OAK-D Pro
	\item IMU - Pixhawk 2.1 Cube Orange,
\end{itemize}
%\todo[inline, color=green!40]{lidar data of 360° sensor is captured at 10 frames per second. each sensor output consists of point cloud which resulted from measurement of 32 vertical channels for each of which 2048 measurement points are taken during each measurement equiangular distributed around the whole horizontal 360°, so the sensor measures 32 * 2048 = 65536 measurements 10 times a second for which ideally every one produces a point in the point cloud consisting of x,y,z coordinates (relative to sensor platform) as well as some other values per measurement (reflectivity, intensity originally measured range value)}

%We mainly utilize the data from the \emph{Ouster OS1-32} lidar sensor, which produces 10 frames per second with a resolution of 32 vertical channels by 2048 measurements per channel, both equiangularly spaced over the vertical and horizontal fields of view of 42.4° and 360° respectively. Every measurement of the lidar therefore results in a point cloud with a maximum of 65536 points. Every point contains the \emph{X}, \emph{Y} and \emph{Z} coordinates in meters with the sensor location as origin, as well as values for the \emph{range}, \emph{intensity} and \emph{reflectivity} which are typical data measured by lidar sensors. The data is dense, meaning missing measurements are still present in the data of each point cloud with zero values for most fields.

\todo[inline, color=green!40]{short description of sensor platform and refer to photo}

We use data from the \emph{Ouster OS1-32} LiDAR sensor, which was configured to capture 10 frames per second with a resolution of 32 vertical channels and 2048 measurements per channel. These settings yield equiangular measurements across a vertical field of view of 42.4° and a complete 360° horizontal field of view. Consequently, every LiDAR scan can generate up to 65,536 points. Each point contains the \emph{X}, \emph{Y}, and \emph{Z} coordinates (in meters, with the sensor location as the origin) along with values for \emph{range}, \emph{intensity}, and \emph{reflectivity}—typical metrics measured by LiDAR sensors. Although the dataset is considered dense, each point cloud still contains missing measurements, with fields of these missing measurements registering as zero.

\begin{figure}
	\centering
	\subfigure{\includegraphics[width=0.45\textwidth]{figures/data_subter_platform_photo.jpg}\label{fig:subter_platform_sketch}}%
	\hfill
	\subfigure{\includegraphics[width=0.45\textwidth]{figures/data_subter_platform_sketch.png}\label{fig:subter_platform_photo}}%
	\caption{\todo[inline, color=green!40]{better caption} 1-OS1-32, 2-mmWave RADARs, 3-M1600, 4-OAK-D Pro. 5-LED, 6-IMU, and 7-Intel NUC. Reproduced from~\cite{subter}}\label{fig:subter_platform}
\end{figure}

%During the measurement campaign 14 experiments were conducted, of which 10 did not contain the utilization of the artifical smoke machine and 4 which did contain the artifical degradation, henceforth refered to as normal and anomalous experiments respectively. During 13 of the experiments the sensor platform was in near constant movement (sometimes translation - sometimes rotation) with only 1 anomalous experiment having the sensor platform stationary. This means we do not have 2 stationary experiments to directly compare the data from a normal and an anomalous experiment, where the sensor platform was not moved, nonetheless the genereal experiments are similar enough for direct comparisons. During anomalous experiments the artifical smoke machine appears to have been running for some time before data collection, since in camera images and lidar data alike, the water vapor appears to be distributed quite evenly throughout the closer perimeter of the smoke machine. The stationary experiment is also unique in that the smoke machine is quite close to the sensor platform and actively produces new smoke, which is dense enough for the lidar data to see the surface of the newly produced water vapor as a solid object. 

During the measurement campaign, 14 experiments were conducted—10 without the artificial smoke machine (hereafter referred to as normal experiments) and 4 with it (anomalous experiments). In 13 of these experiments, the sensor platform was in near-constant motion (either translating or rotating), with only one anomalous experiment conducted while the platform remained stationary. Although this means we do not have two stationary experiments for a direct comparison between normal and anomalous conditions, the overall experiments are similar enough to allow for meaningful comparisons.

In the anomalous experiments, the artificial smoke machine appears to have been running for some time before data collection began, as evidenced by both camera images and LiDAR data showing an even distribution of water vapor around the machine. The stationary experiment is particularly unique: the smoke machine was positioned very close to the sensor platform and was actively generating new, dense smoke, to the extent that the LiDAR registered the surface of the fresh water vapor as if it were a solid object.

%\todo[inline, color=green!40]{shortly mention the differences in conditions for these experiments and why they do not matter for us}

%The 14 experiments differ regarding the available illumination, the presence of humans-traversing the measurement grounds- or additional static objects as artifcats and of course regarding the presence of the water vapor from the smoke machine. Aside from the artifical smoke which is essential for our use case, the other differences during the individual experiments are of no interestet to us and do not affect it in any way. Regardless of illumination, the lidar sensor produces indistinguishable point clouds and any static objects do not factor into our quantification of the point clouds' degradation.

The 14 experiments varied in illumination conditions, the presence of humans on the measurement grounds, and additional static artifacts, as well as in the presence of water vapor from the smoke machine. For our purposes, only the artificial smoke is relevant; differences in lighting or incidental static objects do not affect our analysis. Regardless of illumination, the LiDAR sensor consistently produces comparable point clouds, and the presence of static objects does not influence our quantification of point cloud degradation.

%\todo[inline, color=green!40]{include representative image of point cloud and camera image}

The figures~\ref{fig:data_screenshot_pointcloud}~and~\ref{fig:data_screenshot_camera} show an representative depiction of the environment of the experiments as a camera image of the IR camera and the point cloud created by the OS1 lidar sensor at practically the same time.

\fig{data_screenshot_pointcloud}{figures/data_screenshot_pointcloud.png}{Screenshot of 3D rendering of an experiment without smoke and with illumination (same frame and roughly same alignment as figure~\ref{fig:data_screenshot_camera}). Point color corresponds to measurement range and axis in center of figure is the lidar's position.}
\fig{data_screenshot_camera}{figures/data_screenshot_camera.png}{Screenshot of IR camera output of an experiment without smoke and with illumination (same frame and roughly same alignment as figure~\ref{fig:data_screenshot_pointcloud})}

%\todo[inline, color=green!40]{talk about how much data is available (maybe a plot about data?), number of experiments with/without degradation, other factors in these experiments which do not concern our use-case of them}
%Regarding the amount of data, of the 10 normal experiments the shortest was 88.7 seconds and the longest 363.1 seconds with a mean of 157.65 seconds between all 10 experiments, which results in 15765 non-degraded point clouds. Of the 4 anomalous experiments, the shortest was the stationary one with 11.7 seconds and the longest was 62.1 seconds, having a mean of 47.325 seconds, resulting in 1893 degraded point clouds. This gives us 17658 point clouds alltogether with 89.28\% of them being non-degraded/normal samples and the other 10.72\% of them begin degraded/anomalous samples. 

Regarding the dataset volume, the 10 normal experiments ranged from 88.7 to 363.1 seconds, with an average duration of 157.65 seconds. At a capture rate of 10 frames per second, these experiments yield 15,765 non-degraded point clouds. In contrast, the 4 anomalous experiments, including one stationary experiment lasting 11.7 seconds and another extending to 62.1 seconds, averaged 47.33 seconds, resulting in 1,893 degraded point clouds. In total, the dataset comprises 17,658 point clouds, with approximately 89.28\% classified as non-degraded (normal) and 10.72\% as degraded (anomalous). The distribution of experimental data is visualized in figure~\ref{fig:data_points_pie}.

\fig{data_points_pie}{figures/data_points_pie.png}{Pie chart visualizing the amount and distribution of normal and anomalous point clouds in \cite{subter}}

%BEGIN missing points
As we can see in figure~\ref{fig:data_missing_points}, the artifical smoke introduced as explicit degradation during some experiments results in more missing measurements during scans, which can be explained by measurement rays hitting airborne particles but not being reflected back to the sensor in a way it can measure.

\fig{data_missing_points}{figures/data_missing_points.png}{Density histogram showing the percentage of missing measurements per scan for normal experiments without degradation and anomalous experiments with artifical smoke introduced as degradation.}
%END missing points

%BEGIN early returns

% In experiments with artifical smoke present, we observe many points in the point cloud very close to the sensor where there are no solid objects and therefore the points have to be produced by airborne particles from the artifical smoke. The phenomenon can be explained, in that the closer to the sensor an airborne particle is hit, the higher the chance of it reflecting the ray in a way the lidar can measure. In \ref{fig:particles_near_sensor} we see a box diagram depicting how significantly more measurements of the anomaly expirements produce a range smaller than 50 centimeters. Due to the sensor platform's setup and its paths taken during experiments we can conclude that any measurement with a range smaller than 50 centimeters has to be erroneous. While the amount of these returns near the sensor could most likely be used to estimate the sensor data quality while the sensor itself is located inside an environment containing airborne particles, this method would not allow to anticipate sensor data degradation before the sensor itself enters the affected area. Since lidar is used to sense the visible geometry from a distance, it would be desireable to quantify the data degradation of an area before the sensor itself enters it. Due to these reasons we did not use this phenomenon in our work.

In experiments with artificial smoke, we observe numerous points in the point cloud very close to the sensor, even though no solid objects exist at that range. These points are therefore generated by airborne particles in the artificial smoke. This phenomenon occurs because the closer an airborne particle is to the sensor, the higher the probability it reflects the laser beam in a measurable way. As shown in Figure~\ref{fig:particles_near_sensor}, a box diagram illustrates that significantly more measurements during these experiments report ranges shorter than 50 centimeters. Given the sensor platform's setup and its experimental trajectory, we conclude that any measurement with a range under 50 centimeters is erroneous.

While the density of these near-sensor returns might be used to estimate data quality when the sensor is already in an environment with airborne particles, this method cannot anticipate data degradation before the sensor enters such an area. Since LiDAR is intended to capture visible geometry from a distance, it is preferable to quantify potential degradation of an area in advance. For these reasons, we did not incorporate this phenomenon into our subsequent analysis.

\fig{particles_near_sensor}{figures/particles_near_sensor_boxplot_zoomed_500.png}{Box diagram depicting the percentage of measurements closer than 50 centimeters to the sensor for normal and anomalous experiments}
%END early returns

\newsection{preprocessing}{Preprocessing Steps and Labeling}

\newsubsubsectionNoTOC{Preprocessing}
%\todo{describe how 3d lidar data was preprocessed (2d projection), labeling}
%\todo[inline]{screenshots of 2d projections?}

%\todo[inline, color=green!40]{while as described in sec X the method Deep SAD is not dependend on any specific type/structure of data it requires to train an auto encoder in the pretraining step. such autoencoders are better understood in the image domain since there are many uses cases for this such as X (TODO citation needed), there are also 3d data auto encoders such as X (todo find example). same as the reference paper (rain cite) we chose to transform the 3d data to 2d by using a spherical spherical projection to map each of the 3d points onto a 2d plane where the range of each measurement can be expressed as the brightness of a single pixel. this leaves us with a 2d image of resolution 32x2048 (channels by horizontal measurements), which is helpful for visualization as well as for choosing a simpler architecture for the autoencoder of deepsad, the data in the rosbag is sparse meaning that measurements of the lidar which did not produce any value (no return ray detected before sensor specific timeout) are simply not present in the lidar scan. meaning we have at most 65xxx measurements per scan but mostly fewer than this, (maybe statistic about this? could aslo be interesting to show smoke experiment stuff)}

%As described in section~\ref{sec:algorithm_description} the method we want to evaluate is datatype agnostic and can be adjusted to work with any kind of data. The data from~\cite{subter} that we will train on is a point cloud per scan created by the lidar sensor which contains up to 65536 points with \emph{X}, \emph{Y}, and \emph{Z} coordinates (in meters) per point. To adjust the architecture of Deep SAD to work with a specific datatype, we have to define an autoencoder architecture that works for the given datatype. While autoencoders can be created for any datatype, as~\cite{autoencoder_survey} points out over 60\% of research papers pertaining autoencoders in recent years look at image classification and reconstruction, so we have a better understanding of their architectures for two dimensional images than for three dimensional point clouds.

As described in Section~\ref{sec:algorithm_description}, the method under evaluation is data type agnostic and can be adapted to work with any kind of data. In our case, we train on point clouds from~\cite{subter}, where each scan produced by the LiDAR sensor contains up to 65,536 points, with each point represented by its \emph{X}, \emph{Y}, and \emph{Z} coordinates. To tailor the Deep SAD architecture to this specific data type, we would need to design an autoencoder suitable for processing three-dimensional point clouds. Although autoencoders can be developed for various data types, as noted in~\cite{autoencoder_survey}, over 60\% of recent research on autoencoders focuses on two-dimensional image classification and reconstruction. Consequently, there is a more established understanding of architectures for images compared to those for three-dimensional point clouds.

%\todo[inline, color=green!40]{to achieve this transformation we used the helpful measurement index and channel present in each measurement point of the dataset which allowed a perfect reconstruction of the 2d projection without calculating the pixel position in the projection of each measurement via angles which in our experience typically leads to some ambiguity in the projection (multiple measurements mapping to the same pixel due to precision loss/other errors) the measurement index increases even for unavailable measurements (no ray return) so we can simply create the 2d projection by mapping the normalized range (FIXME really normalized) value to the pixel position y = channel, x = measurement index. by initalizing the array to NaN values originally we have a 2d data structure with the range values and NaN on pixel positions where originally no measurement took place (missing measurements in scans due to no ray return)}

%For this reason we decided to preprocess the point clouds by converting them to two dimensional grayscale images using spherical projection. Additionally, \cite{degradation_quantification_rain}-which we modeled our approach after-successfully chose this approach. In the projected image each measurement is encoded to a single pixel, whose grayscale value $v$ is the normalized range of the measurement $v = \sqrt{\emph{X}^2 + \emph{Y}^2 + \emph{Z}^2}$. Due to the settings of the datasets' lidar, this results in images with the resolution of 2048 pixels wide by 32 pixels tall. Missing measurements of the point cloud are mapped to pixels with a brightness of 0. To create the mapping we used the measurements indices and channels which are available since the dataset contains dense point clouds and which can be used since the point indices are ordered from 0 to 65535 horizontally ascending channel by channel. For point clouds without indices which can be directly mapped, as is often the case for sparse ones, it would be necessary to use the pitch and yaw angles to the sensor origin to map each point to a pixel on the projection.

For this reason and to simplify the architecture, we converted the point clouds into two-dimensional grayscale images using a spherical projection. This approach—also employed successfully in \cite{degradation_quantification_rain}—encodes each LiDAR measurement as a single pixel, where the pixel’s grayscale value is determined by the normalized range, calculated as $v = \sqrt{\emph{X}^2 + \emph{Y}^2 + \emph{Z}^2}$. Given the LiDAR sensor's configuration, the resulting images have a resolution of 2048 pixels in width and 32 pixels in height. Missing measurements in the point cloud are mapped to pixels with a brightness value of 0.

To create this mapping, we leveraged the available measurement indices and channel information inherent in the dense point clouds, which are ordered from 0 to 65,535 in a horizontally ascending, channel-by-channel manner. For sparser point clouds without such indices, one would need to rely on the pitch and yaw angles relative to the sensor's origin to correctly map each point to its corresponding pixel.

%In figure~\ref{fig:data_projections} we see two projections of lidar point clouds from the experiments, which are visually different from the preprocessed data for better understanding of the reader. While the point clouds were converted to grayscale images with a resolution of 2048 by 32 pixels, these projections can be hard to interpret for humans. For this reason the projections are depicted using the viridis colormap and vertically stretched, so single measurements are multiple pixels tall, since otherwise the image is only 32 pixels tall and hard to decipher. The top projection was created from a lidar scan where no artifical smoke and therefore appreciable degradation was present, whereas the lower projection is from an experiment with artifical smoke and a lot of degradation. 

Figure~\ref{fig:data_projections} displays two examples of LiDAR point cloud projections to aid in the reader’s understanding. Although the original point clouds were converted into grayscale images with a resolution of 2048×32 pixels, these raw images can be challenging to interpret. To enhance human readability, we applied the viridis colormap and vertically stretched the images so that each measurement occupies multiple pixels in height. The top projection is derived from a scan without artificial smoke—and therefore minimal degradation—while the lower projection comes from an experiment where artificial smoke introduced significant degradation.

\todo[inline, color=green!40]{add same projections as they are used in training}

\fig{data_projections}{figures/data_2d_projections.png}{Two-dimensional projections of two pointclouds, one from an experiment without degradation and one from an experiment with artifical smoke as degradation}

\newsubsubsectionNoTOC{Labeling}

%\todo[inline, color=green!40]{another important preprocessing step is labeling of the lidar frames as normal/anormal. this is one hand used during training (experiments with zero labeled up to most of the data being labeled) and on the other hand is important for evaluation of the method performance. originally we do not have any labels on the data regarding degradation and no analog values from another sensor which measures current smoke particles in the air. our simple approach was to label all frames from experiments which included artifical degradation by fog machine smoke as anomalous and all frames from experiments without artifical degradation as normal.}

%We discussed the requirements to data labels in section~\ref{sec:data}, where we mentioned the challenges but also importance of correctly labeled data, especially for evaluation. Since to our knowledege no public dataset with objective labels regarding dataset degradation of lidar data in subterranean environments is available and the dataset chosen for evaluation in this thesis \cite{subter} does not contain any explicit data or measurements about the dedata degradation, we had to choose a method of how we would label the data ourselves for evaluation. After considering multiple avenues, we decided to simply label all point clouds created during experiments with artifical smoke present as anomalies and all point clouds from other experiments as normal data. 

We discussed the challenges and importance of obtaining correctly labeled data in Section~\ref{sec:data}, particularly for evaluation purposes. Since, to our knowledge, no public dataset provides objective labels for LiDAR data degradation in subterranean environments—and the dataset selected for this thesis \cite{subter} lacks explicit measurements of degradation—we had to develop our own labeling approach. After considering several options, we decided to label all point clouds from experiments with artificial smoke as anomalies, while point clouds from experiments without smoke were labeled as normal data.

%\todo[inline, color=green!40]{this simple labeling method is quite flawed since we do not label based on the actual degradation of the scan (not by some kind of threshold of analog measurement threshold, statistical info about scan) since (TODO FIXME) this would result in training which only learns this given metric (example missing measurement points) which would make this methodology useless since we could simply use that same measurement as an more simple way to quantify the scan's degradation. }

%This simplistic approach has both Advantages and disadvantages. The approach is simple to implement and provides a clear and straightforward distinction between normal and anomalous data. As a negative, there are clearly point clouds without subjective degradation present in the experiments with added degradation, which-using this method-get labeled as anomalies even though for actual trainging and evaluation purposes they should not be labeleld as such. Since we do not have an objective measure available, we looked into other ways to label the data such as statistical data about missing measurements per point cloud or the aforementioned phenomenon of more erroneous measurements up close to the sensor in degraded environments, but we feared that any statistical property of the data or any combination of them would only result in the method learning to replicate those statistical evaluations rather than to actually quantify the degradation in a generalized way. The classification of wether smoke was present during an experiment or not is different here in that it is not dependent on the data but is rather an expression of the environment itself, during the recording of the data.

This simplistic labeling approach has both advantages and disadvantages. On the positive side, it is easy to implement and creates a clear distinction between normal and anomalous data. However, its simplicity is also its drawback: some point clouds from experiments with artificial smoke do not exhibit perceptible degradation, yet they are still labeled as anomalies. In an ideal scenario, these point clouds should not be classified as degraded for training and evaluation purposes.

Since an objective measure of degradation is unavailable, we explored alternative labeling methods—such as using statistical properties like the number of missing measurements per point cloud or the higher incidence of erroneous measurements near the sensor in degraded environments. Ultimately, we were concerned that these statistical approaches might lead the method to simply mimic the statistical evaluation rather than to quantify degradation in a generalized and robust manner. Notably, our labeling strategy—based on the presence or absence of smoke—is fundamentally an environmental indicator, independent of the intrinsic data properties recorded during the experiments.

%\todo[inline]{TODO maybe evaluate based on different thresholds? missing datapoints, number of detected outliers, number of particles in phantom circle around sensor?}
\todo[inline]{maybe also mention that we considered labeling using output of down-the-pipeline algorithm (e.g., SLAM) and how it performs/how confident it is and retrospectively label the quality of the data based on that}

\newchapter{experimental_setup}{Experimental Setup}
\newsection{autoencoder_architecture}{Deep SAD Autoencoder Architecture}
\newsection{data_setup}{Training/Evaluation Data Distribution}
\todo[inline]{which data was used how in training/evaluation}
\todo[inline]{explain concept of global/local application for global-/window quantifiction}

\newsection{evaluation_metrics}{Evaluation Metrics}
\todo[inline]{k-fold evaluation, ROC, generalization (evaluation on other datasets?)}

\newsection{hyperparameters}{Hyperparameters}
\todo[inline]{vary hyperparameters (no labeled anomalies vs some), specific training on local windows (only z-axis difference?), window size?}

\newchapter{results_discussion}{Results and Discussion}
\newsection{results}{Results}
\todo[inline]{some results, ROC curves, for both global and local}

\newsection{hyperparameter_analysis}{Hyperparameter Analysis}
\todo[inline]{result for different amounts of labeled data}

\newchapter{conclusion_future_work}{Conclusion and Future Work}
\newsection{conclusion}{Conclusion}
\todo[inline]{summarize what has been achieved}

\newsection{future_work}{Future Work}
\todo[inline]{confirm results with real smoke data}


% end mainmatter
% **************************************************************************************************

\appendix
\ifthenelse{\equal{\DocumentType}{thesis}}
{
	\setcounter{mypageno}{\value{page}}
	\frontmatter \pagestyle{plain} \pagenumbering{Roman}
	\setcounter{page}{\value{mypageno}}
}{}

\printbibliography
\listoffigures
\listoftables
\printglossary[type=\acronymtype]

% **************************************************************************************************

% place all floats and create label on last page
\FloatBarrier\label{end-of-document}
\end{document}