% **************************************************************************************************
% ** SPSC Report and Thesis Template
% **************************************************************************************************
%
% ***** Authors *****
% Daniel Arnitz, Paul Meissner, Stefan Petrik, Dietmar Malli, Johanna Rock
% Signal Processing and Speech Communication Laboratory (SPSC)
% Graz University of Technology (TU Graz), Austria
%
% ***** Changelog *****
% 0.1   2010-01-25   extracted from report template by Daniel Arnitz (not ready yet)
% 0.2   2010-02-08   added thesis titlepage and modified layout (not ready yet)
% 0.3   2010-02-18   added TUG logo and statutory declaration
% 0.4   2010-02-18   moved the information fields below \input{./base/packages} (encoding...)
% 0.5   2010-03-02   added \ShortTitle to fix problems with long thesis titles
%                    added \ThesisType (makes the template suitable for MSc, BSc, PhD, ... Thesis)
% 0.6   2010-06-05   added pagestyle and pagenumbering after frontmatter, packages has now type
% 0.7   2010-09      \Advisors -> \Assessors, inserted frontmatter for thesis
% 0.8   2010-11      added examples
% 0.9   2011-04      \Twosided now {true,false}, scrbook for thesis (\front-, \main-, \backmatter)
%                    added \SpecialNote for titlepage (funding, etc.), added type "homework"
% 0.10  2011-10-18   fixed two typos in \bibliographystyle{} (bug reported by Michael Tauch)
% 0.11  2011-11-09   fixed/modified preamble (bug reported by Michael Tauch)
% 0.12  2012-07-20   added ./base/opt_macros to deal with optional macros
% 0.13  2012-07-27   added \PaperSize
% 0.14  2017-11-03   Fixed thispagestyle issue
%                    Implemented automatic setting of correct page number after switching from
%                    roman numbering back to normal numbering
%                    Implemented \DraftText hack
%                    Moved makeindex from external program to newer stuff (package...)
%                    Made confidential dependent on \DraftText
%                    Made OptDraftMode and DisplayContentBoxes dependent on \DraftText
%                    Included some syntax formatting definitions
%                    Fixed wrong usage of scrbook class and \emptydoublepage mess... One should
%                    NOT need to adjust/tweak the layout by hand. That's what latex is for...
%                    Replaced bibtex with more modern biblatex (utf8 support in bibliography...)
%                    Added \printbibliography, \listoffigures, \listoftables and
%                    \printglossary[type=\acronymtype]
%                    Renewed and extended Introduction/Usage
% 0.15  2018-03-20   Homework and report now compilable again. Fixed a missing if.
% 0.16  2018-08-08   fixed/modified title according to official TUG template
% 0.17  2018-08-09   updated placeholder commands for new title page
%
% ***** Todo *****
%
% **************************************************************************************************

% basic setup
\newcommand{\DocumentType}{thesis} % "thesis" / "report" / "homework"
\newcommand{\DocumentLanguage}{en} % "en" / "de"
\newcommand{\PaperSize}{a4paper} % "a4paper" / "letterpaper"
\newcommand{\Twosided}{true} % "true" / "false" (=Duplex...)
\newcommand{\FramedLinks}{false} % "true" / "false"

% **************************************************************************************************
% template setup -- do not change these unless you know what you are doing!
\input{./base/documentclass_\DocumentType}
\input{./base/packages}
\input{./base/layout_\DocumentType}
\input{./base/macros}

% **************************************************************************************************
% uncomment to get watermarks:
% \usepackage[first,bottom,light,draft]{draftcopy}
% \draftcopyName{ENTWURF}{160}
\usepackage{todonotes}

% correct bad hyphenation
\hyphenation{}

% switches
\newboolean{OptDraftMode}
\newboolean{DisplayContentBoxes}
%\setboolean{OptDraftMode}{true} % optional draft mode for pixel graphics (speed up generation; add \OptDraft to options)
\ifthenelse{\boolean{OptDraftMode}}
{ \setboolean{DisplayContentBoxes}{true} }
{ \setboolean{DisplayContentBoxes}{false} }

% **************************************************************************************************
% information fields

% general
\newcommand{\DocumentTitle}{Lidar Degradation Quantification for Robot Navigation in Hazy Environments}
\newcommand{\DocumentSubtitle}{}
\newcommand{\ShortTitle}{} % used in headers (keep short!)
% for thesis: Firstname Surname, current university degree (e.g. BSc)
% for report, homework: Firstname Surname, Mat.Nr.
\newcommand{\DocumentAuthor}{Jan Kowalczyk}
\newcommand{\DocumentPlace}{Graz}

% FOR THESIS ONLY
% used for the title page and statutory declaration
% one out of: "bachelor's thesis" / "Bachelorarbeit" /
%             "master's thesis" / "Masterarbeit" /
%             "diploma thesis" / "Diplomarbeit" /
%             "doctoral thesis" / "Dissertation"
% ATTENTION: use correct language! Otherwise statutory declaration is faulty.
\newcommand{\ThesisTitle}{master's thesis}
\newcommand{\Institute}{Signal Processing and Speech Communication Laboratory}
\newcommand{\OrganizationsAdditional}{in cooperation with \\[0.2cm] \par Virtual Vehicle Research GmbH \\ Graz, Austria \\[2.0cm] \par}
\newcommand{\Supervisors}{Univ.-Prof. Dipl.-Ing. Dr.mont. Franz Pernkopf} % Supervisor 1 \\ Supervisor 2 ...
\newcommand{\SpecialNote}{}

% FOR REPORT ONLY
% revision numbers
\newcommand{\RevPrefix}{alpha~}
\newcommand{\RevLarge}{1}
\newcommand{\RevSmall}{0}

% confidential? (can of course also be used for other messages/notes)
\newcommand{\ConfidNote}{\ifthenelse{\boolean{OptDraftMode}}{
  \textbf{DRAFT}, \today}{
  %\textbf{CONFIDENTIAL}
}}

\input{./base/opt_macros}

% variable for page numbering
\newcounter{mypageno}

% **************************************************************************************************
\begin{document}
% **************************************************************************************************
\input{./base/syntax_formatting}

% for thesis: switch to frontmatter (Roman numbering, etc.)
\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \frontmatter
  \pagestyle{plain}
  \pagenumbering{Roman}
}{}

% title
\input{./base/titlepage_\DocumentType}

% for thesis: abstract, kurzfassung, affidavit and statutory declaration
\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \emptydoublepage
  \addcontentsline{toc}{chapter}{Statutory Declaration}
  \input{./base/declaration_\DocumentLanguage}
  \emptydoublepage
  \input{thesis_preamble/acknowledgements}
  \emptydoublepage
  \input{thesis_preamble/abstract}
  \emptydoublepage
  \input{thesis_preamble/kurzfassung}
  \emptydoublepage
}{}

\tableofcontents

\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \emptydoublepage
  \setcounter{mypageno}{\value{page}}
  \mainmatter
  \pagestyle{scrheadings}
  \pagenumbering{arabic}
  \setcounter{page}{\value{mypageno}}
}{}

% **************************************************************************************************
% mainmatter (=content)

\newchapter{Introduction}{chap:introduction}
%\todo[inline, color=green!40]{its a master thesis where we try to know how trustworthy the sensor data for robot navigation is}
%\newsection{Motivation and Problem Statement}{sec:motivation}
%\todo[inline]{lidar and its role in robot navigation. discuss sensor degradation and its effects on navigation.}
Autonomous robots have become increasingly prevalent in search and rescue missions because they can take on the difficult tasks of navigating hazardous environments such as collapsed structures, identifying and locating victims, and assessing the environment's safety for human rescue teams, all without endangering additional human lives. To understand their environment, robots employ multiple sensor systems such as lidar, radar, time-of-flight (ToF) cameras, ultrasound, optical cameras, and infrared cameras, of which lidar is the most prominently used due to its accuracy. The robots use the sensors' data to map their environments, navigate their surroundings, and make decisions such as which paths to prioritize. Many of the algorithms involved are deep learning methods that are trained on, and thus adapted to the characteristics of, large amounts of data.

Search and rescue environments provide challenging conditions for sensor systems trying to produce reliable data. One of the most prominent examples are aerosol particles from smoke and dust, which can obstruct the view and lead sensors to produce erroneous data. If such degraded data was not present in the training data of the robots' algorithms, these errors may lead to unexpected outputs and potentially endanger the robot or even the humans awaiting rescue. This is especially critical for autonomous robots, whose decisions are based entirely on their sensor data without any human intervention. To safeguard against these problems, robots need a way to assess the trustworthiness of their sensor systems' data. For remote-controlled robots a human operator can make these judgments, but many search and rescue missions rule out remote control due to environmental factors such as radio signal attenuation or the sheer size of the search area, and therefore demand autonomous robots. When designing such robots, we thus arrive at the following critical question:
\begin{quote}
Can autonomous robots quantify the reliability of lidar sensor data in hazardous environments to make more informed decisions?
\end{quote}
In this thesis we aim to answer this question by assessing a deep learning-based anomaly detection method and its performance when quantifying the sensor data's degradation.
The employed algorithm is a semi-supervised anomaly detection method which uses manually labeled training data to improve its performance over unsupervised methods. We show how much the introduction of these labeled samples improves the method's performance. The model's output is an anomaly score which quantifies the data's reliability and can be used by algorithms that rely on the sensor data. These downstream algorithms may, for example, decide to slow down the robot to collect more data, choose alternative routes, signal for help, or rely more heavily on other sensors' input data.
\todo[inline]{discuss results (we showed X)}
%\todo[inline, color=green!40]{autonomous robots have many sensors for understanding the world around them, especially visual sensors (lidar, radar, ToF, ultrasound, optical cameras, infrared cameras), they use that data for navigation mapping, SLAM algorithms, and decision making. these are often deep learning algorithms, oftentimes only trained on good data}
%\todo[inline, color=green!40]{difficult environments for sensors to produce good data quality (earthquakes, rescue robots), produced data may be unreliable, we don't know how trustworthy that data is (no quantification, confidence), since all navigation and decision making is based on input data, this makes the whole pipeline untrustworthy/problematic}
%\todo[inline, color=green!40]{contribution/idea of this thesis is to calculate a confidence score which describes how trustworthy input data is. algorithms further down the pipeline (slam, navigation, decision) can use this to make more informed decisions - examples: collect more data by reducing speed, find alternative routes, signal for help, do not attempt navigation, more heavily weight input from other sensors}

\newsection{Scope of Research}{chap:scope_research}
\todo[inline]{output is score, thresholding (yes/no), maybe confidence in sensor/data? NOT how this score is used in navigation/other decisions further down the line}
\todo[inline]{Sensor degradation due to dust/smoke not rain/fog/...}
\todo[inline, color=green!40]{we look at the domain of rescue robots which save buried people after earthquakes, or in dangerous conditions (after fires, collapsed buildings), which means we are mostly working with indoor or subterranean environments which oftentimes are polluted by smoke and a lot of dust. ideally this works for any kind of sensor data degradation but we only explore this domain}
\todo[inline, color=green!40]{mostly use lidar (state of the art) since it is very accurate in 3d mapping of environments, so we focus on quantifying how trustworthy the lidar data is by itself. we do not look at other sensor data (tof, ultrasound, optical)}
\todo[inline, color=green!40]{intended output is a confidence score which simply means higher score = worse data quality, lower score = trustworthy data. this score can be interpreted by algorithms in the pipeline. we do not look at how this is implemented in those algorithms; no binary classifier but an analog value, if this is wished a follow-up algorithm has to decide (for example by threshold or other methods)}

\newsection{Structure of the Thesis}{sec:thesis_structure}
\todo[inline]{brief overview of thesis structure}
\todo[inline, color=green!40]{in section x we discuss anomaly detection and semi-supervised learning since such an algorithm was used as the chosen method, we also discuss how lidar works and the data it produces.
then we discuss the chosen method DeepSAD in detail in section X, in section 4 we discuss the training and evaluation data, in section 5 we describe our setup for training and evaluation (whole pipeline). results are presented and discussed in section 6. section 7 contains the conclusion and discusses future work}

\newchapter{Background and Related Work}{chap:background}
\todo[inline, color=green!40]{in this section we will discuss necessary background knowledge for our chosen method and the sensor data we work with. related work exists mostly from autonomous driving, which does not include subterranean data and mostly looks at precipitation as the source of degradation; we modeled our approach after one such paper and try to adapt the same method for the domain of rescue robots. this method is a semi-supervised deep learning approach to anomaly detection which we describe in more detail in sections 2.1 and 2.2. in the last subsection 2.3 we discuss lidar sensors and the data they produce}

\newsection{Anomaly Detection}{sec:anomaly_detection}
\todo[inline, color=green!40]{cite exists since X and has been used to find anomalous data in many domains and works with all kinds of data types/structures (visual, audio, numbers). examples: healthcare (computer vision diagnostics, early detection), financial anomalies (credit card fraud, maybe other example), security/safety video cameras (public, traffic, factories).}
\todo[inline, color=green!40]{the goal of these algorithms is to differentiate between normal and anomalous data by finding statistically relevant information which separates the two. since these methods learn how normal data is typically distributed, they do not need prior knowledge of the types of all anomalies and can therefore potentially detect unseen, unclassified anomalies as well. the main challenge when implementing them is that it is difficult to cleanly separate normal from anomalous data}
\todo[inline, color=green!40]{typically no or very little labeled data is available, and oftentimes the kinds of possible anomalies are unknown, therefore it is not possible to label all of them. due to these circumstances anomaly detection methods oftentimes do not rely on labeled data but on the fact that normal circumstances make up the majority of the training data (quasi per definition)}
\todo[inline, color=green!40]{figure example shows 2d data but anomaly detection methods work with any kind of dimensionality/shape. shows two clusters of normal data with clear boundaries and outside examples of outliers (anomalous data, two single points and one cluster). anomaly detection methods learn to draw these boundaries from the training data given to them, which can then be used to judge if unseen data is normal or anomalous}
\todo[inline, color=green!40]{as discussed in the motivation, and same as in the reference paper (rain autonomous driving), we model our problem as an anomaly detection problem where we define good quality sensor data as normal data and degraded sensor data (in our case due to dust/smoke) as an anomaly. this allows us to quantify the degradation of data by using the anomaly detection method to check how likely new data is an anomaly}
\iffalse
Anomaly detection algorithms are designed to detect or quantify the likelihood of a pattern in data deviating significantly from a well-defined expected norm. Deviations such as these are classified as anomalies or outliers and often signify critical or actionable information.
\begin{figure}
  \begin{center}
    \includegraphics[width=0.5\textwidth]{figures/anomaly_detection_overview}
  \end{center}
  \caption{An example of a 2-dimensional data set with anomalies. Reproduced from~\cite{Chandola2009AnomalyDA}}\label{fig:anomaly_detection_overview}
\end{figure}
\todo[inline]{Figure example normal data boundaries, single outliers o1, o2, cluster of outliers o3. difficult to define boundaries so that all normal data is inside and all anomalies outside}
\fi

\newsection{Semi-Supervised Learning Algorithms}{sec:semi_supervised}
\todo[inline]{Quick overview of the DeepSAD method}
\todo[inline, color=green!40]{deep learning based (neural network with hidden layers), neural networks which get trained using backpropagation to learn to solve a novel task by defining some target}
\todo[inline, color=green!40]{data labels decide training setting (supervised, unsupervised, semi-supervised incl. explanation), supervised often classification based, but not possible if no labels available, unsupervised has no well-defined target, often used to find common hidden factors in data (distribution). semi-supervised is more like a sub-method of unsupervised learning which additionally uses a little (often hand-labeled) data to improve method performance}
\todo[inline, color=green!40]{include figure unsupervised, semi-supervised, supervised}
\todo[inline, color=green!40]{find easy illustrative example with figure of semi-supervised learning and include + explain here}
\todo[inline, color=green!40]{our chosen method DeepSAD is a semi-supervised deep learning method whose workings will be discussed in more detail in section X}

\newsection{Autoencoder}{sec:autoencoder}
\todo[inline]{autoencoder explanation}
\todo[inline, color=green!40]{autoencoders are a neural network architecture archetype (words) whose training target is to reproduce the input data itself - hence the name. the architecture is most commonly a mirrored one consisting of an encoder which transforms input data into a representation in a latent space and a decoder which transforms the latent representation back into the same data format as the input data (phrasing), this method typically results in the encoder learning to extract the most robust and critical information from the data and the (todo maybe something about the decoder + citation for both). it is used in many domains: translations, LLMs, something with images (search example + citations)}
\todo[inline, color=green!40]{typical encoder decoder mirrored figure}
\todo[inline, color=green!40]{explain figure}
\todo[inline, color=green!40]{our chosen method DeepSAD uses an autoencoder to translate input data into a latent space, in which it can more easily differentiate between normal and anomalous data}

\newsection{Lidar - Light Detection and Ranging}{sec:lidar_related_work}
\todo[inline]{related work in lidar}
\todo[inline, color=green!40]{the older, more commonly known radar works by sending out an electromagnetic wave in the radio-frequency range and detecting the time it takes to return (if it returns at all), signalling a reflective object in the path of the radio wave. lidar works on the same principle but sends out a light ray produced by a laser (citation needed) and measures the time it takes for the ray to return to the sensor. since the speed of light is constant in air, the system can calculate the distance between the sensor and the measured point.}
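The underlying time-of-flight relation can be stated directly: assuming the speed of light $c$ to be constant along the measurement path, a reflecting point at distance $d$ produces a round-trip delay $\Delta t$ of
\begin{equation}
  d = \frac{c\,\Delta t}{2},
\end{equation}
where the factor of two accounts for the light traveling to the measured point and back.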
\todo[inline, color=green!40]{modern lidar systems send out multiple, often millions of measurement rays per second, which results in a three-dimensional pointcloud constructed from the information in which direction each ray was cast and the distance that was measured}
\todo[inline, color=green!40]{lidar is used in most domains reliant on accurate 3d representations of the world like autonomous driving, robot navigation, (+ maybe quickly look up two other domains), its main advantage is high measurement accuracy, precision (use correct term), and high resolution (possible due to single point measurements instead of cones like radar, ToF, ultrasonic) which enables more detailed mappings of the environment}
\todo[inline, color=green!40]{due to point precision, lidar is sensitive to noise/degradation from airborne particles, which may produce early returns, deflections, errors of light rays; this results in noise in the 3d pointcloud and possibly missing data of the measurement behind the aerosol particle.}
\todo[inline, color=green!40]{because of the given advantages of lidar it is most commonly used nowadays on robot platforms for environment mapping and navigation - so we chose to demonstrate our method based on degraded data collected by a lidar sensor, as discussed in more detail in section (data section)}

\newchapter{DeepSAD: Semi-Supervised Anomaly Detection}{chap:deepsad}
Deep Semi-Supervised Anomaly Detection (DeepSAD)~\cite{deepsad} is a deep learning-based anomaly detection method whose suitability for quantifying sensor degradation we explore in this thesis. It is a semi-supervised method which allows the introduction of manually labeled samples in addition to the unlabeled training data, improving the algorithm's performance over its unsupervised predecessor Deep One-Class Classification~\cite{deepsvdd}.\todo{phrasing} The working principle of the method is to encode the input data into a latent space and train the network to cluster normal data close together while anomalies are mapped farther away in that latent space.
%\todo[inline, color=green!40]{DeepSAD is a semi-supervised anomaly detection method proposed in cite, which is based on an unsupervised method (DeepSVDD) and additionally allows for providing some labeled data which is used during the training phase to improve the method's performance}

\newsection{Algorithm Description}{sec:algorithm_description}
%\todo[inline]{explain deepsad in detail}
\todo[inline, color=green!40]{Core idea of the algorithm is to learn a transformation to map input data into a latent space where normal data clusters close together and anomalous data gets mapped farther away. to achieve this the method first includes a pretraining step of an autoencoder to extract the most relevant information, second it fixes a hypersphere center in the autoencoder's latent space as a target point for normal data, and third it trains the network to map normal data closer to that hypersphere center. fourth, the resulting network can map new data into this latent space and interpret its distance from the hypersphere center as an anomaly score which is larger the more anomalous the datapoint is}
\todo[inline, color=green!40]{explanation pre-training step: architecture of the autoencoder is dependent on the input data shape, but any data shape is generally permissible. for the autoencoder we do not need any labels since the optimization target is always the input itself. the latent space dimensionality can be chosen based on the input data's complexity (search citations).
generally a higher-dimensional latent space has more learning capacity but tends to overfit more easily (find cite). the pre-training step is used to find weights for the encoder which generally extract robust and critical information from the input because TODO read deepsad paper (cite deepsad). as training data, typically all data (normal and anomalous) is used during this step.}
\todo[inline, color=green!40]{explanation hypersphere center step: an additional benefit of the pretraining is that the mean of all latent representations from pre-training can be used as the hypersphere center around which normal data is supposed to cluster. this is advantageous because it allows the main training to converge faster than choosing a random point in the latent space as hypersphere center. from this point onward the center c is fixed for the main training and inference and does not change anymore.}
\todo[inline, color=green!40]{explanation training step: during the main training step the method starts with the pre-trained weights of the encoder but removes the decoder from the architecture, since it optimizes the output in the latent space and does not need to reproduce the input data format. it does so by minimizing the geometric distance of each input datapoint's latent representation to the previously defined hypersphere center c. due to normal data being more common in the inputs, this results in normal data clustering closely around c and anomalous data being pushed away from it. additionally, during this step the labeled data is used to more correctly map normal and anomalous data}
\todo[inline, color=green!40]{explanation inference step: with the trained network we can transform new input data into the latent space and calculate its distance from the hypersphere center, which will be smaller the more confident the network is in the data being normal and larger the more likely the data is anomalous. This output score is an analog value dependent on multiple factors like the latent space dimensionality, encoder architecture and ??? and has to be interpreted further to be used (for example thresholding)}

\newsection{Algorithm Details and Hyperparameters}{sec:algorithm_details}
\todo[inline]{backpropagation optimization formula, hyperparameters explanation}
\todo[inline, color=green!40]{in formula X we see the optimization target of the algorithm. explain in one paragraph the variables in the optimization formula}
\todo[inline, color=green!40]{explain the three terms (unlabeled, labeled, regularization)}
\begin{equation}
  \min_{\mathcal{W}} \quad \frac{1}{n+m} \sum_{i=1}^{n}\|\phi(\mathbf{x}_{i};\mathcal{W})-\mathbf{c}\|^{2}
  +\frac{\eta}{n+m}\sum_{j=1}^{m}\left(\|\phi(\tilde{\mathbf{x}}_{j};\mathcal{W})-\mathbf{c}\|^{2}\right)^{\tilde{y}_{j}}
  +\frac{\lambda}{2}\sum_{\ell=1}^{L}\|\mathbf{W}^{\ell}\|_{F}^{2}.
\end{equation}
Here $\phi(\,\cdot\,;\mathcal{W})$ denotes the encoder network with weights $\mathcal{W}=\{\mathbf{W}^{1},\dots,\mathbf{W}^{L}\}$, $\mathbf{c}$ the fixed hypersphere center, $n$ the number of unlabeled samples $\mathbf{x}_{i}$, and $m$ the number of labeled samples $\tilde{\mathbf{x}}_{j}$ with labels $\tilde{y}_{j}\in\{-1,+1\}$ ($+1$ for normal, $-1$ for anomalous). The first term draws unlabeled data toward the center, the second term (weighted by $\eta$) pulls labeled normal samples toward the center while the exponent $\tilde{y}_{j}=-1$ pushes labeled anomalies away from it, and the third term is a standard weight decay regularizer with factor $\lambda$. At inference time, the anomaly score of an input $\mathbf{x}$ is its squared distance $\|\phi(\mathbf{x};\mathcal{W})-\mathbf{c}\|^{2}$ to the center.
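To make the two data-dependent terms concrete, the following is a minimal PyTorch-style sketch of the objective (a sketch under assumed names and tensor shapes, not the reference implementation; the weight decay term is typically realized via the optimizer):
\begin{verbatim}
import torch

def deepsad_loss(z, c, y, eta=1.0, eps=1e-6):
    # z: (batch, d) latent representations phi(x; W)
    # c: (d,) fixed hypersphere center
    # y: (batch,) 0 = unlabeled, +1 = labeled normal, -1 = labeled anomaly
    dist2 = torch.sum((z - c) ** 2, dim=1)  # squared distance to center
    # labeled term: dist^2 for normals, 1/dist^2 for anomalies
    # (pushes labeled anomalies away from the center); eps avoids
    # division by zero and is an implementation assumption
    labeled = eta * torch.where(y == 1, dist2, 1.0 / (dist2 + eps))
    return torch.where(y == 0, dist2, labeled).mean()
\end{verbatim}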
\newsection{Advantages and Limitations}{sec:advantages_limitations}
\todo[inline]{semi supervised, learns normality by amount of data (no labeling/ground truth required), very few labels for better training to specific situation}

\newchapter{Data and Preprocessing}{chap:data_preprocessing}
%\todo[inline, color=green!40]{good data important for learning based methods and for evaluation. in this chapter we talk about the requirements we have for our data and the difficulties that come with them and will then give some information about the dataset that was used as well as how the data was preprocessed for the experiments (sec 4.2)}
Situations such as earthquakes, structural failures, and other emergencies that require rescue robots are fortunately rare. When these operations do occur, the primary focus is on the rapid and safe rescue of survivors rather than on data collection. Consequently, there is a scarcity of publicly available data from such scenarios. To improve any method, however, a large, diverse, and high-quality dataset is essential for comprehensive evaluation. This challenge is further compounded in our work, as we evaluate a training-based approach that imposes even higher requirements on the data to enable training, making it difficult to find a suitable dataset. In this chapter, we outline the specific requirements we established for the data, describe the dataset selected for this task (including key statistics and notable features), and explain the preprocessing steps applied for training and evaluating the methods.

\newsection{Data}{sec:data}
%BEGIN missing points
As we can see in Figure~\ref{fig:data_missing_points}, the artificial smoke introduced as explicit degradation during some experiments results in more missing measurements per scan, which can be explained by measurement rays hitting airborne particles but not being reflected back to the sensor in a way it can measure.
\begin{figure}
  \begin{center}
    \includegraphics[width=0.9\textwidth]{figures/data_missing_points.png}
  \end{center}
  \caption{Density histogram showing the percentage of missing measurements per scan for normal experiments without degradation and anomalous experiments with artificial smoke introduced as degradation.}\label{fig:data_missing_points}
\end{figure}
%END missing points
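The per-scan statistic shown in Figure~\ref{fig:data_missing_points} can be computed directly from the number of returned points per scan. A minimal sketch, assuming the nominal scan size of the sensor used here (32 channels of 2048 measurements, see below) and illustrative variable names:
\begin{verbatim}
import numpy as np

EXPECTED_POINTS = 32 * 2048  # nominal number of measurements per full scan

def missing_percentage(points_per_scan):
    # points_per_scan: array holding the number of returned points per scan
    counts = np.asarray(points_per_scan, dtype=float)
    return 100.0 * (1.0 - counts / EXPECTED_POINTS)
\end{verbatim}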
%BEGIN early returns
In experiments with artificial smoke, we observe numerous points in the point cloud very close to the sensor, even though no solid objects exist at that range. These points are therefore generated by airborne particles in the artificial smoke. This phenomenon occurs because the closer an airborne particle is to the sensor, the higher the probability that it reflects the laser beam in a measurable way. The box diagram in Figure~\ref{fig:particles_near_sensor} illustrates that significantly more measurements during these experiments report ranges shorter than 50 centimeters. Given the sensor platform's setup and its experimental trajectory, we conclude that any measurement with a range under 50 centimeters is erroneous. While the density of these near-sensor returns might be used to estimate data quality when the sensor is already inside an environment with airborne particles, this method cannot anticipate data degradation before the sensor enters such an area. Since lidar is intended to capture visible geometry from a distance, it is preferable to quantify potential degradation of an area in advance. For these reasons, we did not incorporate this phenomenon into our subsequent analysis.
\begin{figure}
  \begin{center}
    \includegraphics[width=0.9\textwidth]{figures/particles_near_sensor_boxplot_zoomed_500.png}
  \end{center}
  \caption{Box diagram depicting the percentage of measurements closer than 50 centimeters to the sensor for normal and anomalous experiments.}\label{fig:particles_near_sensor}
\end{figure}
%END early returns
\todo[inline]{describe data sources, limitations}
\todo[inline]{screenshots of camera/3d data?}
\todo[inline]{difficulties: no ground truth, different lidar sensors/settings, different data shapes, available metadata, ...}
\todo[inline, color=green!40]{we require lidar sensor data that was collected in a domain as closely related to our target domain (rescue robots indoors, cave-ins) as possible, which also includes some kind of appreciable degradation for which we have some kind of labeling possibility. ideally the degradation should be from smoke/dust/aerosol particles. most data should be without degradation (since we require more normal than anomalous data to train the method as described in X) but we need enough anomalous data so we can confidently evaluate the method's performance}
\todo[inline, color=green!40]{labeling is an especially problematic topic since ideally we would want an analog value which corresponds with the amount of smoke present for evaluation.
for training we only require the possibility to provide labels in the form of normal or anomalous targets (binary classification), and these labels do not have to be present for all data, only for some of it (since semi-supervised learning only uses some labeled data, as discussed in X)}
\todo[inline, color=green!40]{We chose to evaluate the method on the dataset "Multimodal Dataset from Harsh Sub-Terranean Environment with Aerosol Particles for Frontier Exploration"~\cite{alexander_kyuroson_2023_7913307}, which is a public dataset collected by X in a sub-terranean environment and includes data from multiple sensors on a moving sensor platform, as well as experiments where the sensor data is explicitly degraded by aerosol particles produced by a smoke machine.}
\todo[inline, color=green!40]{list sensors on the platform}
\todo[inline, color=green!40]{talk about how much data is available (maybe a plot about data?), number of experiments with/without degradation, other factors in these experiments which do not concern our use-case of them}
\todo[inline, color=green!40]{lidar data of the 360° sensor is captured at 10 frames per second. each sensor output consists of a pointcloud resulting from measurements of 32 vertical channels, for each of which 2048 measurement points are taken per revolution, equiangularly distributed around the whole horizontal 360°; so the sensor takes 32 * 2048 = 65536 measurements 10 times a second, ideally each producing a point in the pointcloud consisting of x, y, z coordinates (relative to the sensor platform) as well as some other values per measurement (reflectivity, intensity, originally measured range value)}

\newsection{Preprocessing Steps}{sec:preprocessing}
\todo[inline]{describe how 3d lidar data was preprocessed (2d projection), labeling}
\todo[inline]{screenshots of 2d projections?}
\todo[inline, color=green!40]{while, as described in sec X, the method DeepSAD is not dependent on any specific type/structure of data, it requires training an autoencoder in the pretraining step. such autoencoders are better understood in the image domain since there are many use cases for this such as X (TODO citation needed), there are also 3d data autoencoders such as X (todo find example). same as the reference paper (rain cite) we chose to transform the 3d data to 2d by using a spherical projection to map each of the 3d points onto a 2d plane where the range of each measurement can be expressed as the brightness of a single pixel. this leaves us with a 2d image of resolution 32x2048 (channels by horizontal measurements), which is helpful for visualization as well as for choosing a simpler architecture for the autoencoder of deepsad. the data in the rosbag is sparse, meaning that measurements of the lidar which did not produce any value (no return ray detected before sensor-specific timeout) are simply not present in the lidar scan; so we have at most 65536 measurements per scan but mostly fewer than this (maybe add a statistic about this?
could also be interesting to show smoke experiment stuff)}
\todo[inline, color=green!40]{to achieve this transformation we used the measurement index and channel present in each measurement point of the dataset, which allowed a perfect reconstruction of the 2d projection without calculating each measurement's pixel position in the projection via angles, which in our experience typically leads to some ambiguity in the projection (multiple measurements mapping to the same pixel due to precision loss/other errors). the measurement index increases even for unavailable measurements (no ray return), so we can simply create the 2d projection by mapping the normalized range (FIXME really normalized) value to the pixel position y = channel, x = measurement index. by initializing the array to NaN values we obtain a 2d data structure with the range values and NaN at pixel positions where no measurement took place (missing measurements in scans due to no ray return)}
\todo[inline, color=green!40]{another important preprocessing step is the labeling of the lidar frames as normal/anomalous. this is on the one hand used during training (experiments with zero labeled up to most of the data being labeled) and on the other hand is important for the evaluation of the method's performance. originally we do not have any labels on the data regarding degradation and no analog values from another sensor which measures the current smoke particles in the air. our simple approach was to label all frames from experiments which included artificial degradation by fog machine smoke as anomalous and all frames from experiments without artificial degradation as normal.}
\todo[inline, color=green!40]{this simple labeling method is quite flawed since we do not label based on the actual degradation of the scan (not by some kind of threshold on an analog measurement or statistical info about the scan), since (TODO FIXME) labeling by such a metric would result in training which only learns this given metric (example: missing measurement points), which would make this methodology useless since we could simply use that same metric as a simpler way to quantify the scan's degradation.}
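Relating to the index-based projection described earlier in this section, the following is a minimal sketch (assuming numpy and per-point arrays of channel, measurement index, and range; names are illustrative):
\begin{verbatim}
import numpy as np

def project_scan(channel, meas_index, rng, shape=(32, 2048)):
    # Start from an all-NaN image so that missing measurements
    # (no ray return) remain NaN in the projection.
    image = np.full(shape, np.nan, dtype=np.float32)
    # Row = vertical channel, column = measurement index,
    # pixel value = measured range.
    image[channel, meas_index] = rng
    return image
\end{verbatim}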
\todo[inline]{TODO maybe evaluate based on different thresholds? missing datapoints, number of detected outliers, number of particles in phantom circle around sensor?}

\newchapter{Experimental Setup}{chap:experimental_setup}
\newsection{DeepSAD Autoencoder Architecture}{sec:autoencoder_architecture}
\newsection{Training/Evaluation Data Distribution}{sec:data_setup}
\todo[inline]{which data was used how in training/evaluation}
\todo[inline]{explain concept of global/local application for global-/window quantification}
\newsection{Evaluation Metrics}{sec:evaluation_metrics}
\todo[inline]{k-fold evaluation, ROC, generalization (evaluation on other datasets?)}
\newsection{Hyperparameters}{sec:hyperparameters}
\todo[inline]{vary hyperparameters (no labeled anomalies vs some), specific training on local windows (only z-axis difference?), window size?}

\newchapter{Results and Discussion}{chap:results_discussion}
\newsection{Results}{sec:results}
\todo[inline]{some results, ROC curves, for both global and local}
\newsection{Hyperparameter Analysis}{sec:hyperparameter_analysis}
\todo[inline]{result for different amounts of labeled data}

\newchapter{Conclusion and Future Work}{chap:conclusion_future_work}
\newsection{Conclusion}{sec:conclusion}
\todo[inline]{summarize what has been achieved}
\newsection{Future Work}{sec:future_work}
\todo[inline]{confirm results with real smoke data}

% end mainmatter
% **************************************************************************************************
\appendix
\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \setcounter{mypageno}{\value{page}}
  \frontmatter
  \pagestyle{plain}
  \pagenumbering{Roman}
  \setcounter{page}{\value{mypageno}}
}{}
\printbibliography
\listoffigures
\listoftables
\printglossary[type=\acronymtype]

% **************************************************************************************************
% place all floats and create label on last page
\FloatBarrier\label{end-of-document}
\end{document}