% **************************************************************************************************
% ** SPSC Report and Thesis Template
% **************************************************************************************************
%
% ***** Authors *****
% Daniel Arnitz, Paul Meissner, Stefan Petrik, Dietmar Malli, Johanna Rock
% Signal Processing and Speech Communication Laboratory (SPSC)
% Graz University of Technology (TU Graz), Austria
%
% ***** Changelog *****
% 0.1  2010-01-25 extracted from report template by Daniel Arnitz (not ready yet)
% 0.2  2010-02-08 added thesis titlepage and modified layout (not ready yet)
% 0.3  2010-02-18 added TUG logo and statutory declaration
% 0.4  2010-02-18 moved the information fields below \input{./base/packages} (encoding...)
% 0.5  2010-03-02 added \ShortTitle to fix problems with long thesis titles
%                 added \ThesisType (makes the template suitable for MSc, BSc, PhD, ... Thesis)
% 0.6  2010-06-05 added pagestyle and pagenumbering after frontmatter, packages now has a type
% 0.7  2010-09    \Advisors -> \Assessors, inserted frontmatter for thesis
% 0.8  2010-11    added examples
% 0.9  2011-04    \Twosided now {true,false}, scrbook for thesis (\front-, \main-, \backmatter)
%                 added \SpecialNote for titlepage (funding, etc.), added type "homework"
% 0.10 2011-10-18 fixed two typos in \bibliographystyle{} (bug reported by Michael Tauch)
% 0.11 2011-11-09 fixed/modified preamble (bug reported by Michael Tauch)
% 0.12 2012-07-20 added ./base/opt_macros to deal with optional macros
% 0.13 2012-07-27 added \PaperSize
% 0.14 2017-11-03 Fixed thispagestyle issue
%                 Implemented automatic setting of correct page number after switching from
%                 roman numbering back to normal numbering
%                 Implemented \DraftText hack
%                 Moved makeindex from external program to newer stuff (package...)
%                 Made confidential dependent on \DraftText
%                 Made OptDraftMode and DisplayContentBoxes dependent on \DraftText
%                 Included some syntax formatting definitions
%                 Fixed wrong usage of scrbook class and \emptydoublepage mess... One should
%                 NOT need to adjust/tweak the layout by hand. That's what latex is for...
%                 Replaced bibtex with more modern biblatex (utf8 support in bibliography...)
%                 Added \printbibliography, \listoffigures, \listoftables and
%                 \printglossary[type=\acronymtype]
%                 Renewed and extended Introduction/Usage
% 0.15 2018-03-20 Homework and report now compilable again. Fixed a missing if.
% 0.16 2018-08-08 fixed/modified title according to official TUG template
% 0.17 2018-08-09 updated placeholder commands for new title page
%
% ***** Todo *****
%
% **************************************************************************************************

% basic setup
\newcommand{\DocumentType}{thesis}   % "thesis" / "report" / "homework"
\newcommand{\DocumentLanguage}{en}   % "en" / "de"
\newcommand{\PaperSize}{a4paper}     % "a4paper" / "letterpaper"
\newcommand{\Twosided}{true}         % "true" / "false" (=Duplex...)
\newcommand{\FramedLinks}{false}     % "true" / "false"

% **************************************************************************************************
% template setup -- do not change these unless you know what you are doing!
\input{./base/documentclass_\DocumentType}
\input{./base/packages}
\input{./base/layout_\DocumentType}
\input{./base/macros}

% **************************************************************************************************
% uncomment to get watermarks:
% \usepackage[first,bottom,light,draft]{draftcopy}
% \draftcopyName{ENTWURF}{160}

\usepackage{todonotes}

% correct bad hyphenation
\hyphenation{}

% switches
\newboolean{OptDraftMode}
\newboolean{DisplayContentBoxes}
% \setboolean{OptDraftMode}{true} % optional draft mode for pixel graphics (speed up generation; add \OptDraft to options)
\ifthenelse{\boolean{OptDraftMode}}
{
  \setboolean{DisplayContentBoxes}{true}
}{
  \setboolean{DisplayContentBoxes}{false}
}

% **************************************************************************************************
% information fields

% general
\newcommand{\DocumentTitle}{Lidar Degradation Quantification for Robot Navigation in Hazy Environments}
\newcommand{\DocumentSubtitle}{}
\newcommand{\ShortTitle}{} % used in headers (keep short!)
% for thesis: Firstname Surname, current university degree (e.g. BSc)
% for report, homework: Firstname Surname, Mat.Nr.
\newcommand{\DocumentAuthor}{Jan Kowalczyk}
\newcommand{\DocumentPlace}{Graz}

% FOR THESIS ONLY
% used for the title page and statutory declaration
% one out of: "bachelor's thesis" / "Bachelorarbeit" /
%             "master's thesis"   / "Masterarbeit" /
%             "diploma thesis"    / "Diplomarbeit" /
%             "doctoral thesis"   / "Dissertation"
% ATTENTION: use the correct language! Otherwise the statutory declaration is faulty.
\newcommand{\ThesisTitle}{master's thesis}
\newcommand{\Institute}{Signal Processing and Speech Communication Laboratory}
\newcommand{\OrganizationsAdditional}{in cooperation with \\[0.2cm] \par Virtual Vehicle Research GmbH \\ Graz, Austria \\[2.0cm] \par}
\newcommand{\Supervisors}{Univ.-Prof. Dipl.-Ing. Dr.mont. Franz Pernkopf} % Supervisor 1 \\ Supervisor 2 ...
\newcommand{\SpecialNote}{}

% FOR REPORT ONLY
% revision numbers
\newcommand{\RevPrefix}{alpha~}
\newcommand{\RevLarge}{1}
\newcommand{\RevSmall}{0}
% confidential? (can of course also be used for other messages/notes)
\newcommand{\ConfidNote}{\ifthenelse{\boolean{OptDraftMode}}{%
  \textbf{DRAFT}, \today%
}{%
  %\textbf{CONFIDENTIAL}
}}

\input{./base/opt_macros}

% variable for page numbering
\newcounter{mypageno}

% **************************************************************************************************
\begin{document}
% **************************************************************************************************
\input{./base/syntax_formatting}

% for thesis: switch to frontmatter (Roman numbering, etc.)
\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \frontmatter
  \pagestyle{plain}
  \pagenumbering{Roman}
}{}

% title
\input{./base/titlepage_\DocumentType}

% for thesis: abstract, kurzfassung, affidavit and statutory declaration
\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \emptydoublepage
  \addcontentsline{toc}{chapter}{Statutory Declaration}
  \input{./base/declaration_\DocumentLanguage}
  \emptydoublepage
  \input{thesis_preamble/acknowledgements}
  \emptydoublepage
  \input{thesis_preamble/abstract}
  \emptydoublepage
  \input{thesis_preamble/kurzfassung}
  \emptydoublepage
}{}

\tableofcontents

\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \emptydoublepage
  \setcounter{mypageno}{\value{page}}
  \mainmatter
  \pagestyle{scrheadings}
  \pagenumbering{arabic}
  \setcounter{page}{\value{mypageno}}
}{}

% **************************************************************************************************
% mainmatter (=content)

\newchapter{Introduction}{chap:introduction}
\todo[inline, color=green!40]{This is a master's thesis in which we investigate how trustworthy the sensor data used for robot navigation is.}

\newsection{Motivation and Problem Statement}{sec:motivation}
\todo[inline]{Lidar and its role in robot navigation. Discuss sensor degradation and its effects on navigation.}
\todo[inline, color=green!40]{Autonomous robots carry many sensors for perceiving the world around them, especially visual sensors (lidar, radar, ToF, ultrasound, optical cameras, infrared cameras). They use that data for navigation, mapping, SLAM algorithms, and decision making. These are often deep learning algorithms, oftentimes trained only on good data.}
\todo[inline, color=green!40]{Difficult environments make it hard for sensors to produce good data quality (earthquakes, rescue robots); the produced data may be unreliable, and we do not know how trustworthy it is (no quantification, no confidence). Since all navigation and decision making is based on the input data, this makes the whole pipeline untrustworthy/problematic.}
\todo[inline, color=green!40]{The contribution/idea of this thesis is to calculate a confidence score which describes how trustworthy the input data is. Algorithms further down the pipeline (SLAM, navigation, decision making) can use this score to make more informed decisions -- examples: collect more data by reducing speed, find alternative routes, signal for help, do not attempt navigation, weight input from other sensors more heavily.}

\newsection{Scope of Research}{chap:scope_research}
\todo[inline]{Output is a score, thresholding (yes/no), maybe confidence in the sensor/data? NOT how this score is used in navigation/other decisions further down the line.}
\todo[inline]{Sensor degradation due to dust/smoke, not rain/fog/...}
\todo[inline, color=green!40]{We look at the domain of rescue robots which search for buried people after earthquakes or in dangerous conditions (after fires, collapsed buildings). This means we are mostly working with indoor or subterranean environments, which are oftentimes polluted by smoke and a lot of dust. Ideally the method works for any kind of sensor data degradation, but we only explore this domain.}
\todo[inline, color=green!40]{We mostly use lidar (state of the art) since it is very accurate for 3D mapping of environments, so we focus on quantifying how trustworthy the lidar data is by itself. We do not look at other sensor data (ToF, ultrasound, optical).}
\todo[inline, color=green!40]{The intended output is a confidence score, which simply means: higher score = worse data quality, lower score = trustworthy data. This score can be interpreted by algorithms in the pipeline.
We do not look at how this is implemented in those algorithms; the output is not a binary classifier but an analog value. If a binary decision is wished for, a follow-up algorithm has to decide (for example by thresholding or other methods).}

\newsection{Structure of the Thesis}{sec:thesis_structure}
\todo[inline]{Brief overview of the thesis structure.}
\todo[inline, color=green!40]{In section X we discuss anomaly detection and semi-supervised learning, since such an algorithm was chosen as our method; we also discuss how lidar works and the data it produces. We then discuss the chosen method DeepSAD in detail in section X. In section 4 we discuss the training and evaluation data, and in section 5 we describe our setup for training and evaluation (the whole pipeline). Results are presented and discussed in section 6. Section 7 contains the conclusion and discusses future work.}

\newchapter{Background and Related Work}{chap:background}
\todo[inline, color=green!40]{In this chapter we discuss the necessary background knowledge for our chosen method and for the sensor data we work with. Related work exists mostly in autonomous driving, which does not include subterranean data and mostly looks at precipitation as the source of degradation. We modeled our approach after one such paper and try to adapt the same method to the domain of rescue robots. This method is a semi-supervised deep learning approach to anomaly detection, which we describe in more detail in sections 2.1 and 2.2. In the last subsection 2.3 we discuss lidar sensors and the data they produce.}

\newsection{Anomaly Detection}{sec:anomaly_detection}
\todo[inline, color=green!40]{(cite) Anomaly detection has existed since X and has been used to find anomalous data in many domains; it works with all kinds of data types/structures (visual, audio, numbers). Examples: healthcare (computer vision diagnostics, early detection), financial anomalies (credit card fraud, maybe another example), security/safety video cameras (public, traffic, factories).}
\todo[inline, color=green!40]{The goal of these algorithms is to differentiate between normal and anomalous data by finding statistically relevant information which separates the two. Since these methods learn how normal data is typically distributed, they do not need prior knowledge of all types of anomalies and can therefore potentially detect unseen, unclassified anomalies as well. The main challenge when implementing them is that it is difficult to cleanly separate normal from anomalous data.}
\todo[inline, color=green!40]{Typically no or very little labeled data is available, and oftentimes the kinds of possible anomalies are unknown, so it is not possible to label all of them. Due to these circumstances, anomaly detection methods oftentimes do not rely on labeled data but on the fact that normal conditions make up the majority of the training data (quasi per definition).}
\todo[inline, color=green!40]{The figure example shows 2D data, but anomaly detection methods work with any kind of dimensionality/shape. It shows two clusters of normal data with clear boundaries and, outside of them, examples of outliers (anomalous data: two single points and one cluster). Anomaly detection methods learn to draw these boundaries from the training data given to them, which can then be used to judge whether unseen data is normal or anomalous.}
\todo[inline, color=green!40]{As discussed in the motivation, and the same as in the reference paper (rain, autonomous driving), we model our problem as an anomaly detection problem where we define good quality sensor data as normal data and degraded sensor data (in our case due to dust/smoke) as an anomaly. This allows us to quantify the degradation of the data by using the anomaly detection method to check how likely new data is to be an anomaly.}
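As a first formal sketch (the notation here is chosen for illustration only; the concrete score used in this thesis follows from the DeepSAD formulation in Chapter~\ref{chap:deepsad}), an anomaly detector can be viewed as a score function
\begin{equation}
    s\colon \mathcal{X} \rightarrow \mathbb{R}_{\geq 0}, \qquad
    \hat{y}(\mathbf{x}) =
    \begin{cases}
        \text{normal}    & \text{if } s(\mathbf{x}) \leq \tau,\\
        \text{anomalous} & \text{if } s(\mathbf{x}) > \tau,
    \end{cases}
\end{equation}
where a larger score $s(\mathbf{x})$ indicates a stronger deviation from the learned model of normal data. The threshold $\tau$ is only required if a binary decision is needed; in our setting the raw score itself serves as the degradation measure.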
\iffalse
Anomaly detection algorithms are designed to detect or quantify the likelihood of a pattern in data deviating significantly from a well-defined expected norm. Deviations such as these are classified as anomalies or outliers and often signify critical or actionable information.
\begin{figure}
  \begin{center}
    \includegraphics[width=0.5\textwidth]{figures/anomaly_detection_overview}
  \end{center}
  \caption{An example of a 2-dimensional data set with anomalies. Reproduced from~\cite{Chandola2009AnomalyDA}}\label{fig:anomaly_detection_overview}
\end{figure}
\todo[inline]{Figure example: normal data boundaries, single outliers o1, o2, cluster of outliers o3. Difficult to define boundaries so that all normal data is inside and all anomalies are outside.}
\fi

\newsection{Semi-Supervised Learning Algorithms}{sec:semi_supervised}
\todo[inline]{Quick overview of the DeepSAD method.}
\todo[inline, color=green!40]{Deep learning based (neural networks with hidden layers): neural networks are trained using backpropagation to learn to solve a novel task defined by some training target.}
\todo[inline, color=green!40]{The availability of data labels decides the training setting (supervised, unsupervised, semi-supervised, incl. explanation). Supervised learning is often classification based, but not possible if no labels are available; unsupervised learning has no well-defined target and is often used to find common hidden factors in the data (distribution). Semi-supervised learning is more of a sub-method of unsupervised learning which additionally uses a small amount of (often hand-labeled) data to improve the method's performance.}
\todo[inline, color=green!40]{Include figure: unsupervised, semi-supervised, supervised.}
\todo[inline, color=green!40]{Find an easy illustrative example with a figure of semi-supervised learning and include + explain it here.}
\todo[inline, color=green!40]{Our chosen method DeepSAD is a semi-supervised deep learning method whose workings will be discussed in more detail in section X.}

\newsection{Autoencoder}{sec:autoencoder}
\todo[inline]{Autoencoder explanation.}
\todo[inline, color=green!40]{Autoencoders are a neural network architecture archetype (words) whose training target is to reproduce the input data itself -- hence the name. The architecture is most commonly a mirrored one, consisting of an encoder which transforms the input data into a representation in a latent space and a decoder which transforms the latent representation back into the same data format as the input (phrasing). This typically results in the encoder learning to extract the most robust and critical information from the data (todo: maybe something about the decoder + citation for both). It is used in many domains: translation, LLMs, something with images (search example + citations).}
\todo[inline, color=green!40]{Typical encoder/decoder mirrored figure.}
\todo[inline, color=green!40]{Explain figure.}
\todo[inline, color=green!40]{Our chosen method DeepSAD uses an autoencoder to translate input data into a latent space, in which it can more easily differentiate between normal and anomalous data.}
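For later reference, the standard autoencoder training objective can be sketched as follows (generic formulation, independent of the concrete encoder/decoder architecture chosen later): with encoder $\phi$ and decoder $\psi$, the network is trained to minimize the reconstruction error
\begin{equation}
    \min_{\phi,\psi} \; \frac{1}{n} \sum_{i=1}^{n} \big\lVert \mathbf{x}_i - \psi\big(\phi(\mathbf{x}_i)\big) \big\rVert^2 ,
\end{equation}
where $\phi(\mathbf{x}_i)$ is the latent representation of the input $\mathbf{x}_i$. Minimizing this error forces the encoder to preserve exactly the information needed to reconstruct the input, which is why its weights are a useful starting point for the DeepSAD training described in Chapter~\ref{chap:deepsad}.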
\newsection{Lidar - Light Detection and Ranging}{sec:lidar_related_work}
\todo[inline]{Related work on lidar.}
\todo[inline, color=green!40]{The older, more commonly known radar works by sending out an electromagnetic wave in the radio-frequency range and detecting the time it takes to return (if it returns at all), signalling a reflective object in the path of the radio wave. Lidar works on the same principle but sends out a light ray produced by a laser (citation needed) and measures the time it takes for the ray to return to the sensor. Since the speed of light in air is constant, the system can calculate the distance between the sensor and the measured point. Modern lidar systems send out many, often millions of, measurement rays per second, which results in a three-dimensional point cloud constructed from the direction in which each ray was cast and the distance that was measured.}
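The underlying range measurement can be summarized by the time-of-flight relation
\begin{equation}
    d = \frac{c \, \Delta t}{2},
\end{equation}
where $d$ is the distance to the reflecting surface, $c$ the speed of light in air, and $\Delta t$ the time between emitting the laser pulse and detecting its return; the factor $1/2$ accounts for the round trip. Each measured distance, combined with the known emission direction of the ray, yields one point of the resulting point cloud.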
\todo[inline, color=green!40]{Lidar is used in most domains reliant on accurate 3D representations of the world, like autonomous driving and robot navigation (+ maybe quickly look up two other domains). Its main advantages are high measurement accuracy, precision (use correct term), and high resolution (possible due to single-point measurements instead of cones like radar, ToF, ultrasonic), which enables more detailed mappings of the environment.}
\todo[inline, color=green!40]{Due to its point-wise precision, lidar is sensitive to noise/degradation from airborne particles, which may produce early returns, deflections, and errors of the light rays; this results in noise in the 3D point cloud and possibly missing data for the measurement behind the aerosol particles.}
\todo[inline, color=green!40]{Because of the given advantages, lidar is nowadays most commonly used on robot platforms for environment mapping and navigation -- so we chose to demonstrate our method on degraded data collected by a lidar sensor, as discussed in more detail in section (data section).}

\newchapter{DeepSAD: Semi-Supervised Anomaly Detection}{chap:deepsad}
\todo[inline, color=green!40]{DeepSAD is a semi-supervised anomaly detection method proposed in (cite), which is based on an unsupervised method (DeepSVDD) and additionally allows providing some labeled data, which is used during the training phase to improve the method's performance.}

\newsection{Algorithm Description}{sec:algorithm_description}
\todo[inline]{Explain DeepSAD in detail.}
\todo[inline, color=green!40]{The core idea of the algorithm is to learn a transformation which maps input data into a latent space where normal data clusters close together and anomalous data gets mapped further away. To achieve this, the method first includes a pre-training step of an autoencoder to extract the most relevant information; second, it fixes a hypersphere center in the autoencoder's latent space as a target point for normal data; and third, it trains the network to map normal data closer to that hypersphere center. Fourth, the resulting network can map new data into this latent space and interpret its distance from the hypersphere center as an anomaly score, which is larger the more anomalous the data point is.}
\todo[inline, color=green!40]{Explanation of the pre-training step: the architecture of the autoencoder depends on the shape of the input data, but any data shape is generally permissible. For the autoencoder we do not need any labels, since the optimization target is always the input itself. The latent space dimensionality can be chosen based on the input data's complexity (search citations); generally a higher-dimensional latent space has more learning capacity but tends to overfit more easily (find cite). The pre-training step is used to find weights for the encoder which generally extract robust and critical information from the input because TODO read DeepSAD paper (cite DeepSAD). As training data, typically all data (normal and anomalous) is used during this step.}
\todo[inline, color=green!40]{Explanation of the hypersphere center step: an additional positive ramification of the pre-training is that the mean of all latent representations obtained during pre-training can be used as the hypersphere center around which normal data is supposed to cluster. This is advantageous because it allows the main training to converge faster than choosing a random point in the latent space as the hypersphere center. From this point onward the center c is fixed for the main training and inference and does not change anymore.}
\todo[inline, color=green!40]{Explanation of the training step: during the main training step the method starts with the pre-trained weights of the encoder but removes the decoder from the architecture, since it optimizes the output in the latent space and does not need to reproduce the input data format. It does so by minimizing the geometric distance of each input's latent space representation to the previously defined hypersphere center c. Since normal data is more common in the inputs, this results in normal data clustering closely around c and anomalous data being pushed away from it. Additionally, during this step the labeled data is used to map normal and anomalous data more correctly.}
\todo[inline, color=green!40]{Explanation of the inference step: with the trained network we can transform new input data into the latent space and calculate its distance from the hypersphere center, which will be smaller the more confident the network is that the data is normal, and larger the more likely the data is anomalous. This output score is an analog value that depends on multiple factors, such as the latent space dimensionality, the encoder architecture, and ???, and has to be interpreted further to be used (for example by thresholding).}

\newsection{Algorithm Details and Hyperparameters}{sec:algorithm_details}
\todo[inline]{Backpropagation optimization formula, hyperparameter explanation.}
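\todo[inline, color=green!40]{The following objective is a sketch written down from memory and still has to be verified against the original DeepSAD publication.}
As a sketch: with encoder $\phi(\cdot\,;\mathcal{W})$, fixed hypersphere center $\mathbf{c}$, $n$ unlabeled training samples $\mathbf{x}_i$, and $m$ labeled samples $(\tilde{\mathbf{x}}_j, \tilde{y}_j)$ with $\tilde{y}_j = +1$ for labeled normal and $\tilde{y}_j = -1$ for labeled anomalous data, the DeepSAD training objective has roughly the form
\begin{equation}
    \min_{\mathcal{W}} \;
    \frac{1}{n+m} \sum_{i=1}^{n} \big\lVert \phi(\mathbf{x}_i;\mathcal{W}) - \mathbf{c} \big\rVert^2
    + \frac{\eta}{n+m} \sum_{j=1}^{m} \Big( \big\lVert \phi(\tilde{\mathbf{x}}_j;\mathcal{W}) - \mathbf{c} \big\rVert^2 \Big)^{\tilde{y}_j}
    + \frac{\lambda}{2} \sum_{\ell} \big\lVert \mathbf{W}^{\ell} \big\rVert_F^2 ,
\end{equation}
where the hyperparameter $\eta$ weights the influence of the labeled samples and $\lambda$ controls the weight decay on the network weights $\mathbf{W}^{\ell}$. The exponent $\tilde{y}_j$ penalizes labeled anomalies by their inverse distance to $\mathbf{c}$, pushing them away from the center. At inference time, the anomaly score is then the distance $s(\mathbf{x}) = \lVert \phi(\mathbf{x};\mathcal{W}^{*}) - \mathbf{c} \rVert^2$ of a sample's latent representation to the center.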
\newsection{Advantages and Limitations}{sec:advantages_limitations}
\todo[inline]{Semi-supervised: learns normality from the sheer amount of data (no labeling/ground truth required); very few labels suffice to better train for a specific situation.}

\newchapter{Data and Preprocessing}{chap:data_preprocessing}

\newsection{Data Sources}{sec:data_collection}
Dataset~\cite{alexander_kyuroson_2023_7913307}
\todo[inline]{Describe data sources, limitations.}
\todo[inline]{Screenshots of camera/3D data?}
\todo[inline]{Difficulties: no ground truth, different lidar sensors/settings, different data shapes, available metadata, ...}

\newsection{Preprocessing Steps}{sec:preprocessing}
\todo[inline]{Describe how the 3D lidar data was preprocessed (2D projection), labeling.}
\todo[inline]{Screenshots of 2D projections?}

\newchapter{Experimental Setup}{chap:experimental_setup}

\newsection{Training/Evaluation Data Distribution}{sec:data_setup}
\todo[inline]{Which data was used how in training/evaluation.}
\todo[inline]{Explain the concept of global/local application for global-/window-quantification.}

\newsection{Evaluation Metrics}{sec:evaluation_metrics}
\todo[inline]{k-fold evaluation, ROC, generalization (evaluation on other datasets?).}

\newsection{Hyperparameters}{sec:hyperparameters}
\todo[inline]{Vary hyperparameters (no labeled anomalies vs. some), specific training on local windows (only z-axis difference?), window size?}

\newchapter{Results and Discussion}{chap:results_discussion}

\newsection{Results}{sec:results}
\todo[inline]{Some results, ROC curves, for both global and local.}

\newsection{Hyperparameter Analysis}{sec:hyperparameter_analysis}
\todo[inline]{Results for different amounts of labeled data.}

\newchapter{Conclusion and Future Work}{chap:conclusion_future_work}

\newsection{Conclusion}{sec:conclusion}
\todo[inline]{Summarize what has been achieved.}

\newsection{Future Work}{sec:future_work}
\todo[inline]{Confirm results with real smoke data.}

% end mainmatter
% **************************************************************************************************
\appendix

\ifthenelse{\equal{\DocumentType}{thesis}}
{
  \setcounter{mypageno}{\value{page}}
  \frontmatter
  \pagestyle{plain}
  \pagenumbering{Roman}
  \setcounter{page}{\value{mypageno}}
}{}

\printbibliography
\listoffigures
\listoftables
\printglossary[type=\acronymtype]

% **************************************************************************************************
% place all floats and create label on last page
\FloatBarrier\label{end-of-document}
\end{document}