wip
thesis/Main.bbl | 196
@@ -596,6 +596,100 @@
 \verb http://dx.doi.org/10.1016/j.knosys.2021.106878
 \endverb
 \endentry
+\entry{deep_svdd}{inproceedings}{}{}
+\name{author}{8}{}{%
+{{hash=002c037bd5c44a3c55a7523254ff0522}{%
+family={Ruff},
+familyi={R\bibinitperiod},
+given={Lukas},
+giveni={L\bibinitperiod}}}%
+{{hash=1e584159c8f4d32a1e55772b4b798844}{%
+family={Vandermeulen},
+familyi={V\bibinitperiod},
+given={Robert},
+giveni={R\bibinitperiod}}}%
+{{hash=d225927675f1b50ada7afdcd7141a590}{%
+family={Goernitz},
+familyi={G\bibinitperiod},
+given={Nico},
+giveni={N\bibinitperiod}}}%
+{{hash=99c57c6d6c2549166bb9eb10bd8357a9}{%
+family={Deecke},
+familyi={D\bibinitperiod},
+given={Lucas},
+giveni={L\bibinitperiod}}}%
+{{hash=74815873173614768c79b41ebcd30fb7}{%
+family={Siddiqui},
+familyi={S\bibinitperiod},
+given={Shoaib\bibnamedelima Ahmed},
+giveni={S\bibinitperiod\bibinitdelim A\bibinitperiod}}}%
+{{hash=d122a2a87d21da4007f460564975e967}{%
+family={Binder},
+familyi={B\bibinitperiod},
+given={Alexander},
+giveni={A\bibinitperiod}}}%
+{{hash=d2949700f8a8fdee1e69d478901c51d7}{%
+family={Müller},
+familyi={M\bibinitperiod},
+given={Emmanuel},
+giveni={E\bibinitperiod}}}%
+{{hash=5f7a97296025f5dcf9ed79d67caa64fc}{%
+family={Kloft},
+familyi={K\bibinitperiod},
+given={Marius},
+giveni={M\bibinitperiod}}}%
+}
+\name{editor}{2}{}{%
+{{hash=dc770b9b0d58d3008bbb3906497d898c}{%
+family={Dy},
+familyi={D\bibinitperiod},
+given={Jennifer},
+giveni={J\bibinitperiod}}}%
+{{hash=112eb0b147c4a7f674d015a86e5dea70}{%
+family={Krause},
+familyi={K\bibinitperiod},
+given={Andreas},
+giveni={A\bibinitperiod}}}%
+}
+\list{publisher}{1}{%
+{PMLR}%
+}
+\strng{namehash}{f49556d617d4a5ffe2baa6d71026cde2}
+\strng{fullhash}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{fullhashraw}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{bibnamehash}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{authorbibnamehash}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{authornamehash}{f49556d617d4a5ffe2baa6d71026cde2}
+\strng{authorfullhash}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{authorfullhashraw}{dcbeae0afbfe40f33a90739e660c9b68}
+\strng{editorbibnamehash}{83be554d58af5be1788b5c3616f0e92a}
+\strng{editornamehash}{83be554d58af5be1788b5c3616f0e92a}
+\strng{editorfullhash}{83be554d58af5be1788b5c3616f0e92a}
+\strng{editorfullhashraw}{83be554d58af5be1788b5c3616f0e92a}
+\field{extraname}{2}
+\field{sortinit}{1}
+\field{sortinithash}{4f6aaa89bab872aa0999fec09ff8e98a}
+\field{labelnamesource}{author}
+\field{labeltitlesource}{title}
+\field{abstract}{Despite the great advances made by deep learning in many machine learning problems, there is a relative dearth of deep learning approaches for anomaly detection. Those approaches which do exist involve networks trained to perform a task other than anomaly detection, namely generative models or compression, which are in turn adapted for use in anomaly detection; they are not trained on an anomaly detection based objective. In this paper we introduce a new anomaly detection method—Deep Support Vector Data Description—, which is trained on an anomaly detection based objective. The adaptation to the deep regime necessitates that our neural network and training procedure satisfy certain properties, which we demonstrate theoretically. We show the effectiveness of our method on MNIST and CIFAR-10 image benchmark datasets as well as on the detection of adversarial examples of GTSRB stop signs.}
+\field{booktitle}{Proceedings of the 35th International Conference on Machine Learning}
+\field{month}{10--15 Jul}
+\field{series}{Proceedings of Machine Learning Research}
+\field{title}{Deep One-Class Classification}
+\field{volume}{80}
+\field{year}{2018}
+\field{pages}{4393\bibrangedash 4402}
+\range{pages}{10}
+\verb{file}
+\verb http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf
+\endverb
+\verb{urlraw}
+\verb https://proceedings.mlr.press/v80/ruff18a.html
+\endverb
+\verb{url}
+\verb https://proceedings.mlr.press/v80/ruff18a.html
+\endverb
+\endentry
 \entry{ml_autoencoder_figure_source}{article}{}{}
 \name{author}{1}{}{%
 {{hash=49d03d499031db786a0e61119024cf5a}{%
@@ -612,8 +706,8 @@
 \strng{authornamehash}{49d03d499031db786a0e61119024cf5a}
 \strng{authorfullhash}{49d03d499031db786a0e61119024cf5a}
 \strng{authorfullhashraw}{49d03d499031db786a0e61119024cf5a}
-\field{sortinit}{1}
-\field{sortinithash}{4f6aaa89bab872aa0999fec09ff8e98a}
+\field{sortinit}{2}
+\field{sortinithash}{8b555b3791beccb63322c22f3320aa9a}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{journaltitle}{lilianweng.github.io}
@@ -899,8 +993,8 @@
 \strng{authornamehash}{04b1af8a561822bd2336140b9c47f5ed}
 \strng{authorfullhash}{e7818c2cd27c283d60a3963ba626342b}
 \strng{authorfullhashraw}{e7818c2cd27c283d60a3963ba626342b}
-\field{sortinit}{2}
-\field{sortinithash}{8b555b3791beccb63322c22f3320aa9a}
+\field{sortinit}{3}
+\field{sortinithash}{ad6fe7482ffbd7b9f99c9e8b5dccd3d7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{2379-9153}
@@ -1083,100 +1177,6 @@
 \endverb
 \keyw{Degradation;Location awareness;Laser radar;Rain;Codes;System performance;Current measurement}
 \endentry
-\entry{deepsvdd}{inproceedings}{}{}
-\name{author}{8}{}{%
-{{hash=002c037bd5c44a3c55a7523254ff0522}{%
-family={Ruff},
-familyi={R\bibinitperiod},
-given={Lukas},
-giveni={L\bibinitperiod}}}%
-{{hash=1e584159c8f4d32a1e55772b4b798844}{%
-family={Vandermeulen},
-familyi={V\bibinitperiod},
-given={Robert},
-giveni={R\bibinitperiod}}}%
-{{hash=d225927675f1b50ada7afdcd7141a590}{%
-family={Goernitz},
-familyi={G\bibinitperiod},
-given={Nico},
-giveni={N\bibinitperiod}}}%
-{{hash=99c57c6d6c2549166bb9eb10bd8357a9}{%
-family={Deecke},
-familyi={D\bibinitperiod},
-given={Lucas},
-giveni={L\bibinitperiod}}}%
-{{hash=74815873173614768c79b41ebcd30fb7}{%
-family={Siddiqui},
-familyi={S\bibinitperiod},
-given={Shoaib\bibnamedelima Ahmed},
-giveni={S\bibinitperiod\bibinitdelim A\bibinitperiod}}}%
-{{hash=d122a2a87d21da4007f460564975e967}{%
-family={Binder},
-familyi={B\bibinitperiod},
-given={Alexander},
-giveni={A\bibinitperiod}}}%
-{{hash=d2949700f8a8fdee1e69d478901c51d7}{%
-family={Müller},
-familyi={M\bibinitperiod},
-given={Emmanuel},
-giveni={E\bibinitperiod}}}%
-{{hash=5f7a97296025f5dcf9ed79d67caa64fc}{%
-family={Kloft},
-familyi={K\bibinitperiod},
-given={Marius},
-giveni={M\bibinitperiod}}}%
-}
-\name{editor}{2}{}{%
-{{hash=dc770b9b0d58d3008bbb3906497d898c}{%
-family={Dy},
-familyi={D\bibinitperiod},
-given={Jennifer},
-giveni={J\bibinitperiod}}}%
-{{hash=112eb0b147c4a7f674d015a86e5dea70}{%
-family={Krause},
-familyi={K\bibinitperiod},
-given={Andreas},
-giveni={A\bibinitperiod}}}%
-}
-\list{publisher}{1}{%
-{PMLR}%
-}
-\strng{namehash}{f49556d617d4a5ffe2baa6d71026cde2}
-\strng{fullhash}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{fullhashraw}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{bibnamehash}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{authorbibnamehash}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{authornamehash}{f49556d617d4a5ffe2baa6d71026cde2}
-\strng{authorfullhash}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{authorfullhashraw}{dcbeae0afbfe40f33a90739e660c9b68}
-\strng{editorbibnamehash}{83be554d58af5be1788b5c3616f0e92a}
-\strng{editornamehash}{83be554d58af5be1788b5c3616f0e92a}
-\strng{editorfullhash}{83be554d58af5be1788b5c3616f0e92a}
-\strng{editorfullhashraw}{83be554d58af5be1788b5c3616f0e92a}
-\field{extraname}{2}
-\field{sortinit}{4}
-\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
-\field{labelnamesource}{author}
-\field{labeltitlesource}{title}
-\field{abstract}{Despite the great advances made by deep learning in many machine learning problems, there is a relative dearth of deep learning approaches for anomaly detection. Those approaches which do exist involve networks trained to perform a task other than anomaly detection, namely generative models or compression, which are in turn adapted for use in anomaly detection; they are not trained on an anomaly detection based objective. In this paper we introduce a new anomaly detection method—Deep Support Vector Data Description—, which is trained on an anomaly detection based objective. The adaptation to the deep regime necessitates that our neural network and training procedure satisfy certain properties, which we demonstrate theoretically. We show the effectiveness of our method on MNIST and CIFAR-10 image benchmark datasets as well as on the detection of adversarial examples of GTSRB stop signs.}
-\field{booktitle}{Proceedings of the 35th International Conference on Machine Learning}
-\field{month}{10--15 Jul}
-\field{series}{Proceedings of Machine Learning Research}
-\field{title}{Deep One-Class Classification}
-\field{volume}{80}
-\field{year}{2018}
-\field{pages}{4393\bibrangedash 4402}
-\range{pages}{10}
-\verb{file}
-\verb http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf
-\endverb
-\verb{urlraw}
-\verb https://proceedings.mlr.press/v80/ruff18a.html
-\endverb
-\verb{url}
-\verb https://proceedings.mlr.press/v80/ruff18a.html
-\endverb
-\endentry
 \entry{deep_learning_book}{book}{}{}
 \name{author}{3}{}{%
 {{hash=5d2585c11210cf1d4512e6e0a03ec315}{%
BIN  thesis/Main.pdf (binary file not shown)
@@ -244,7 +244,7 @@ Lidar sensors function by projecting lasers in multiple directions near-simultan
 \newsection{anomaly_detection}{Anomaly Detection}


-Anomaly detection refers to the process of detecting unexpected patterns of data, outliers which deviate significantly from the majority of data which is implicitly defined as normal by its prevalence. In classic statistical analysis these techniques have been studied as early as the 19th century~\cite{anomaly_detection_history}. Since then, a multitude of methods and use-cases for them have been proposed and studied. Examples of applications include healthcare, where computer vision algorithms are used to detect anomalies in medical images for diagnostics and early detection of diseases~\cite{anomaly_detection_medical}, detection of fraud in decentralized financial systems based on block-chain technology~\cite{anomaly_detection_defi} as well as fault detection in industrial machinery using acoustic sound data~\cite{anomaly_detection_manufacturing}.
+Anomaly detection refers to the process of detecting unexpected patterns in data: outliers that deviate significantly from the majority of the data, which is implicitly defined as normal by its prevalence. In classic statistical analysis, such techniques have been studied since as early as the 19th century~\cite{anomaly_detection_history}. Since then, a multitude of methods and use cases for them have been proposed and studied. Applications include healthcare, where computer vision algorithms detect anomalies in medical images for diagnostics and early detection of diseases~\cite{anomaly_detection_medical}; fraud detection in decentralized financial systems based on blockchain technology~\cite{anomaly_detection_defi}; and fault detection in industrial machinery using acoustic sound data~\cite{anomaly_detection_manufacturing}.

 Figure~\ref{fig:anomaly_detection_overview} depicts a simple but illustrative example of data which can be classified as either normal or anomalous and shows the problem anomaly detection methods generally try to solve. A successful anomaly detection method would learn to differentiate normal from anomalous data, for example by learning the boundaries around the available normal data and classifying a sample as either normal or anomalous based on its location inside or outside of those boundaries. Another possible approach could calculate an analog value which correlates with the likelihood of a sample being anomalous, for example by using the sample's distance from the closest normal data cluster's center.

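To make the boundary and cluster-distance ideas above concrete, here is a minimal sketch in Python (our own illustration, not part of the commit; the data, the cluster count, and the use of scikit-learn's KMeans are placeholder assumptions):

    import numpy as np
    from sklearn.cluster import KMeans

    # Fit cluster centers on data that is assumed to be predominantly normal.
    rng = np.random.default_rng(0)
    X_train = rng.normal(size=(500, 2))               # placeholder "normal" data
    kmeans = KMeans(n_clusters=3, n_init=10).fit(X_train)

    def anomaly_score(x):
        # Distance to the closest cluster center; larger means more anomalous.
        return float(np.min(np.linalg.norm(kmeans.cluster_centers_ - x, axis=1)))

    print(anomaly_score(np.array([8.0, 8.0])))        # far from all centers -> high score

Thresholding the analog score on held-out normal data turns it into the binary normal/anomalous decision described above.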
@@ -264,11 +264,11 @@ By their very nature anomalies are rare occurences and oftentimes unpredictable

 In this thesis we used an anomaly detection method, namely \citetitle{deepsad}~\cite{deepsad} (DeepSAD), to model our problem (how to quantify the degradation of lidar sensor data) as an anomaly detection problem. We do this by classifying good-quality data as normal and degraded data as anomalous, and rely on a method which can express each sample's likelihood of being anomalous as an analog anomaly score, which enables us to interpret it as the data's degradation quantification value.

-Chapter~\ref{chp:deepsad} describes DeepSAD in more detail, which shows that it is a clustering based approach with a spectral pre-processing component, in that it uses a neural network to reduce the inputs dimensionality while simultaneously clustering normal data closely around a given centroid. It then produces an anomaly score by calculating the geometric distance between a data sample and the aforementioned cluster centroid, assuming the distance is shorter for normal than for anomalous data. Since our data is high dimensional it makes sense to use a spectral method to reduce the datas dimensionality and an approach which results in an analog value rather than a binary classification is useful for our use-case since we want to quantify not only classify the data degradation.
+Chapter~\ref{chp:deepsad} describes DeepSAD in more detail and shows that it is a clustering-based approach with a spectral pre-processing component, in that it uses a neural network to reduce the input's dimensionality while simultaneously clustering normal data closely around a given centroid. It then produces an anomaly score by calculating the geometric distance between a data sample and the aforementioned cluster centroid, assuming the distance is shorter for normal than for anomalous data. Since our data is high dimensional, it makes sense to use a spectral method to reduce the data's dimensionality, and an approach which yields an analog value rather than a binary classification is useful for our use case, since we want to quantify, not only classify, the data degradation.

 There is a wide array of problems in domains similar to the one we research in this thesis for which modeling them as anomaly detection problems has proven successful. The degradation of point clouds produced by an industrial 3D sensor has been modeled as an anomaly detection task in \citetitle{bg_ad_pointclouds_scans}~\cite{bg_ad_pointclouds_scans}. \citeauthor{bg_ad_pointclouds_scans} propose a student-teacher model capable of inferring a pointwise anomaly score for degradation in point clouds. The teacher network is trained on an anomaly-free dataset to extract dense features of the point clouds' local geometries, after which an identical student network is trained to emulate the teacher network's outputs. For degraded point clouds, the regression error between the teacher's and student's outputs is calculated and interpreted as the anomaly score, with the rationale that the student network has not observed features produced by anomalous geometries during training, leaving it incapable of producing an output similar to the teacher's for those regions. Another example is \citetitle{bg_ad_pointclouds_poles}~\cite{bg_ad_pointclouds_poles}, which proposes a method to detect and classify pole-like objects in urban point cloud data, to differentiate between natural and man-made objects such as street signs, for autonomous driving purposes. An anomaly detection method was used to identify the vertical pole-like objects in the point clouds; the preprocessed objects were then grouped by similarity using a clustering algorithm and classified as either trees or man-made poles.

-As already shortly mentioned at the beginning of this section, anomaly detection methods and their usage are oftentimes challenged by the limited availability of anomalous data, owing to the very nature of anomalies which are rare occurences. Oftentimes the intended use-case is to even find unknown anomalies in a given dataset which have not yet been identified. In addition, it can be challenging to classify anomalies correctly for complex data, since the very definition of an anomaly is dependent on many factors such as the type of data, the intended use-case or even how the data evolves over time. For these reasons most types of anomaly detection approaches limit their reliance on anomalous data during training and many of them do not differentiate between normal and anomalous data at all. DeepSAD is a semi-supervised method which is characterized by using a mixture of labeled and unlabeled data.
+As briefly mentioned at the beginning of this section, anomaly detection methods are often challenged by the limited availability of anomalous data, owing to the very nature of anomalies as rare occurrences. Often the intended use case is even to find unknown anomalies in a given dataset which have not yet been identified. In addition, it can be challenging to classify anomalies correctly for complex data, since the very definition of an anomaly depends on many factors, such as the type of data, the intended use case, or even how the data evolves over time. For these reasons most anomaly detection approaches limit their reliance on anomalous data during training, and many of them do not differentiate between normal and anomalous data at all. DeepSAD is a semi-supervised method, characterized by using a mixture of labeled and unlabeled data.


 \newsection{semi_supervised}{Semi-Supervised Learning Algorithms}
@@ -300,12 +300,12 @@ In reinforcement learning, an agent learns by trial and error while interacting

 Semi-supervised learning algorithms are an in-between category of supervised and unsupervised algorithms, in that they use a mixture of labeled and unlabeled data. Typically, vastly more unlabeled than labeled data is used during training of such algorithms, due to the effort and expertise required to label large quantities of data correctly. Semi-supervised methods are often an effort to improve a machine learning algorithm belonging to either the supervised or unsupervised category. Supervised methods such as classification tasks are enhanced by using large amounts of unlabeled data to augment the supervised training without additional labeling work. Alternatively, unsupervised methods like clustering algorithms may not only use unlabeled data but improve their performance by considering some hand-labeled data during training.

-Machine learning based anomaly detection methods can utilize techniques from all of the aforementioned categories, although their suitability varies. While supervised anomaly detection methods exist, their usability not only depends on the availability of labeled training data but also on a reasonable proportionality between normal and anomalous data. Both requirements can be challenging due to labeling often being labour intensive and anomalies' intrinsic property to occur rarely when compared to normal data, making capture of enough anomalous behaviour a hard problem. Semi-Supervised anomaly detection methods are of special interest in that they may overcome these difficulties inherently present in many anomaly detection tasks~\cite{semi_ad_survey}. These methods typically have the same goal as unsupervised anomaly detection methods which is to model the normal class behaviour and delimitate it from anomalies, but they can incorporate some hand-labeled examples of normal and/or anomalous behaviour to improve their perfomance over fully unsupervised methods. DeepSAD is a semi-supervised method which extends its unsupervised predecessor Deep SVDD by including some labeled samples during training. Both, DeepSAD and Deep SVDD also utilize an autoencoder in a pre-training step, a machine learning architecture, frequently grouped with unsupervised algorithms, even though that definition can be contested when scrutinizing it in more detail, which we will do next.
+Machine learning based anomaly detection methods can utilize techniques from all of the aforementioned categories, although their suitability varies. While supervised anomaly detection methods exist, their usability depends not only on the availability of labeled training data but also on a reasonable proportion between normal and anomalous data. Both requirements can be challenging: labeling is often labour intensive, and anomalies intrinsically occur rarely compared to normal data, making the capture of enough anomalous behaviour a hard problem. Semi-supervised anomaly detection methods are of special interest in that they may overcome these difficulties inherent in many anomaly detection tasks~\cite{semi_ad_survey}. These methods typically have the same goal as unsupervised anomaly detection methods, which is to model the normal class behaviour and delimit it from anomalies, but they can incorporate some hand-labeled examples of normal and/or anomalous behaviour to improve their performance over fully unsupervised methods. DeepSAD is a semi-supervised method which extends its unsupervised predecessor Deep SVDD~\cite{deep_svdd} by including some labeled samples during training. Both DeepSAD and Deep SVDD also utilize an autoencoder in a pre-training step, a machine learning architecture frequently grouped with unsupervised algorithms, even though that categorization can be contested when scrutinized in more detail, which we will do next.

 \newsection{autoencoder}{Autoencoder}


-Autoencoders are a type of neural network architecture, whose main goal is learning to encode input data into a representative state, from which the same input can be reconstructed, hence the name. They typically consist of two functions, an encoder and a decoder with a latent space inbetween them as depicted in the toy example in figure~\ref{fig:autoencoder_general}. The encoder learns to extract the most significant features from the input and to convert them into the input's latent space representation. The reconstruction goal ensures that the most prominent features of the input get retained during the encoding phase, due to the inherent inability to reconstruct the input if too much relevant information is missing. The decoder simultaneously learns to reconstruct the original input from its encoded latent space representation, by minimizing the error between the input sample and the autoencoder's output. This optimization goal creates uncertainty when categorizing autoencoders as an unsupervised method, although literature commonly defines them as such. While they do not require any labeling of the input data, their optimization target can still calculate the error between the output and the optimal target, which is typically not available for unsupervised methods. For this reason, they are sometimes proposed to be a case of self-supervised learning, a type of machine learning where the data itself can be used to generate a supervisory signal without the need for a domain expert to provide one.
+Autoencoders are a type of neural network architecture whose main goal is learning to encode input data into a representative state from which the same input can be reconstructed, hence the name. They typically consist of two functions, an encoder and a decoder, with a latent space in between, as depicted in the toy example in figure~\ref{fig:autoencoder_general}. The encoder learns to extract the most significant features from the input and to convert them into the input's latent space representation. The reconstruction goal ensures that the most prominent features of the input are retained during the encoding phase, due to the inherent inability to reconstruct the input if too much relevant information is missing. The decoder simultaneously learns to reconstruct the original input from its encoded latent space representation, by minimizing the error between the input sample and the autoencoder's output. This optimization goal complicates the categorization of autoencoders as unsupervised methods. Although they do not require labeled data, they still compute an error against a known target: the input itself. For this reason, some authors describe them as a form of self-supervised learning, where the data provides its own supervisory signal without requiring expert labeling.

 \fig{autoencoder_general}{figures/autoencoder_principle.png}{Illustration of an autoencoder’s working principle. The encoder $\mathbf{g_\phi}$ compresses the input into a lower-dimensional bottleneck representation $\mathbf{z}$, which is then reconstructed by the decoder $\mathbf{f_\theta}$. During training, the difference between input and output serves as the loss signal to optimize both the encoder’s feature extraction and the decoder’s reconstruction. Reproduced from~\cite{ml_autoencoder_figure_source}.
 }
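As a minimal sketch of the encoder/decoder arrangement and reconstruction loss just described (our own illustration; the layer sizes and input dimension are arbitrary assumptions):

    import torch
    import torch.nn as nn

    class Autoencoder(nn.Module):
        def __init__(self, in_dim=784, latent_dim=32):
            super().__init__()
            # Encoder g_phi: input -> bottleneck representation z
            self.encoder = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU(),
                                         nn.Linear(128, latent_dim))
            # Decoder f_theta: z -> reconstruction of the input
            self.decoder = nn.Sequential(nn.Linear(latent_dim, 128), nn.ReLU(),
                                         nn.Linear(128, in_dim))

        def forward(self, x):
            return self.decoder(self.encoder(x))

    model = Autoencoder()
    x = torch.randn(16, 784)                    # dummy input batch
    loss = nn.functional.mse_loss(model(x), x)  # the input doubles as the target
    loss.backward()                             # i.e. the data supervises itself

The last two lines are the point of the self-supervision argument above: no label is needed, because the reconstruction target is the input itself.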
@@ -319,12 +319,12 @@ Autoencoders have been shown to be useful in the anomaly detection domain by ass

 Lidar (Light Detection and Ranging) measures distance by emitting short laser pulses and timing how long they take to return, an approach that may be familiar from the more commonly known radar technology, which uses radio-frequency pulses and measures their return time to gauge an object's range. Unlike radar, however, lidar operates at much shorter wavelengths and can fire millions of pulses per second, achieving millimeter-level precision and dense, high-resolution 3D point clouds. This fine granularity makes lidar ideal for applications such as detailed obstacle mapping, surface reconstruction, and autonomous navigation in complex environments.

-Because the speed of light in air is effectively constant, multiplying half the round‐trip time by that speed gives the distance between the lidar sensor and the reflecting object, as can be seen visualized in figure~\ref{fig:lidar_working_principle}. Modern spinning multi‐beam lidar systems emit millions of these pulses every second. Each pulse is sent at a known combination of horizontal and vertical angles, creating a regular grid of measurements: for example, 32 vertical channels swept through 360° horizontally at a fixed angular spacing. While newer solid-state designs (flash, MEMS, phased-array) are emerging, spinning multi-beam lidar remains the most commonly seen type in autonomous vehicles and robotics because of its proven range, reliability, and mature manufacturing base.
+Because the speed of light in air is effectively constant, multiplying half the round-trip time by that speed gives the distance between the lidar sensor and the reflecting object, as visualized in figure~\ref{fig:lidar_working_principle}. Modern spinning multi-beam lidar systems emit up to millions of these pulses every second. Each pulse is sent at a known combination of horizontal and vertical angles, creating a regular grid of measurements: for example, 32 vertical channels swept through 360° horizontally at a fixed angular spacing. While newer solid-state designs (flash, MEMS, phased-array) are emerging, spinning multi-beam lidar remains the most common type in autonomous vehicles and robotics because of its proven range, reliability, and mature manufacturing base.

 \figc{lidar_working_principle}{figures/bg_lidar_principle.png}{Illustration of the working principle of a lidar sensor. The emitter sends out an optical signal that is reflected by objects in the scene and captured by the receiver. The system controller measures the time delay $\Delta t$ between emission and reception to calculate distance using $d = c \cdot \Delta t / 2$. By repeating this process across many directions—either with multiple emitter/receiver pairs or sequentially in a spinning lidar—the sensor obtains a dense set of distances that, combined with their emission angles, form a 3D point cloud of the environment. Reproduced from~\cite{bg_lidar_figure_source}.
 }{width=.8\textwidth}

-Each instance a lidar emits and receives a laser pulse, it can use the ray's direction and the calculated distance to produce a single three-dimensional point. By collecting millions of such points each second, the sensor constructs a “point cloud”—a dense set of 3D coordinates relative to the lidar’s own position. In addition to X, Y, and Z, many lidars also record the intensity or reflectivity of each return, providing extra information about the surface properties of the object hit by the pulse.
+Each time a lidar emits and receives a laser pulse, it can use the ray's direction and the calculated distance to produce a single three-dimensional point. By collecting up to millions of such points each second, the sensor constructs a “point cloud”, a dense set of 3D coordinates relative to the lidar’s own position. In addition to X, Y, and Z, many lidars also record the intensity or reflectivity of each return, providing extra information about the surface properties of the object hit by the pulse.

 Lidar’s high accuracy, long range, and full-circle field of view make it indispensable for tasks like obstacle detection, simultaneous localization and mapping (SLAM), and terrain modeling in autonomous driving and mobile robotics. While complementary sensors—such as time-of-flight cameras, ultrasonic sensors, and RGB cameras—have their strengths at short range or in particular lighting, only lidar delivers the combination of precise 3D measurements over medium to long distances, consistent performance regardless of illumination, and the point cloud density needed for safe navigation. Lidar systems do exhibit intrinsic noise (e.g., range quantization or occasional multi-return ambiguities), but in most robotic applications these effects are minor compared to environmental degradation.

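A short sketch of the geometry just described (our own illustration): converting one pulse's measured round-trip time and known emission angles into a single 3D point via $d = c \cdot \Delta t / 2$.

    import math

    C = 299_792_458.0  # speed of light in m/s, effectively constant in air

    def lidar_point(round_trip_s, azimuth_rad, elevation_rad):
        # One lidar return -> (x, y, z) in the sensor frame, using d = c * dt / 2.
        d = C * round_trip_s / 2.0
        x = d * math.cos(elevation_rad) * math.cos(azimuth_rad)
        y = d * math.cos(elevation_rad) * math.sin(azimuth_rad)
        z = d * math.sin(elevation_rad)
        return (x, y, z)

    # A return after ~200 ns corresponds to an object roughly 30 m away.
    print(lidar_point(200e-9, math.radians(45.0), math.radians(2.0)))

Sweeping such measurements over all channel/azimuth combinations yields the point cloud described in the following paragraph.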
@@ -334,7 +334,7 @@ All of these may create unwanted noise in the point cloud created by the lidar,

 A learning-based method to filter dust-caused degradation from lidar data is introduced in \citetitle{lidar_denoising_dust}~\cite{lidar_denoising_dust}. The authors employ a convolutional neural network to classify dust particles in lidar point clouds as such, enabling the filtering of those points, and compare their method to more conservative approaches such as various outlier removal algorithms. Another relevant example is the filtering method proposed in \citetitle{lidar_subt_dust_removal}~\cite{lidar_subt_dust_removal}, which enables the filtration of point clouds degraded by smoke or dust in subterranean environments, with a focus on the search and rescue domain. To achieve this, the authors formulated a filtration framework that relies on dynamic onboard statistical cluster outlier removal to classify and remove dust particles in point clouds.

-Our method does not aim to remove the noise or degraded points in the lidar data, but quantify its degradation to inform other systems of the autonomous robot about the data's quality, enabling more informed decisions. One such approach, though from the autonomous driving and not from the search and rescue domain can be found in \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain}. A learning-based method to quantify the lidar's sensor data degradation caused by adverse weather-effects was proposed, implemented by posing the problem as an anomaly detection task and utilizing DeepSAD to learn degraded data to be an anomaly and high quality data to be normal behaviour. DeepSAD's anomaly score was used as the degradation's quantification score. From this example we decided to imitate this method and adapt it for the search and rescue domain, although this proved challenging due to the more limited data availability. Since it was effective for the closely related \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain}, we also employed DeepSAD, whose detailed workings we present in the following chapter.
+Our method does not aim to remove the noise or degraded points in the lidar data, but to quantify its degradation in order to inform other systems of the autonomous robot about the data's quality, enabling more informed decisions. One such approach, though from the autonomous driving rather than the search and rescue domain, can be found in \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain}. There, a learning-based method to quantify lidar sensor data degradation caused by adverse weather effects was proposed, implemented by posing the problem as an anomaly detection task and utilizing DeepSAD to learn that degraded data is anomalous and high-quality data is normal behaviour. DeepSAD's anomaly score was used as the degradation quantification score. We decided to imitate this method and adapt it for the search and rescue domain, although this proved challenging due to the more limited data availability. Since it was effective for this closely related use case, we also employed DeepSAD, whose detailed workings we present in the following chapter.

 \newchapter{deepsad}{Deep SAD: Semi-Supervised Anomaly Detection}

@@ -347,7 +347,7 @@ In this chapter, we explore the method \citetitle{deepsad}~(Deep SAD)~\cite{deep

 Deep SAD's overall mechanics are similar to clustering-based anomaly detection methods, which according to \citetitle{anomaly_detection_survey}~\cite{anomaly_detection_survey} typically follow a two-step approach: first, a clustering algorithm groups data points around a centroid; then, the distances of individual data points from this centroid are calculated and used as an anomaly score. In Deep SAD, these concepts are implemented by employing a neural network which is jointly trained to map input data onto a latent space and to minimize the volume of a data-encompassing hypersphere, whose center is the aforementioned centroid. The data's geometric distance in the latent space to the hypersphere center is used as the anomaly score, where a larger distance between a sample and the centroid corresponds to a higher probability of the sample being anomalous. This is achieved by shrinking the data-encompassing hypersphere during training in proportion to all training data, of which significantly more must be normal than anomalous. The outcome of this approach is that normal data gets clustered more closely around the centroid, while anomalies appear further away from it, as can be seen in the toy example depicted in figure~\ref{fig:deep_svdd_transformation}.

-\fig{deep_svdd_transformation}{figures/deep_svdd_transformation}{DeepSAD teaches a neural network to transform data into a latent space and minimize the volume of an data-encompassing hypersphere centered around a predetermined centroid $\textbf{c}$. \\Reproduced from~\cite{deepsvdd}.}
+\fig{deep_svdd_transformation}{figures/deep_svdd_transformation}{DeepSAD teaches a neural network to transform data into a latent space and minimize the volume of a data-encompassing hypersphere centered around a predetermined centroid $\textbf{c}$. \\Reproduced from~\cite{deep_svdd}.}


 Before DeepSAD's training can begin, a pre-training step is required, during which an autoencoder is trained on all available input data. One of DeepSAD's goals is to map input data onto a lower-dimensional latent space in which the separation between normal and anomalous data can be achieved. To this end, DeepSAD and its predecessor Deep SVDD make use of the autoencoder's reconstruction goal, whose successful training ensures confidence in the encoder architecture's suitability for extracting the input data's most prominent information into the latent space between the encoder and decoder. DeepSAD then uses just the encoder as its main network architecture, discarding the decoder at this step, since reconstruction of the input is no longer necessary.
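The pre-training flow just described can be summarized in a short sketch (our paraphrase of the procedure in the Deep SVDD/Deep SAD reference implementations; the eps safeguard against a trivial all-zero center follows those implementations, but treat the details as assumptions):

    import torch

    @torch.no_grad()
    def init_center(encoder, data_loader, eps=0.1):
        # After autoencoder pre-training, keep only the encoder and fix the
        # hypersphere center c as the mean of the initial latent representations.
        zs = torch.cat([encoder(x) for x, *_ in data_loader])
        c = zs.mean(dim=0)
        # Push near-zero coordinates away from zero, so the network cannot
        # collapse to the trivial solution of mapping everything onto c.
        c[(c.abs() < eps) & (c < 0)] = -eps
        c[(c.abs() < eps) & (c >= 0)] = eps
        return c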
@@ -366,7 +366,7 @@ DeepSAD's full training and inference procedure is visualized in figure~\ref{fig

 \newsection{algorithm_details}{Algorithm Details and Hyperparameters}

-Since Deep SAD is heavily based on its predecessor \citetitle{deepsvdd}~(Deep SVDD)~\cite{deepsvdd} it is helpful to first understand Deep SVDD's optimization objective, so we start with explaining it here. For input space $\mathcal{X} \subseteq \mathbb{R}^D$, output space $\mathcal{Z} \subseteq \mathbb{R}^d$ and a neural network $\phi(\wc; \mathcal{W}) : \mathcal{X} \to \mathcal{Z}$ where $\mathcal{W}$ depicts the neural networks' weights with $L$ layers $\{\mathbf{W}_1, \dots, \mathbf{W}_L\}$, $n$ the number of unlabeled training samples $\{\mathbf{x}_1, \dots, \mathbf{x}_n\}$, $\mathbf{c}$ the center of the hypersphere in the latent space, Deep SVDD teaches the neural network to cluster normal data closely together in the latent space by defining its optimization objective as seen in~\ref{eq:deepsvdd_optimization_objective}.
+Since Deep SAD is heavily based on its predecessor \citetitle{deep_svdd}~(Deep SVDD)~\cite{deep_svdd}, it is helpful to first understand Deep SVDD's optimization objective, so we start by explaining it here. For input space $\mathcal{X} \subseteq \mathbb{R}^D$, output space $\mathcal{Z} \subseteq \mathbb{R}^d$, and a neural network $\phi(\wc; \mathcal{W}) : \mathcal{X} \to \mathcal{Z}$, where $\mathcal{W}$ denotes the neural network's weights with $L$ layers $\{\mathbf{W}_1, \dots, \mathbf{W}_L\}$, $n$ the number of unlabeled training samples $\{\mathbf{x}_1, \dots, \mathbf{x}_n\}$, and $\mathbf{c}$ the center of the hypersphere in the latent space, Deep SVDD teaches the neural network to cluster normal data closely together in the latent space by defining its optimization objective as seen in~\ref{eq:deepsvdd_optimization_objective}.

 \begin{equation}
 \label{eq:deepsvdd_optimization_objective}
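The equation body itself is cut off by the diff context; for reference, the one-class Deep SVDD objective as given in \cite{deep_svdd} is, reconstructed from the paper in the notation above:

    \begin{equation}
      \min_{\mathcal{W}} \; \frac{1}{n} \sum_{i=1}^{n}
        \left\lVert \phi(\mathbf{x}_i; \mathcal{W}) - \mathbf{c} \right\rVert^2
        + \frac{\lambda}{2} \sum_{\ell=1}^{L} \lVert \mathbf{W}_\ell \rVert_F^2
    \end{equation}

Deep SAD~\cite{deepsad} extends this with a term for $m$ labeled samples $(\tilde{\mathbf{x}}_j, \tilde{y}_j)$, where $\tilde{y}_j = +1$ marks normal and $\tilde{y}_j = -1$ anomalous data, so that labeled anomalies are pushed away from $\mathbf{c}$:

    \begin{equation}
      \min_{\mathcal{W}} \; \frac{1}{n+m} \sum_{i=1}^{n}
        \left\lVert \phi(\mathbf{x}_i; \mathcal{W}) - \mathbf{c} \right\rVert^2
        + \frac{\eta}{n+m} \sum_{j=1}^{m}
        \left( \left\lVert \phi(\tilde{\mathbf{x}}_j; \mathcal{W}) - \mathbf{c} \right\rVert^2 \right)^{\tilde{y}_j}
        + \frac{\lambda}{2} \sum_{\ell=1}^{L} \lVert \mathbf{W}_\ell \rVert_F^2
    \end{equation}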
@@ -629,11 +629,11 @@ For inference (i.e.\ model validation on held-out experiments), we provide a sec

 \section{Model Configuration \& Evaluation Protocol}

-Since the neural network architecture trained in the deepsad method is not fixed as described in section~\ref{sec:algorithm_details} but rather chosen based on the input data, we also had to choose an autoencoder architecture befitting our preprocessed lidar data projections. Since \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain} reported success in training DeepSAD on similar data we firstly adapted the network architecture utilized by them for our usecase, which is based on the simple and well understood LeNet architecture~\cite{lenet}. Additionally we were interested in evaluating the importance and impact of a well-suited network architecture for DeepSAD's performance and therefore designed a second network architecture henceforth reffered to as "efficient architecture" to incorporate a few modern techniques, befitting our usecase.
+Since the neural network architecture trained in the DeepSAD method is not fixed, as described in section~\ref{sec:algorithm_details}, but rather chosen based on the input data, we also had to choose an autoencoder architecture befitting our preprocessed lidar data projections. Since \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain} reported success in training DeepSAD on similar data, we first adapted the network architecture utilized there for our use case, which is based on the simple and well-understood LeNet architecture~\cite{lenet}. Additionally, we were interested in evaluating the importance and impact of a well-suited network architecture for DeepSAD's performance, and therefore designed a second network architecture, henceforth referred to as the ``efficient architecture'', to incorporate a few modern techniques befitting our use case.

 \newsubsubsectionNoTOC{Network architectures (LeNet variant, custom encoder) and how they suit the point‑cloud input}

-The LeNet-inspired autoencoder can be split into an encoder network (figure~\ref{fig:setup_arch_lenet_encoder}) and a decoder network (figure~\ref{fig:setup_arch_lenet_decoder}) with a latent space inbetween the two parts. Such an arrangement is typical for autoencoder architectures as we discussed in section~\ref{sec:autoencoder}. The encoder network is simultaneously DeepSAD's main training architecture which is used to infer the degradation quantification in our use-case, once trained.
+The LeNet-inspired autoencoder can be split into an encoder network (figure~\ref{fig:setup_arch_lenet_encoder}) and a decoder network (figure~\ref{fig:setup_arch_lenet_decoder}) with a latent space in between the two parts. Such an arrangement is typical for autoencoder architectures, as discussed in section~\ref{sec:autoencoder}. The encoder network is simultaneously DeepSAD's main training architecture, which, once trained, is used to infer the degradation quantification in our use case.

 \figc{setup_arch_lenet_encoder}{diagrams/arch_lenet_encoder}{
 Architecture of the LeNet-inspired encoder. The input is a lidar range image of size
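The exact layer dimensions live in the architecture figures, which this diff does not show; as a rough, hypothetical illustration of a LeNet-style encoder on a range-image input and of how DeepSAD then scores a sample (all sizes are our assumptions, not the thesis's actual configuration):

    import torch
    import torch.nn as nn

    class LeNetStyleEncoder(nn.Module):
        # Hypothetical LeNet-like encoder for a 1-channel lidar range image.
        def __init__(self, latent_dim=64):
            super().__init__()
            self.features = nn.Sequential(
                nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool2d(2),
                nn.Conv2d(8, 16, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool2d(2),
            )
            self.fc = nn.LazyLinear(latent_dim)   # infers the flattened input size

        def forward(self, x):
            return self.fc(self.features(x).flatten(1))

    encoder = LeNetStyleEncoder()
    x = torch.randn(4, 1, 32, 1024)    # batch: 32 vertical channels x 1024 azimuth bins
    z = encoder(x)                     # latent representations
    c = z.mean(dim=0).detach()         # stand-in for the fixed pre-trained center
    score = ((z - c) ** 2).sum(dim=1)  # anomaly score: squared distance to c

The 32-row input mirrors the 32-channel spinning lidar mentioned in the background chapter; the azimuth resolution is a placeholder.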
@@ -1145,6 +1145,7 @@ Finally, inference experiments showed that DeepSAD’s anomaly scores can track
 Several promising avenues remain open for future exploration:
 \begin{itemize}
 \item \textbf{Temporal modeling:} Instead of treating frames independently, future methods could directly model the difference between temporally consecutive frames to capture dynamic aspects of degradation.
+\item \textbf{Lidar intensity:} Lidars typically record an intensity value per point, indicating the strength of the reflected optical signal, which could be incorporated to improve degradation quantification.
 \item \textbf{Sensor fusion:} Combining lidar with complementary sensors (e.g., ultrasonic sensors that penetrate dense clouds) could mitigate blind spots inherent to single-sensor evaluation.
 \item \textbf{Input segmentation:} The DeepSAD architecture tested here processed full 360° lidar scans. This may obscure localized degradations. Segmenting point clouds into angular sectors and computing anomaly scores per sector could provide more fine-grained quantification. Preliminary tests in this direction were promising, but were not pursued further in this thesis.
 \item \textbf{Cross-sensor generalization:} Current experiments assume identical sensor resolution. Extending the method to work across different lidar types, including those with varying angular resolutions, remains an open question and would enhance applicability in heterogeneous robotic fleets.
@@ -85,37 +85,6 @@
 pages = {716–721},
 }
 ,
-@inproceedings{deepsvdd,
-title = {Deep One-Class Classification},
-author = {Ruff, Lukas and Vandermeulen, Robert and Goernitz, Nico and Deecke,
-Lucas and Siddiqui, Shoaib Ahmed and Binder, Alexander and M{\"u}ller
-, Emmanuel and Kloft, Marius},
-booktitle = {Proceedings of the 35th International Conference on Machine
-Learning},
-pages = {4393--4402},
-year = {2018},
-editor = {Dy, Jennifer and Krause, Andreas},
-volume = {80},
-series = {Proceedings of Machine Learning Research},
-month = {10--15 Jul},
-publisher = {PMLR},
-pdf = {http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf},
-url = {https://proceedings.mlr.press/v80/ruff18a.html},
-abstract = {Despite the great advances made by deep learning in many machine
-learning problems, there is a relative dearth of deep learning
-approaches for anomaly detection. Those approaches which do exist
-involve networks trained to perform a task other than anomaly
-detection, namely generative models or compression, which are in
-turn adapted for use in anomaly detection; they are not trained on
-an anomaly detection based objective. In this paper we introduce a
-new anomaly detection method—Deep Support Vector Data Description—,
-which is trained on an anomaly detection based objective. The
-adaptation to the deep regime necessitates that our neural network
-and training procedure satisfy certain properties, which we
-demonstrate theoretically. We show the effectiveness of our method
-on MNIST and CIFAR-10 image benchmark datasets as well as on the
-detection of adversarial examples of GTSRB stop signs.},
-},
 @inproceedings{deep_svdd,
 title = {Deep One-Class Classification},
 author = {Ruff, Lukas and Vandermeulen, Robert and Goernitz, Nico and Deecke,