Compare commits
3 Commits: 5aca00ad67 ... ece887860b

| Author | SHA1 | Date |
|---|---|---|
|  | ece887860b |  |
|  | c3830db913 |  |
|  | 3d21171a40 |  |
thesis/Main.bbl (255 additions)
@@ -2134,6 +2134,261 @@
[Auto-generated biblatex output: this hunk appends the generated \entry blocks for roc, roc_vs_prc2, roc_vs_prc, prc, and zscore (the same references added to the .bib file below) between the last existing \endentry and the closing \enddatalist, \endrefsection, and \endinput lines.]
BIN thesis/Main.pdf
Binary file not shown.
@@ -761,7 +761,27 @@ In our setting, the raw input dimensionality ($2048 \times 32$ per frame) is too
 
 We adapted the baseline implementations to our data loader and input format and added support for multiple evaluation targets per frame (two labels per data point), reporting both results per experiment. Both baselines, like DeepSAD, output continuous anomaly scores, which allows us to evaluate them directly without committing to a fixed threshold.
 
-TODO transition to evaluation metrics, talk about typical ones like F1 scores (single threshold) so we go on to talk about ROC AUC, well known but can suffer from having class imbalance (especially as in our case) maybe calculation and example. say we saw these exact problems in our results so we decided to report mAP which is similar to roc auc but not as sensitive in regards to class imbalance (show with formula why its not) and then go on to explain that its basically the AUC of PRCs, which are more fitting curves for our usecase due to the same stability for class imbalance (like mAP) but for multiple thresholds (unlike F1) and shape can also give more insight than simple mAP alone.
+\paragraph{Evaluation Metrics}
+
+As discussed in Section~\ref{sec:preprocessing}, evaluating model performance in our setup is challenging due to the absence of analog ground truth. Instead, we rely on binary labels that are additionally noisy and subjective. All models under consideration produce continuous anomaly scores: DeepSAD outputs a positive-valued distance to the center of a hypersphere, Isolation Forest measures deviation from the mean tree depth (which can be negative), and OCSVM returns a signed distance to the decision boundary. Because these scores differ in scale and sign—and due to the lack of a reliable degradation threshold—it is not appropriate to evaluate performance using metrics such as accuracy or F1 score, both of which require classification at a fixed threshold.
+
+Instead, we adopt threshold-independent evaluation curves that illustrate model behavior across the full range of possible thresholds. The most commonly used of these is the Receiver Operating Characteristic (ROC)~\cite{roc} curve, along with its scalar summary metric, ROC AUC. ROC curves plot the true positive rate (TPR) against the false positive rate (FPR), providing insight into how well a model separates the two classes. However, as noted in~\cite{roc_vs_prc2,roc_vs_prc} and confirmed in our own testing, ROC AUC can be misleading under strong class imbalance—a common condition in anomaly detection.
+
+To address this, we instead rely on Precision–Recall Curves (PRC)~\cite{prc}, which better capture model behavior on the minority class. PRC plots precision—the fraction of predicted anomalies that are correct—against recall—the fraction of true anomalies that are detected. As the decision threshold is lowered, recall increases but typically at the cost of precision, since more false positives are included. This tradeoff is captured across all thresholds. The metric definitions are as follows:
+
+\[
+\text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}}, \quad
+\text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}}.
+\]
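For concreteness, a minimal sketch of how these threshold-free metrics can be computed from per-frame anomaly scores; the synthetic `labels` and `scores` below are placeholders rather than thesis data, and only the scikit-learn calls are the point:

```python
# Minimal sketch: synthetic stand-ins for per-frame labels/scores.
import numpy as np
from sklearn.metrics import (average_precision_score,
                             precision_recall_curve, roc_auc_score)

rng = np.random.default_rng(0)
labels = (rng.random(10_000) < 0.05).astype(int)       # ~5% anomalies: imbalanced
scores = rng.normal(0.0, 1.0, 10_000) + 2.0 * labels   # higher = more anomalous

# ROC AUC: threshold-free, but can look deceptively strong under imbalance.
print(f"ROC AUC: {roc_auc_score(labels, scores):.3f}")

# PRC and its area (average precision) focus on the minority class.
precision, recall, _ = precision_recall_curve(labels, scores)
print(f"AP:      {average_precision_score(labels, scores):.3f}")
```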
+
+In our evaluation, this distinction proved practically significant. As illustrated in Figure~\ref{fig:roc_vs_prc}, ROC AUC values for Isolation Forest and DeepSAD appear similarly strong (0.693 vs. 0.782), suggesting comparable performance. However, the PRC reveals a clear divergence: while DeepSAD maintains high precision across recall levels, Isolation Forest suffers a steep decline in precision as recall increases, due to a high number of false positives. The resulting Average Precision (AP)—the area under the PRC—is much lower for Isolation Forest (0.207 vs. 0.633), offering a more realistic account of its performance under imbalance.
+
+\figc{roc_vs_prc}{figures/setup_roc_vs_prc.png}{Comparison of ROC and PRC for the same evaluation run. ROC fails to reflect the poor performance of Isolation Forest, which misclassifies many normal samples as anomalous at lower thresholds. The PRC exposes this effect, resulting in a substantially lower AP for Isolation Forest than for DeepSAD.}{width=.9\textwidth}
+
+In addition to cross-validated performance comparisons, we also apply the trained models to previously unseen, temporally ordered experiments to simulate inference in realistic conditions. Since each method produces scores on a different scale—with different signs and ranges—raw scores are not directly comparable. To enable comparison, we compute a $z$-score~\cite{zscore} per frame, defined as the number of standard deviations a score deviates from the mean of the normal data. To perform the normalization, we compute the mean and standard deviation of anomaly scores on a clean reference experiment. These values are then used to normalize scores from degraded experiments, making it easy to see how much each method's output deviates from its own baseline under degradation. It also facilitates a unified view across methods, even though their outputs are otherwise heterogeneous. In this way, $z$-score normalization supports threshold-free interpretation and enables consistent model comparison during inference.
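Written out explicitly (our notation; the excerpt gives the definition only in words): for a per-frame anomaly score $s_t$, with mean $\mu_{\text{clean}}$ and standard deviation $\sigma_{\text{clean}}$ estimated on the clean reference experiment only,

```latex
\[
  z_t = \frac{s_t - \mu_{\text{clean}}}{\sigma_{\text{clean}}}
\]
```

so $z_t$ counts standard deviations above the clean baseline and is comparable across methods regardless of each method's raw score scale or sign.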
+
+In conclusion, the combination of unreliable thresholds and pronounced class imbalance renders traditional threshold-based metrics unsuitable for our setting. PRC and AP provide a more faithful representation of model behavior across thresholds, while $z$-score normalization extends this by enabling direct comparison of inference-time outputs across methods and conditions.
+
 
 \newsection{setup_experiments_environment}{Experiment Overview \& Computational Environment}
 
@@ -1056,22 +1076,20 @@ In summary, three consistent patterns emerge: (i) a very small number of labels
 % --- Section: Autoencoder Pretraining Results ---
 \newsection{results_inference}{Inference on Held-Out Experiments}
 
-In addition to the evaluation of average precision and precision--recall curves obtained from $k$-fold cross-validation with varying hyperparameters, we also examine the behavior of the fully trained methods when applied to previously unseen, held-out experiments.
+In addition to the evaluation of PRC and AP obtained from $k$-fold cross-validation with varying hyperparameters, we also examine the behavior of the fully trained methods when applied to previously unseen, held-out experiments.
 While the prior analysis provided valuable insights into the classification capabilities of the methods, it was limited by two factors: first, the binary ground-truth labels were of uneven quality due to aforementioned mislabeling of frames, and second, the binary formulation does not reflect our overarching goal of quantifying sensor degradation on a continuous scale.
 To provide a more intuitive understanding of how the methods might perform in real-world applications, we therefore present results from running inference sequentially on entire experiments.
 These frame-by-frame time-axis plots simulate online inference and illustrate how anomaly scores evolve as data is captured, thereby serving as a candidate metric for quantifying the degree of \rev{LiDAR} degradation during operation.
 
+\fig{results_inference_normal_vs_degraded}{figures/results_inference_normal_vs_degraded.png}{Comparison of anomaly detection methods with statistical indicators across clean (dashed) and degraded (solid) experiments. Each subplot shows one method (DeepSAD--LeNet, DeepSAD--Efficient, OCSVM, Isolation Forest). Red curves denote how strongly the anomaly score deviates from clean-experiment baseline; blue and green curves denote the percentage of missing \rev{LiDAR} points and near-sensor particle hits, respectively. Latent Space Dimensionality was 32 and semi-supervised labeling regime was 0 normal and 0 anomalous samples during training.}
 
-\fig{results_inference_normal_vs_degraded}{figures/results_inference_normal_vs_degraded.png}{Comparison of anomaly detection methods with statistical indicators across clean (dashed) and degraded (solid) experiments. Each subplot shows one method (DeepSAD--LeNet, DeepSAD--Efficient, OCSVM, Isolation Forest). Red curves denote method anomaly scores normalized to the clean experiment; blue and green curves denote the percentage of missing \rev{LiDAR} points and near-sensor particle hits, respectively. Clear separation between clean and degraded runs is observed for the DeepSAD variants and, to a lesser degree, for OCSVM, while Isolation Forest produces high scores even in the clean experiment. Latent Space Dimensionality was 32 and semi-supervised labeling regime was 0 normal and 0 anomalous samples during training.}
+As discussed in Section~\ref{sec:setup_baselines_evaluation} we apply $z$-score normalization to enable comparison of the different methods during inference. After normalization, the resulting time series were still highly noisy, which motivated the application of exponential moving average (EMA) smoothing. EMA was chosen because it is causal (does not rely on future data) and thus suitable for real-time inference. Although it introduces a small time delay, this delay is shorter than for other smoothing techniques such as running averages.
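The EMA recurrence in question, in its standard form (the smoothing factor $\alpha$ is a tuning choice the excerpt does not specify): for normalized scores $z_t$,

```latex
\[
  \tilde{z}_t = \alpha\, z_t + (1 - \alpha)\, \tilde{z}_{t-1},
  \qquad 0 < \alpha \le 1, \quad \tilde{z}_0 = z_0
\]
```

Only past values enter the update, so the filter is causal; a smaller $\alpha$ smooths more strongly at the cost of a longer effective delay.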
 
-The plots in Figure~\ref{fig:results_inference_normal_vs_degraded} highlight important differences in how well the tested methods distinguish between normal and degraded sensor conditions.
-Among the four approaches, the strongest separation is achieved by DeepSAD (Efficient), followed by DeepSAD (LeNet), then OCSVM.
-For Isolation Forest, the anomaly scores are already elevated in the clean experiment, which prevents reliable differentiation between normal and degraded runs and makes the method unsuitable in this context.
+The plots in Figure~\ref{fig:results_inference_normal_vs_degraded} highlight important differences in how well the tested methods distinguish between normal and degraded sensor conditions. The plots show how strongly the method's scores deviate from their clean-data baseline and include statistical indicators (missing points and near-sensor particle hits) in blue and green.
 
-Because anomaly scores are on incomparable scales, we apply $z$-score normalization based on the clean experiment. This allows deviations in degraded runs to be measured relative to the clean baseline, enabling direct comparison across methods. To allow comparison between the clean and degraded experiments, the mean and standard deviation were estimated exclusively from the clean experiment and then used to normalize the degraded scores as well. After normalization, the resulting time series were still highly noisy, which motivated the application of exponential moving average (EMA) smoothing. EMA was chosen because it is causal (does not rely on future data) and thus suitable for real-time inference. Although it introduces a small time delay, this delay is shorter than for other smoothing techniques such as running averages.
+Among the four approaches, the strongest separation is achieved by DeepSAD (Efficient), followed by DeepSAD (LeNet), then OCSVM. For Isolation Forest, the anomaly scores are already elevated in the clean experiment, which prevents reliable differentiation between normal and degraded runs and makes the method unsuitable in this context.
 
-The red method curves can also be compared with the blue and green statistical indicators (missing points and near-sensor particle hits).
-While some similarities in shape may suggest that the methods partly capture these statistics, such interpretations should be made with caution.
+When comparing the methods to the statistical indicators, some similarities in shape may suggest that the methods partly capture these statistics, although such interpretations should be made with caution.
 The anomaly detection models are expected to have learned additional patterns that are not directly observable from simple statistics, and these may also contribute to their ability to separate degraded from clean data.
 
 
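Taken together, the inference-time post-processing amounts to a small causal pipeline. A hedged sketch under stated assumptions (the function names, array shapes, and $\alpha = 0.1$ are illustrative choices, not the thesis implementation):

```python
# Sketch: z-score against a clean reference run, then causal EMA smoothing.
import numpy as np

def fit_clean_baseline(clean_scores: np.ndarray) -> tuple[float, float]:
    """Estimate mean and std of anomaly scores on a clean reference experiment."""
    return float(clean_scores.mean()), float(clean_scores.std())

def zscore_ema(scores: np.ndarray, mu: float, sigma: float,
               alpha: float = 0.1) -> np.ndarray:
    """Normalize scores to the clean baseline, then smooth with a causal EMA."""
    z = (scores - mu) / sigma
    smoothed = np.empty_like(z)
    smoothed[0] = z[0]
    for t in range(1, len(z)):            # causal: uses only past values
        smoothed[t] = alpha * z[t] + (1 - alpha) * smoothed[t - 1]
    return smoothed

# Illustration: a degraded run should drift away from the clean baseline.
rng = np.random.default_rng(1)
mu, sigma = fit_clean_baseline(rng.normal(0.0, 1.0, 5_000))
degraded_scores = rng.normal(2.0, 1.0, 5_000)        # placeholder degraded scores
print(zscore_ema(degraded_scores, mu, sigma)[-5:])   # settles near z of about 2
```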
@@ -1091,7 +1109,7 @@ Our results indicate a qualified “yes.” Using anomaly detection (AD)—in pa
 \item \textbf{Empirical comparison for \rev{LiDAR} degradation.} A systematic evaluation of DeepSAD against Isolation Forest and OCSVM across latent sizes and labeling regimes, showing that DeepSAD consistently outperforms the baselines under both evaluation schemes (Section~\ref{sec:results_deepsad}).
 
 \item \textbf{Latent dimensionality insight.}
-Autoencoder pretraining loss decreases with larger latent spaces, but DeepSAD performance shows the opposite trend: compact bottlenecks (32–128) achieve the highest \rev{mean average precision (mAP)}. This contrast demonstrates that pretraining performance does not directly predict DeepSAD performance—latent dimensionality cannot be tuned via autoencoder loss alone, even though it remains useful for comparing architectures.
+Autoencoder pretraining loss decreases with larger latent spaces, but DeepSAD performance shows the opposite trend: compact bottlenecks (32–128) achieve the highest \rev{average precision (AP)}. This contrast demonstrates that pretraining performance does not directly predict DeepSAD performance—latent dimensionality cannot be tuned via autoencoder loss alone, even though it remains useful for comparing architectures.
 
 \item \textbf{Semi-supervision insight.} In our data, \emph{unsupervised} DeepSAD performed best; \emph{light} labeling (50/10) performed worst; \emph{many} labels (500/100) partially recovered performance but did not surpass \rev{the unsupervised approach}. Evidence from \rev{precision--recall curve (PRC)} shapes and fold variance points to \emph{training-side overfitting to a small labeled set}, an effect that persists even under clean manually-defined evaluation (Table~\ref{tab:results_ap}, Figure~\ref{fig:prc_over_semi}).
 
@@ -684,6 +684,73 @@ article{ef_concept_source,
 year = {1986},
 month = dec,
 pages = {56–68},
+},
+
+@article{roc_vs_prc2,
+title = {Context discovery for anomaly detection},
+volume = {19},
+ISSN = {2364-4168},
+url = {http://dx.doi.org/10.1007/s41060-024-00586-x},
+DOI = {10.1007/s41060-024-00586-x},
+number = {1},
+journal = {International Journal of Data Science and Analytics},
+publisher = {Springer Science and Business Media LLC},
+author = {Calikus, Ece and Nowaczyk, Slawomir and Dikmen, Onur},
+year = {2024},
+month = jun,
+pages = {99–113},
+},
+
+@article{roc_vs_prc,
+title = {On the evaluation of unsupervised outlier detection: measures, datasets, and an empirical study},
+volume = {30},
+ISSN = {1573-756X},
+url = {http://dx.doi.org/10.1007/s10618-015-0444-8},
+DOI = {10.1007/s10618-015-0444-8},
+number = {4},
+journal = {Data Mining and Knowledge Discovery},
+publisher = {Springer Science and Business Media LLC},
+author = {Campos, Guilherme O. and Zimek, Arthur and Sander, J\"{o}rg and Campello, Ricardo J. G. B. and Micenková, Barbora and Schubert, Erich and Assent, Ira and Houle, Michael E.},
+year = {2016},
+month = jan,
+pages = {891–927},
+},
+
+@inproceedings{roc,
+title = {Basic principles of ROC analysis},
+author = {Metz, Charles E},
+booktitle = {Seminars in nuclear medicine},
+volume = {8},
+number = {4},
+pages = {283--298},
+year = {1978},
+organization = {Elsevier},
+},
+
+@article{prc,
+title = {A critical investigation of recall and precision as measures of retrieval system performance},
+volume = {7},
+ISSN = {1558-2868},
+url = {http://dx.doi.org/10.1145/65943.65945},
+DOI = {10.1145/65943.65945},
+number = {3},
+journal = {ACM Transactions on Information Systems},
+publisher = {Association for Computing Machinery (ACM)},
+author = {Raghavan, Vijay and Bollmann, Peter and Jung, Gwang S.},
+year = {1989},
+month = jul,
+pages = {205–229},
+},
+
+@article{zscore,
+title = {Advanced engineering mathematics},
+author = {Kreyszig, Erwin and Stroud, K and Stephenson, G},
+journal = {Integration},
+volume = {9},
+number = {4},
+pages = {1014},
+year = {2008},
+publisher = {John Wiley \& Sons, Inc.},
 }