new complete auc table

This commit is contained in:
Jan Kowalczyk
2025-09-17 11:43:38 +02:00
parent 936d2ecb6e
commit 8f36bd2e07
2 changed files with 280 additions and 11 deletions

Binary file not shown.

View File

@@ -77,6 +77,16 @@
% Centered, automatically-stretched column for tabularx tables
\newcolumntype{Y}{>{\centering\arraybackslash}X}
% Right-aligned fixed-width paragraph column; the width is the argument.
% NOTE(review): previous comment called this "slanted", but \raggedleft
% produces right alignment, not slanting.
\newcolumntype{R}[1]{>{\raggedleft\arraybackslash}p{#1}}
% Rotate a table header by 90 degrees, centered inside a 2cm-wide parbox
\newcommand{\rotheader}[1]{\rotatebox{90}{\parbox{2cm}{\centering #1}}}
% Full-width labeling row for regime blocks.
% Optional argument: number of columns the row spans (defaults to 9, the
% width of the combined AUC table); mandatory argument: the row label.
% Trailing % after each line prevents spurious spaces inside tables.
\newcommand{\regimerow}[2][9]{%
\addlinespace[2pt]%
\multicolumn{#1}{l}{\textbf{#2}}\\
\addlinespace[2pt]%
}
\DeclareRobustCommand{\threadtodo}[4]{%
\todo[inline,
% \todo[disable,
@@ -1425,19 +1435,278 @@ Inference latency per sample is presented in Table~\ref{tab:inference_latency_co
Together, these results provide a comprehensive overview of the computational requirements of our experimental setup. They show that while our deep semi-supervised approach is significantly more demanding during training than classical baselines, it remains highly efficient at inference, which is the decisive factor for deployment in time-critical domains such as rescue robotics.
\newchapter{results_discussion}{Results and Discussion}
\newsection{results}{Results}
% \threadtodo
% {give overview about hardware setup and how long things take to train}
% {we know what we trained but not how long that takes}
% {table of hardware and of how long different trainings took}
% {experiment setup understood $\rightarrow$ what were the experiments' results}
\todo{}
\newsection{hyperparameter_analysis}{Hyperparameter Analysis}
\todo[inline]{result for different amounts of labeled data}
\threadtodo
{Introduce the structure and scope of the results chapter}
{The reader knows the experiments from the previous chapter, but not the outcomes}
{State that we will first analyze autoencoder results, then anomaly detection performance, and finally inference experiments}
{Clear roadmap $\rightarrow$ prepares reader for detailed sections}
% --- Section: Autoencoder Pretraining Results ---
\section{Autoencoder Pretraining Results}
\threadtodo
{Present autoencoder reconstruction performance across architectures and latent sizes}
{Important because latent size and architecture determine representation quality, which may affect DeepSAD later}
{Show reconstruction losses over latent dimensions, compare Efficient vs LeNet}
{Understanding representation capacity $\rightarrow$ motivates analyzing if AE results transfer to DeepSAD}
%\fig{ae_loss_overall}{figures/ae_loss_overall.png}{Reconstruction loss across latent dimensions for LeNet-inspired and Efficient architectures.}
\threadtodo
{Analyze anomaly reconstruction performance specifically}
{Critical because degraded inputs may reconstruct differently, showing whether networks capture degradation structure}
{Show reconstruction losses on anomalous-only data subset}
{This analysis $\rightarrow$ motivates testing whether better AE reconstructions imply better anomaly detection}
%\fig{ae_loss_degraded}{figures/ae_loss_degraded.png}{Reconstruction loss on degraded-only subsets.}
% --- Section: DeepSAD Training Results ---
\section{DeepSAD Detection Performance}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol (a bare \textpm gobbles it);
% \emph for prose emphasis instead of \texttt; \mbox prevents a line break
% inside "OC-SVM" without misusing amsmath's \text in text mode.
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{experiment-based evaluation}, semi-labeling regime: 0 normal and 0 anomalous labeled samples.}
\label{tab:auc_exp_based_semi_0_0}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & \textbf{0.801 \textpm{} 0.019} & 0.791 \textpm{} 0.011 & 0.717 \textpm{} 0.006 & 0.752 \textpm{} 0.045 \\
64 & 0.776 \textpm{} 0.009 & \textbf{0.786 \textpm{} 0.012} & 0.718 \textpm{} 0.010 & 0.742 \textpm{} 0.018 \\
128 & \textbf{0.784 \textpm{} 0.024} & 0.784 \textpm{} 0.017 & 0.719 \textpm{} 0.017 & 0.775 \textpm{} 0.009 \\
256 & 0.762 \textpm{} 0.028 & 0.772 \textpm{} 0.016 & 0.712 \textpm{} 0.006 & \textbf{0.793 \textpm{} 0.022} \\
512 & 0.759 \textpm{} 0.020 & 0.784 \textpm{} 0.021 & 0.712 \textpm{} 0.007 & \textbf{0.804 \textpm{} 0.027} \\
768 & 0.749 \textpm{} 0.041 & 0.754 \textpm{} 0.024 & 0.713 \textpm{} 0.011 & \textbf{0.812 \textpm{} 0.023} \\
1024 & 0.757 \textpm{} 0.020 & 0.750 \textpm{} 0.017 & 0.716 \textpm{} 0.012 & \textbf{0.821 \textpm{} 0.019} \\
\bottomrule
\end{tabularx}
\end{table}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol; \emph for prose emphasis;
% \mbox prevents a line break inside "OC-SVM".
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{experiment-based evaluation}, semi-labeling regime: 50 normal and 10 anomalous labeled samples.}
\label{tab:auc_exp_based_semi_50_10}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & 0.741 \textpm{} 0.013 & 0.747 \textpm{} 0.015 & 0.717 \textpm{} 0.006 & \textbf{0.752 \textpm{} 0.045} \\
64 & \textbf{0.757 \textpm{} 0.011} & 0.750 \textpm{} 0.017 & 0.718 \textpm{} 0.010 & 0.742 \textpm{} 0.018 \\
128 & 0.746 \textpm{} 0.019 & 0.751 \textpm{} 0.016 & 0.719 \textpm{} 0.017 & \textbf{0.775 \textpm{} 0.009} \\
256 & 0.746 \textpm{} 0.015 & 0.750 \textpm{} 0.015 & 0.712 \textpm{} 0.006 & \textbf{0.793 \textpm{} 0.022} \\
512 & 0.760 \textpm{} 0.057 & 0.763 \textpm{} 0.027 & 0.712 \textpm{} 0.007 & \textbf{0.804 \textpm{} 0.027} \\
768 & 0.749 \textpm{} 0.016 & 0.747 \textpm{} 0.036 & 0.713 \textpm{} 0.011 & \textbf{0.812 \textpm{} 0.023} \\
1024 & 0.748 \textpm{} 0.021 & 0.732 \textpm{} 0.015 & 0.716 \textpm{} 0.012 & \textbf{0.821 \textpm{} 0.019} \\
\bottomrule
\end{tabularx}
\end{table}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol; \emph for prose emphasis;
% \mbox prevents a line break inside "OC-SVM".
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{experiment-based evaluation}, semi-labeling regime: 500 normal and 100 anomalous labeled samples.}
\label{tab:auc_exp_based_semi_500_100}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & 0.765 \textpm{} 0.005 & \textbf{0.775 \textpm{} 0.010} & 0.717 \textpm{} 0.006 & 0.752 \textpm{} 0.045 \\
64 & 0.754 \textpm{} 0.013 & \textbf{0.773 \textpm{} 0.020} & 0.718 \textpm{} 0.010 & 0.742 \textpm{} 0.018 \\
128 & 0.758 \textpm{} 0.009 & 0.769 \textpm{} 0.014 & 0.719 \textpm{} 0.017 & \textbf{0.775 \textpm{} 0.009} \\
256 & 0.749 \textpm{} 0.016 & 0.768 \textpm{} 0.021 & 0.712 \textpm{} 0.006 & \textbf{0.793 \textpm{} 0.022} \\
512 & 0.766 \textpm{} 0.043 & 0.770 \textpm{} 0.026 & 0.712 \textpm{} 0.007 & \textbf{0.804 \textpm{} 0.027} \\
768 & 0.746 \textpm{} 0.016 & 0.750 \textpm{} 0.027 & 0.713 \textpm{} 0.011 & \textbf{0.812 \textpm{} 0.023} \\
1024 & 0.743 \textpm{} 0.023 & 0.739 \textpm{} 0.016 & 0.716 \textpm{} 0.012 & \textbf{0.821 \textpm{} 0.019} \\
\bottomrule
\end{tabularx}
\end{table}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol; \emph for prose emphasis;
% \mbox prevents a line break inside "OC-SVM".
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{handlabeling-based evaluation}, semi-labeling regime: 0 normal and 0 anomalous labeled samples.}
\label{tab:auc_manual_based_semi_0_0}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & \textbf{1.000 \textpm{} 0.000} & \textbf{1.000 \textpm{} 0.000} & 0.921 \textpm{} 0.010 & 0.917 \textpm{} 0.014 \\
64 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.917 \textpm{} 0.007 & 0.931 \textpm{} 0.023 \\
128 & \textbf{1.000 \textpm{} 0.000} & \textbf{1.000 \textpm{} 0.000} & 0.921 \textpm{} 0.008 & 0.967 \textpm{} 0.029 \\
256 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.918 \textpm{} 0.009 & 0.966 \textpm{} 0.016 \\
512 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.920 \textpm{} 0.010 & 0.949 \textpm{} 0.021 \\
768 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.923 \textpm{} 0.007 & 0.960 \textpm{} 0.024 \\
1024 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.919 \textpm{} 0.005 & 0.956 \textpm{} 0.011 \\
\bottomrule
\end{tabularx}
\end{table}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol; \emph for prose emphasis;
% \mbox prevents a line break inside "OC-SVM".
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{handlabeling-based evaluation}, semi-labeling regime: 50 normal and 10 anomalous labeled samples.}
\label{tab:auc_manual_based_semi_50_10}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & 0.990 \textpm{} 0.019 & \textbf{0.998 \textpm{} 0.001} & 0.921 \textpm{} 0.010 & 0.917 \textpm{} 0.014 \\
64 & 0.998 \textpm{} 0.003 & \textbf{0.999 \textpm{} 0.000} & 0.917 \textpm{} 0.007 & 0.931 \textpm{} 0.023 \\
128 & 0.991 \textpm{} 0.018 & \textbf{0.999 \textpm{} 0.000} & 0.921 \textpm{} 0.008 & 0.967 \textpm{} 0.029 \\
256 & 0.999 \textpm{} 0.002 & \textbf{0.999 \textpm{} 0.001} & 0.918 \textpm{} 0.009 & 0.966 \textpm{} 0.016 \\
512 & 0.972 \textpm{} 0.060 & \textbf{0.999 \textpm{} 0.001} & 0.920 \textpm{} 0.010 & 0.949 \textpm{} 0.021 \\
768 & \textbf{1.000 \textpm{} 0.000} & 0.998 \textpm{} 0.001 & 0.923 \textpm{} 0.007 & 0.960 \textpm{} 0.024 \\
1024 & \textbf{0.999 \textpm{} 0.001} & 0.998 \textpm{} 0.001 & 0.919 \textpm{} 0.005 & 0.956 \textpm{} 0.011 \\
\bottomrule
\end{tabularx}
\end{table}
\begin{table}[t]
\centering
% \textpm{} keeps the space after the symbol; \emph for prose emphasis;
% \mbox prevents a line break inside "OC-SVM".
\caption{ROC AUC (mean \textpm{} std) across 5 folds for \emph{handlabeling-based evaluation}, semi-labeling regime: 500 normal and 100 anomalous labeled samples.}
\label{tab:auc_manual_based_semi_500_100}
\begin{tabularx}{\textwidth}{cYYYY}
\toprule
\textbf{Latent Dim.} & \textbf{DeepSAD (LeNet)} & \textbf{DeepSAD (Efficient)} & \textbf{IsolationForest} & \textbf{\mbox{OC-SVM}} \\
\midrule
32 & \textbf{1.000 \textpm{} 0.000} & 1.000 \textpm{} 0.000 & 0.921 \textpm{} 0.010 & 0.917 \textpm{} 0.014 \\
64 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.917 \textpm{} 0.007 & 0.931 \textpm{} 0.023 \\
128 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.921 \textpm{} 0.008 & 0.967 \textpm{} 0.029 \\
256 & 0.999 \textpm{} 0.001 & \textbf{1.000 \textpm{} 0.000} & 0.918 \textpm{} 0.009 & 0.966 \textpm{} 0.016 \\
512 & 0.989 \textpm{} 0.025 & \textbf{1.000 \textpm{} 0.000} & 0.920 \textpm{} 0.010 & 0.949 \textpm{} 0.021 \\
768 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.923 \textpm{} 0.007 & 0.960 \textpm{} 0.024 \\
1024 & 1.000 \textpm{} 0.000 & \textbf{1.000 \textpm{} 0.000} & 0.919 \textpm{} 0.005 & 0.956 \textpm{} 0.011 \\
\bottomrule
\end{tabularx}
\end{table}
\threadtodo
{Introduce DeepSAD anomaly detection results compared to baselines}
{Core part of evaluation: shows if DeepSAD provides benefit beyond standard methods}
{Explain ROC/PRC as evaluation metrics, show curves for all latent sizes, unsupervised case}
{Results here $\rightarrow$ baseline comparison and semi-supervised effects}
\begin{table}[t]
\centering
\setlength{\tabcolsep}{4pt}
\renewcommand{\arraystretch}{1.2}
% Caption placed above the table (convention, and consistent with the
% per-regime tables earlier); a \label is added so the table can be
% referenced from the text.
\caption{AUC means across 5 folds for both evaluations, grouped by labeling regime. Maximum observed standard deviation across all cells (not shown in table): 0.060.}
\label{tab:auc_all_regimes}
% No vertical rule between the two evaluation groups (booktabs style);
% the \cmidrule gaps already separate them visually.
\begin{tabularx}{\textwidth}{c*{4}{Y}*{4}{Y}}
\toprule
& \multicolumn{4}{c}{Experiment-based eval.} & \multicolumn{4}{c}{Handlabeled eval.} \\
\cmidrule(lr){2-5} \cmidrule(lr){6-9}
Latent Dim. & \rotheader{DeepSAD\\(LeNet)} & \rotheader{DeepSAD\\(Efficient)} & \rotheader{IsoForest} & \rotheader{OC-SVM} & \rotheader{DeepSAD\\(LeNet)} & \rotheader{DeepSAD\\(Efficient)} & \rotheader{IsoForest} & \rotheader{OC-SVM} \\
\midrule
\multicolumn{9}{l}{\textbf{Labeling regime: }\(\mathbf{0/0}\) \textit{(normal/anomalous samples labeled)}} \\
\addlinespace[2pt]
32 & \textbf{0.801} & 0.791 & 0.717 & 0.752 & \textbf{1.000} & \textbf{1.000} & 0.921 & 0.917 \\
64 & 0.776 & \textbf{0.786} & 0.718 & 0.742 & \textbf{1.000} & \textbf{1.000} & 0.917 & 0.931 \\
128 & \textbf{0.784} & \textbf{0.784} & 0.719 & 0.775 & \textbf{1.000} & \textbf{1.000} & 0.921 & 0.967 \\
256 & 0.762 & 0.772 & 0.712 & \textbf{0.793} & \textbf{1.000} & \textbf{1.000} & 0.918 & 0.966 \\
512 & 0.759 & 0.784 & 0.712 & \textbf{0.804} & \textbf{1.000} & \textbf{1.000} & 0.920 & 0.949 \\
768 & 0.749 & 0.754 & 0.713 & \textbf{0.812} & \textbf{1.000} & \textbf{1.000} & 0.923 & 0.960 \\
1024 & 0.757 & 0.750 & 0.716 & \textbf{0.821} & \textbf{1.000} & \textbf{1.000} & 0.919 & 0.956 \\
\midrule
\multicolumn{9}{l}{\textbf{Labeling regime: }\(\mathbf{50/10}\) \textit{(normal/anomalous samples labeled)}} \\
\addlinespace[2pt]
32 & 0.741 & 0.747 & 0.717 & \textbf{0.752} & 0.990 & \textbf{0.998} & 0.921 & 0.917 \\
64 & \textbf{0.757} & 0.750 & 0.718 & 0.742 & 0.998 & \textbf{0.999} & 0.917 & 0.931 \\
128 & 0.746 & 0.751 & 0.719 & \textbf{0.775} & 0.991 & \textbf{0.999} & 0.921 & 0.967 \\
256 & 0.746 & 0.750 & 0.712 & \textbf{0.793} & \textbf{0.999} & \textbf{0.999} & 0.918 & 0.966 \\
512 & 0.760 & 0.763 & 0.712 & \textbf{0.804} & 0.972 & \textbf{0.999} & 0.920 & 0.949 \\
768 & 0.749 & 0.747 & 0.713 & \textbf{0.812} & \textbf{1.000} & 0.998 & 0.923 & 0.960 \\
1024 & 0.748 & 0.732 & 0.716 & \textbf{0.821} & \textbf{0.999} & 0.998 & 0.919 & 0.956 \\
\midrule
\multicolumn{9}{l}{\textbf{Labeling regime: }\(\mathbf{500/100}\) \textit{(normal/anomalous samples labeled)}} \\
\addlinespace[2pt]
32 & 0.765 & \textbf{0.775} & 0.717 & 0.752 & \textbf{1.000} & \textbf{1.000} & 0.921 & 0.917 \\
64 & 0.754 & \textbf{0.773} & 0.718 & 0.742 & \textbf{1.000} & \textbf{1.000} & 0.917 & 0.931 \\
128 & 0.758 & 0.769 & 0.719 & \textbf{0.775} & \textbf{1.000} & \textbf{1.000} & 0.921 & 0.967 \\
256 & 0.749 & 0.768 & 0.712 & \textbf{0.793} & 0.999 & \textbf{1.000} & 0.918 & 0.966 \\
512 & 0.766 & 0.770 & 0.712 & \textbf{0.804} & 0.989 & \textbf{1.000} & 0.920 & 0.949 \\
768 & 0.746 & 0.750 & 0.713 & \textbf{0.812} & \textbf{1.000} & \textbf{1.000} & 0.923 & 0.960 \\
1024 & 0.743 & 0.739 & 0.716 & \textbf{0.821} & \textbf{1.000} & \textbf{1.000} & 0.919 & 0.956 \\
\bottomrule
\end{tabularx}
\end{table}
%\fig{roc_prc_unsup}{figures/roc_prc_unsup.png}{ROC and PRC curves for DeepSAD, Isolation Forest, and OCSVM (unsupervised, all latent dimensions).}
\threadtodo
{Interpret unsupervised results across architectures and baselines}
{Important to establish the baseline performance levels}
{Compare AUCs: Isolation Forest weakest, OCSVM moderate (uses encoder), DeepSAD best}
{Sets expectation for whether supervision improves or harms performance}
\threadtodo
{Present semi-supervised regimes and their effects}
{Semi-supervision is central to DeepSAD; must show how labels change outcomes}
{Show ROC/PRC plots for selected latent sizes under different labeling regimes}
{This leads $\rightarrow$ analysis of why few labels harmed but many labels improved}
%\fig{roc_prc_semi}{figures/roc_prc_semi.png}{ROC and PRC curves for selected latent sizes under different semi-supervised regimes.}
\threadtodo
{Discuss surprising supervision dynamics}
{Reader expects supervision to always help; but results show nuance}
{Interpret why few labels overfit, many labels help, unsupervised sometimes best}
{This discussion $\rightarrow$ motivates looking at model behavior over time via inference}
% --- Section: Inference Experiments ---
\section{Inference on Held-Out Experiments}
\threadtodo
{Introduce inference evaluation on unseen experiments}
{This tests real-world usefulness: continuous scan-level degradation quantification}
{Explain setup: EMA-smoothed z-scores compared against heuristic degradation indicators}
{From static metrics $\rightarrow$ to temporal behavior analysis}
%\fig{inference_indicators}{figures/inference_indicators.png}{Example inference traces: EMA-smoothed anomaly scores compared to missing-point percentage and near-sensor returns.}
\threadtodo
{Analyze correlation of anomaly scores with degradation indicators}
{Important because it shows methods behave as intended even without perfect ground truth}
{Discuss qualitative similarity, emphasize scores as degradation proxies}
{Sets stage $\rightarrow$ for clean vs degraded comparison}
\threadtodo
{Compare anomaly score dynamics between clean and degraded experiments}
{Tests whether scores separate normal vs degraded traversals reliably}
{Show normalized z-score plots using clean-experiment parameters}
{Final confirmation $\rightarrow$ methods are meaningful for degradation quantification}
%\fig{inference_clean_vs_smoke}{figures/inference_clean_vs_smoke.png}{Normalized anomaly scores for a clean vs degraded experiment. Clear amplitude separation is visible.}
% --- Section: Results Summary ---
\section{Summary of Results}
\threadtodo
{Summarize main findings across all results}
{Reader should leave with a compact understanding of what was learned}
{State that Efficient autoencoder reconstructs better, DeepSAD beats baselines, semi-supervision shows tradeoffs, inference confirms degradation quantification works}
{Clear closure $\rightarrow$ prepares transition to discussion, limitations, and future work}
% \todo[inline]{introductory paragraph results}
% \todo[inline]{autoencoder results, compare lenet to efficient, shows that efficient is better and especially at lower latent dims, interesting to see in future exps if autencoder results appear to transfer to deepsad training results, therefore not a single latent dim in later exps, but rather all so it can be compared. also interesting to see if efficient better than lenet since reconstruction loss is better for efficient}
%
% \todo[inline]{we already have results graphs loss over latent dims with both lenet and effficient arch in plot, we also have overall plot as well as one for evaluation only with degraded data (anomalies) to see how good the networks are in reconstructing anomalies, not only normal data, plots enough or table with results necessary?}
%
% \todo[inline]{transition to main training results, should we show ROC/PRC comparisons of methods first or should we first show inference as score over time for one (during training left out) experiment?}
%
% \todo[inline]{main training compare roc/prc of 7 latent dimensionalities with 0 normal 0 anomalous semi regime, plot with 7 subplots, both deepsad better than baselines in both labeling regimes (experiment based and subjective hand-labeled evaluations). as expected isoforest worst since its simplest, ocsvm better since it profits from pre-trained encoder which should be good at dim reduction while maximizing retained information, efficient and lenet have similar results, although efficient has less variance between folds which could either mean its more effective at finding patterns (due to maybe more channels, better receptive field, etc) or it could mean it overfits more readily to data? not sure tbh and I don't think we can interpret from these limited evaluations, but better evaluation not possible without good ground truth}
%
% \todo[inline]{main training compare roc/prc of semi-regimes from 2 or 3 latent dimensionalities, show that unsupervised was best, then heavily semi-supervised then a few labeled samples in last position, why was this? maybe the few labeled examples create overfit already and lot of them improve overfit but are worse than generalized unsupervised version?}
%
% \todo[inline]{inference results showing the general workings of the methods on two experiments (one degraded, one normal - so no smoke) which were left out during training of these methods. inference plots of which 2 kinds exist: one that compares the smoothed z-score of the methods (to reduce noise in plots with EMA, which is not dependent on future data, so could be used in realtime and reacts way faster than moving averages and z-score is used since the analog output values from the different methods have different signs and magnitudes) with two statistical values we discussed in data section, namely missing percentage of points per lidar scan and erroneous near-sensor returns which have to be early returns per scan. these show that all methods have comparative qualities to these statistics, although the should not be taken as a ground truth, just as an indicator showing that generally the intended use case appears to be fulfilled by all methods (which was to interpret the anomaly score as a degradtaion quantification of each individual scan)}
% \todo[inline]{the second kind of inference plots shows the difference between scores produced on normal (non-degraded) experiment data vs scores produced on anomalous (degraded) data by normalizing the timeline of two experiments of which one contains no smoke and one has been degraded with artificial smoke. this has been achieved by using the z-score parameters of the clean data scores on both the clean experiment scores and the degraded experiment scores to show that there is a large difference between the amplitudes of these methods' scores for the two types of experiments}
%
% \todo[inline]{anything else for results or simply transition to conclusion and future work?}
%
%
% \newsection{hyperparameter_analysis}{Hyperparameter Analysis}
% \todo[inline]{result for different amounts of labeled data}
\newchapter{conclusion_future_work}{Conclusion and Future Work}
\newsection{conclusion}{Conclusion}