diff --git a/thesis/Main.tex b/thesis/Main.tex index 2de7e20..0aa5a6a 100755 --- a/thesis/Main.tex +++ b/thesis/Main.tex @@ -53,9 +53,9 @@ % ************************************************************************************************** % template setup -- do not change these unless you know what you are doing! -\input{./base/documentclass_\DocumentType} +\input{./base/documentclass_thesis} \input{./base/packages} -\input{./base/layout_\DocumentType} +\input{./base/layout_thesis} \input{./base/macros} % ************************************************************************************************** @@ -156,26 +156,27 @@ % variable for page numbering \newcounter{mypageno} -% ************************************************************************************************** -\begin{document} -% ************************************************************************************************** + \input{./base/syntax_formatting} % for thesis: switch to frontmatter (Roman numbering, etc.) -\ifthenelse{\equal{\DocumentType}{thesis}} +\ifthenelse{\equal{thesis}{thesis}} { \frontmatter \pagestyle{plain} \pagenumbering{Roman} }{} +% ************************************************************************************************** +\begin{document} +% ************************************************************************************************** %title -\input{./base/titlepage_\DocumentType} +\input{./base/titlepage_thesis} % for thesis: abstract, kurzfassung, affidavit and statutory declaration -\ifthenelse{\equal{\DocumentType}{thesis}} +\ifthenelse{\equal{thesis}{thesis}} { \emptydoublepage \addcontentsline{toc}{chapter}{Statutory Declaration} - \input{./base/declaration_\DocumentLanguage} + \input{./base/declaration_en} \emptydoublepage \input{thesis_preamble/acknowledgements} \emptydoublepage @@ -187,7 +188,7 @@ \tableofcontents -\ifthenelse{\equal{\DocumentType}{thesis}} +\ifthenelse{\equal{thesis}{thesis}} { \emptydoublepage \setcounter{mypageno}{\value{page}} 
@@ -1148,7 +1149,7 @@ In summary, while this thesis demonstrates the feasibility of using anomaly dete % ************************************************************************************************** \appendix -\ifthenelse{\equal{\DocumentType}{thesis}} +\ifthenelse{\equal{thesis}{thesis}} { \setcounter{mypageno}{\value{page}} \frontmatter \pagestyle{plain} \pagenumbering{Roman} diff --git a/thesis/base/declaration_en.tex b/thesis/base/declaration_en.tex index e18c306..1d2815d 100644 --- a/thesis/base/declaration_en.tex +++ b/thesis/base/declaration_en.tex @@ -24,15 +24,12 @@ not used other than the declared sources/resources, and that I have explicitly indicated all material which has been quoted either literally or by content from the sources used. -\ifthenelse{\equal{\ThesisTitle}{master's thesis} \or - \equal{\ThesisTitle}{diploma thesis} \or - \equal{\ThesisTitle}{doctoral thesis}} -{The text document uploaded to TUGRAZonline is identical to the present \ThesisTitle.}{\reminder{TODO: fix \textbackslash ThesisTitle}} +The text document uploaded to TUGRAZonline is identical to the present \ThesisTitle. \par\vspace*{4cm} \centerline{ -\begin{tabular}{m{1.5cm}cm{1.5cm}m{3cm}m{1.5cm}cm{1.5cm}} -\cline{1-3} \cline{5-7} - & date & & & & (signature) &\\ -\end{tabular}} + \begin{tabular}{m{1.5cm}cm{1.5cm}m{3cm}m{1.5cm}cm{1.5cm}} + \cline{1-3} \cline{5-7} + & date & & & & (signature) & \\ + \end{tabular}} diff --git a/thesis/drop-images.lua b/thesis/drop-images.lua new file mode 100644 index 0000000..7834c32 --- /dev/null +++ b/thesis/drop-images.lua @@ -0,0 +1,11 @@ +-- drop-images.lua +-- Replaces all images (figures, graphics) with a short placeholder. 
+function Image(el) return pandoc.Str("[image omitted]") end
+
+-- Raw figure blocks: accept "tex" and "latex" (reader-dependent format tag)
+function RawBlock(el)
+  if (el.format == "tex" or el.format == "latex") and el.text:match("\\begin%s*{%s*figure%s*}") then
+    return pandoc.Plain({pandoc.Str("[figure omitted]")})
+  end
+end
+
diff --git a/thesis/drop-tables.lua b/thesis/drop-tables.lua
new file mode 100644
index 0000000..5da9b7e
--- /dev/null
+++ b/thesis/drop-tables.lua
@@ -0,0 +1,11 @@
+-- drop-tables.lua
+-- Replaces raw LaTeX blocks that contain a tabular/tabularx environment with a placeholder.
+function RawBlock(el)
+  if el.format == "tex" or el.format == "latex" then
+    -- Check for tabular or tabularx environment
+    if el.text:match("\\begin%s*{%s*tabularx?%s*}") then
+      return pandoc.Plain({pandoc.Str("[table omitted]")})
+    end
+  end
+end
+
diff --git a/thesis/flake.nix b/thesis/flake.nix
index 4de24a1..825d85f 100644
--- a/thesis/flake.nix
+++ b/thesis/flake.nix
@@ -28,7 +28,10 @@
             zathura
             wmctrl
             python312
+            pandoc
+            pandoc-lua-filters
           ];
+        filtersPath = "${pkgs.pandoc-lua-filters}/share/pandoc/filters";
       in
       {
         devShell = pkgs.mkShell {
@@ -39,6 +42,35 @@
           ];
+
+          # shellHook must be an attribute of mkShell to run on shell entry.
+          # It is sourced into the interactive shell, so no `set -eu` here:
+          # those options would stay active and end the session on any error.
+          shellHook = ''
+            # local folder in your repo to reference in commands
+            link_target="pandoc-filters"
+            # refresh symlink each time you enter the shell
+            if ln -sfn ${filtersPath} "$link_target"; then
+              echo "Linked $link_target -> ${filtersPath}"
+            else
+              echo "WARNING: could not link $link_target -> ${filtersPath}" >&2
+            fi
+
+            # (optional) write a defaults file that uses the relative symlink
+            if [ ! -f pandoc.defaults.yaml ]; then
+              # heredoc body and terminator sit at the hook's base indentation
+              # so the terminator is in column 0 once Nix strips the indent
+              cat > pandoc.defaults.yaml <<'YAML'
+            from: latex
+            to: plain
+            wrap: none
+            lua-filter:
+              - pandoc-filters/latex-hyphen.lua
+              - pandoc-filters/pandoc-quotes.lua
+            YAML
+              echo "Wrote pandoc.defaults.yaml"
+            fi
+          '';
         };
 
       }
     );
 }
diff --git a/thesis/keep-citations.lua b/thesis/keep-citations.lua
new file mode 100644
index 0000000..3144947
--- /dev/null
+++ b/thesis/keep-citations.lua
@@ -0,0 +1,43 @@
+-- keep-citations.lua
+-- Replace citations with a placeholder and eat any preceding space.
+local PH = "[citation]"
+
+-- Pandoc-native citations (if the reader produced Cite nodes)
+function Cite(el) return pandoc.Str(PH) end
+
+-- Raw LaTeX \cite-like macros (\cite, \citep, \parencite, \Textcite, starred forms)
+-- that reached the filter as raw TeX instead of being parsed as Cite.
+function RawInline(el)
+  if el.format and el.format:match("tex") and el.text:match("\\%a*[Cc]ite%a*%*?") then
+    return pandoc.Str(PH)
+  end
+end
+
+-- Remove a single leading Space before our placeholder
+local function squash_spaces(inlines)
+  local out = {}
+  local i = 1
+  while i <= #inlines do
+    local cur = inlines[i]
+    local nxt = inlines[i + 1]
+    if cur and cur.t == "Space" and nxt and nxt.t == "Str" and nxt.text == PH then
+      table.insert(out, nxt)
+      i = i + 2
+    else
+      table.insert(out, cur)
+      i = i + 1
+    end
+  end
+  return out
+end
+
+function Para(el)
+  el.content = squash_spaces(el.content)
+  return el
+end
+
+function Plain(el)
+  el.content = squash_spaces(el.content)
+  return el
+end
+
diff --git a/thesis/tex2plaintext.sh b/thesis/tex2plaintext.sh
new file mode 100755
index 0000000..b009770
--- /dev/null
+++ b/thesis/tex2plaintext.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Usage:
+# ./tex2plaintext.sh [INPUT_TEX] [OUT_BASENAME]
+#
+# Defaults:
+# INPUT_TEX = Main.tex (your original file name)
+# OUT_BASENAME = thesis (produces thesis.txt, thesis_part1.txt, thesis_part2.txt)
+
+INPUT_TEX="${1:-Main.tex}"
+OUT_BASE="${2:-thesis}"
+
+FLAT_TEX="flat.tex"
+NO_TABLES_TEX="flat_notables.tex"
+PLAIN_TXT="${OUT_BASE}.txt"
+PART1_TXT="${OUT_BASE}_part1.txt"
+PART2_TXT="${OUT_BASE}_part2.txt"
+MARKER="Data and Preprocessing"
+
+echo "[1/4] Flattening with latexpand -> ${FLAT_TEX}"
+latexpand "${INPUT_TEX}" > "${FLAT_TEX}"
+
+echo "[2/4] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
+# Replace entire tabular / tabularx environments with a placeholder
+perl -0777 -pe 's/\\begin\{(tabularx?)\}.*?\\end\{\1\}/[table omitted]/gs' \
+  "${FLAT_TEX}" > "${NO_TABLES_TEX}"
+
+echo "[3/4] Converting to plain text with pandoc -> ${PLAIN_TXT}"
+pandoc -f latex -t plain --wrap=none "${NO_TABLES_TEX}" -o "${PLAIN_TXT}"
+
+echo "[4/4] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""
+
+# Ensure the marker exists exactly on its own line (-F: literal text, not a regex)
+if ! grep -Fxq -- "${MARKER}" "${PLAIN_TXT}"; then
+  echo "ERROR: Marker line not found exactly as \"${MARKER}\" in ${PLAIN_TXT}." >&2
+  echo " (It must be the only content on that line.)" >&2
+  exit 1
+fi
+
+# Clean previous outputs if present
+rm -f -- "${PART1_TXT}" "${PART2_TXT}"
+
+# Split so the marker line becomes the FIRST line of part 2
+awk -v marker="${MARKER}" -v out1="${PART1_TXT}" -v out2="${PART2_TXT}" '
+BEGIN { current = out1 }
+$0 == marker { current = out2; print $0 > current; next }
+{ print $0 > current }
+' "${PLAIN_TXT}"
+
+echo "Done."
+echo " - ${PLAIN_TXT}"
+echo " - ${PART1_TXT}"
+echo " - ${PART2_TXT}"
+