better grammarly prep
This commit is contained in:
48
thesis/filters/math-omit.lua
Normal file
48
thesis/filters/math-omit.lua
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
-- math-omit.lua
|
||||||
|
-- Replace any math with a placeholder and ensure a space before it when appropriate.
|
||||||
|
-- Text substituted for every math element. The spacing pass recognises
-- placeholders by comparing a Str's text against this exact value.
local PH = "[math omitted]"
|
||||||
|
|
||||||
|
--- Filter function for Math elements: swap the element for the placeholder.
-- The math content in `el` is discarded. No surrounding whitespace is added
-- here; the Para/Plain handlers take care of spacing afterwards.
-- @param el the Math element being replaced (its content is not inspected)
-- @return a Str inline whose text is PH
function Math(el)
  local placeholder = pandoc.Str(PH)
  return placeholder
end
|
||||||
|
|
||||||
|
--- Insert a Space before each math placeholder that would otherwise be glued
-- to the inline preceding it.
--
-- A placeholder is a Str whose text equals PH. No Space is inserted when:
--   * the placeholder is the first inline of the list;
--   * the previous inline is already whitespace (Space, SoftBreak, LineBreak);
--   * the previous Str ends in an opening bracket/paren/brace, a slash,
--     a hyphen, ASCII whitespace, or a non-breaking space (U+00A0).
--
-- @param inlines sequence of inline elements; it is read but not modified
-- @return a new plain Lua table with the same inlines plus inserted Spaces
local function ensure_space_before_ph(inlines)
  -- Inline types that already separate the placeholder from what precedes it.
  local separators = { Space = true, SoftBreak = true, LineBreak = true }
  -- UTF-8 encoding of U+00A0. Checked separately because sub(-1) only sees
  -- the final byte and %s does not match 0xA0.
  local nbsp = "\194\160"

  local out = {}
  for i = 1, #inlines do
    local cur = inlines[i]

    if cur.t == "Str" and cur.text == PH then
      -- Look at what has been emitted so far, not at inlines[i - 1], so the
      -- decision takes previously inserted elements into account.
      local prev = out[#out]
      local need_space = true

      if not prev then
        -- First token in the block: nothing to separate from.
        need_space = false
      elseif separators[prev.t] then
        need_space = false
      elseif prev.t == "Str" then
        local text = prev.text
        local last = text:sub(-1)
        if last:match("[%(%[%{%/%-]")
            or last:match("%s")
            or text:sub(-2) == nbsp then
          need_space = false
        end
      end

      if need_space then
        out[#out + 1] = pandoc.Space()
      end
    end

    -- Every inline is copied through, placeholder or not.
    out[#out + 1] = cur
  end

  return out
end
|
||||||
|
|
||||||
|
--- Filter function for Para blocks: normalise spacing around placeholders.
-- The paragraph's inline content is replaced by the list returned from
-- ensure_space_before_ph.
-- @param el the Para block; its `content` field is overwritten
-- @return the same block, modified in place
function Para(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||||
|
|
||||||
|
--- Filter function for Plain blocks: normalise spacing around placeholders.
-- The block's inline content is replaced by the list returned from
-- ensure_space_before_ph.
-- @param el the Plain block; its `content` field is overwritten
-- @return the same block, modified in place
function Plain(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||||
@@ -18,18 +18,24 @@ PART1_TXT="${OUT_BASE}_part1.txt"
|
|||||||
PART2_TXT="${OUT_BASE}_part2.txt"
|
PART2_TXT="${OUT_BASE}_part2.txt"
|
||||||
MARKER="Data and Preprocessing"
|
MARKER="Data and Preprocessing"
|
||||||
|
|
||||||
echo "[1/4] Flattening with latexpand -> ${FLAT_TEX}"
|
echo "[1/5] Flattening with latexpand -> ${FLAT_TEX}"
|
||||||
latexpand "${INPUT_TEX}" > "${FLAT_TEX}"
|
latexpand "${INPUT_TEX}" > "${FLAT_TEX}"
|
||||||
|
|
||||||
echo "[2/4] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
|
echo "[2/5] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
|
||||||
# Replace entire tabular / tabularx environments with a placeholder
|
# Replace entire tabular / tabularx environments with a placeholder
|
||||||
perl -0777 -pe 's/\\begin\{(tabularx?)\}.*?\\end\{\1\}/[table omitted]/gs' \
|
perl -0777 -pe 's/\\begin\{(tabularx?)\}.*?\\end\{\1\}/[table omitted]/gs' \
|
||||||
"${FLAT_TEX}" > "${NO_TABLES_TEX}"
|
"${FLAT_TEX}" > "${NO_TABLES_TEX}"
|
||||||
|
|
||||||
echo "[3/4] Converting to plain text with pandoc -> ${PLAIN_TXT}"
|
echo "[3/5] Converting to plain text with pandoc -> ${PLAIN_TXT}"
|
||||||
pandoc -f latex -t plain --wrap=none "${NO_TABLES_TEX}" -o "${PLAIN_TXT}"
|
pandoc -f latex -t plain --wrap=none \
|
||||||
|
--lua-filter=filters/keep-citations.lua \
|
||||||
|
--lua-filter=filters/math-omit.lua \
|
||||||
|
"${NO_TABLES_TEX}" -o "${PLAIN_TXT}"
|
||||||
|
|
||||||
echo "[4/4] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""
|
echo "[4/5] Replacing [] placeholders with [figure]"
|
||||||
|
sed -i 's/\[\]/[figure]/g' "${PLAIN_TXT}"
|
||||||
|
|
||||||
|
echo "[5/5] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""
|
||||||
|
|
||||||
# Ensure the marker exists exactly on its own line
|
# Ensure the marker exists exactly on its own line
|
||||||
if ! grep -xq "${MARKER}" "${PLAIN_TXT}"; then
|
if ! grep -xq "${MARKER}" "${PLAIN_TXT}"; then
|
||||||
|
|||||||
Reference in New Issue
Block a user