#!/usr/bin/env bash
#
# Use pdftotext to convert library PDF files into plaintext.
# https://dokk.org/manpages/debian/12/poppler-utils/pdftotext.1.en
#
# Run from the directory that holds the PDF files and ./txt2rdf.py:
#   1. create the working directories `nodes` and `txt`;
#   2. write the layout-preserving text of every ./*.pdf to
#      txt/<name>.pdf.txt;
#   3. call ./txt2rdf.py once per file found in txt/.

# Without nullglob an unmatched pattern stays literal, and the loops below
# would run once against a non-existent file named "*.pdf" (or "*").
shopt -s nullglob

# -p keeps a re-run from failing when the directories already exist.
# NOTE(review): `nodes` is not written to here; presumably txt2rdf.py
# uses it -- confirm.
mkdir -p nodes txt || exit 1

# The ./ prefix stops a PDF whose name starts with "-" from being parsed
# as a pdftotext option.
for file in ./*.pdf; do
  out="txt/${file##*/}.txt"
  # "-" sends the extracted text to stdout, which is redirected to $out.
  if ! pdftotext -layout "$file" - > "$out"; then
    printf 'warning: pdftotext failed for %s\n' "$file" >&2
    # Drop the empty/partial output so the next loop does not process it.
    rm -f -- "$out"
  fi
done

# txt2rdf.py is given only the base name (no txt/ prefix), as before.
# NOTE(review): presumably it resolves that name against txt/ itself --
# confirm.
for file in txt/*; do
  ./txt2rdf.py "${file##*/}" \
    || printf 'warning: txt2rdf.py failed for %s\n' "$file" >&2
done