home » zplus/dokk.git
ID: b7a0a57a55a39f1f8a4522b346f5d5d4506a25e2
6 lines — 304B — View raw


1
2
3
4
5
6
Use pdftotext to convert library PDF files into plaintext.
https://dokk.org/manpages/debian/12/poppler-utils/pdftotext.1.en

    # Create the working directories. -p makes this step safe to re-run when
    # they already exist. (nodes/ is not written by these commands; presumably
    # txt2rdf.py fills it -- confirm against that script.)
    mkdir -p nodes txt
    # Convert every PDF in the current directory to plaintext. "-layout" asks
    # pdftotext to keep the page layout and "-" sends the text to stdout, which
    # is redirected to txt/<name>.pdf.txt.
    for file in *.pdf; do
        # With no PDFs present the glob stays literal; skip it rather than
        # creating a bogus "txt/*.pdf.txt".
        [ -e "$file" ] || continue
        # The ./ prefix keeps a name starting with "-" from being parsed as an option.
        pdftotext -layout "./$file" - > "txt/$file.txt"
    done
    # Run txt2rdf.py once per extracted text file, passing only the file name
    # (the txt/ prefix is stripped, as the original basename call did).
    for file in txt/*; do
        # Same unmatched-glob guard as above.
        [ -e "$file" ] || continue
        ./txt2rdf.py "${file##*/}"
    done