home » zplus/dokk.git
ID: ee19bd7bd93278f7c8d053768008a4ad0b5bac51
6 lines — 318B — View raw


1
2
3
4
5
6
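Use pdftotext to convert library PDF files into plaintext, then pipe each result through ./pdf2rdf.py, which writes one numbered .jsonld file per PDF into the nodes/ folder.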
https://dokk.org/manpages/debian/12/poppler-utils/pdftotext.1.en

    # Convert every PDF under $PDF_FOLDER to text with pdftotext and feed the
    # text to ./pdf2rdf.py, writing the results to nodes/1.jsonld, nodes/2.jsonld, ...
    # Requires bash (process substitution and `read -d ''`).

    # -p: do not fail when the snippet is re-run and nodes/ already exists.
    mkdir -p nodes

    # Set this to the folder that holds the PDF files.
    # If left empty, the current directory is searched.
    export PDF_FOLDER=

    # find emits NUL-terminated paths and read consumes them one at a time, so
    # paths containing spaces, newlines or glob characters are passed intact.
    # The -name pattern is quoted so the shell cannot expand it before find runs.
    i=0
    while IFS= read -r -d '' file; do
        i=$(( i + 1 ))
        pdftotext -layout "$file" - | ./pdf2rdf.py "$file" > "nodes/$i.jsonld"
    done < <(find "${PDF_FOLDER:-.}" -type f -name '*.pdf' -print0)