ID: ee19bd7bd93278f7c8d053768008a4ad0b5bac51
6 lines
—
318B —
View raw
| Use pdftotext to extract plaintext from the library PDF files and pipe each result through pdf2rdf.py, writing one numbered JSON-LD file per PDF into nodes/.
https://dokk.org/manpages/debian/12/poppler-utils/pdftotext.1.en
# Convert every PDF under $PDF_FOLDER into a numbered JSON-LD file in ./nodes.
# Requires bash (process substitution, read -d ''), pdftotext from
# poppler-utils, and an executable ./pdf2rdf.py in the current directory.

# -p: do not fail when nodes/ is left over from a previous run.
mkdir -p nodes

# Set this to the folder that holds the PDFs; when it is left empty the
# current directory is searched.
export PDF_FOLDER=

i=0
# NUL-delimited find output keeps paths containing spaces, newlines or glob
# characters intact. The quoted '*.pdf' stops the shell from expanding the
# pattern against files in the current directory before find sees it.
while IFS= read -r -d '' file; do
  i=$((i + 1))
  # pdf2rdf.py gets the extracted text on stdin and the source path as its
  # argument; a failure is reported but does not stop the remaining files.
  pdftotext -layout "$file" - | ./pdf2rdf.py "$file" > "nodes/$i.jsonld" \
    || printf 'warning: conversion failed for %s\n' "$file" >&2
done < <(find "${PDF_FOLDER:-.}" -type f -name '*.pdf' -print0)
|