From cf9a33a135b4247ff721102e5299ba2040872594 Mon Sep 17 00:00:00 2001 From: zPlus Date: Sun, 19 Nov 2023 10:17:40 +0100 Subject: [PATCH] Add basic configuration for Fuseki. --- README | 29 +++++++++++++++++++--- fuseki.service | 39 ++++++++++++++++++++++++++++++ fuseki_base/configuration/dokk.ttl | 39 ++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 fuseki.service create mode 100644 fuseki_base/configuration/dokk.ttl diff --git a/README b/README index cb3accc..0f4b030 100644 --- a/README +++ b/README @@ -1,10 +1,33 @@ -Run website: +# Extract plaintext from library PDF files. These texts are used in the library pages. + + cp -R pdf pdf_to_text + cd pdf_to_text + for file in *.pdf; do pdftotext -layout "$file"; done + rm -f *.pdf + +# Database + +The tools used here are available for download at +<https://jena.apache.org/download/>. Just download +"apache-jena-<version>.tar.gz" and "apache-jena-fuseki-<version>.tar.gz" + +Databases can be created with: + + tdb2.tdbloader --loc=database_name *.ttl + +Place all the databases into the folder "fuseki_base/databases/". +Now to start the server, just run the systemd service file "fuseki.service" available +in this repository. Don't forget to edit both "fuseki.service" and "fuseki_base/configuration/dokk.ttl" +for configuring paths or other custom settings. + +# Run website: python3 -m venv venv source venv/bin/activate - gunicorn --reload --worker-connections=4 --threads=4 --bind localhost:8080 app:application + pip install -r requirements.txt + gunicorn --reload --worker-connections=4 --threads=4 --bind 0.0.0.0:8080 app:application -Crawl website to static HTML pages: +# Crawl website for static HTML pages: wget2 --mirror --max-threads=16 --page-requisites --adjust-extension --execute robots=off localhost:8080 diff --git a/fuseki.service b/fuseki.service new file mode 100644 index 0000000..75019e0 --- /dev/null +++ b/fuseki.service @@ -0,0 +1,39 @@ +# This is an example service file for systemd. 
If using the Fuseki server, this +# service can be configured to automatically start the server. +# +# How to use this file: +# - copy this file to /etc/systemd/system +# - edit the variables below accordingly +# - systemctl enable fuseki.service +# - systemctl start fuseki.service + +[Unit] +Description=Fuseki server for DOKK +After=network.target + +[Service] +# Edit the line below to adjust the amount of memory allocated to Fuseki +Environment=JVM_ARGS=-Xmx2G + +# Folder of the Fuseki installation files +Environment=FUSEKI_HOME=/home/fuseki/fuseki_server + +# Fuseki runtime folder (will contain configuration files and other runtime files). +Environment=FUSEKI_BASE=/home/fuseki/fuseki_base + +# Command for launching the Fuseki server +# Arguments must be passed via command line because the server is started before +# the configuration file is read. +ExecStart=/home/fuseki/fuseki_server/fuseki-server --port=7000 --localhost + +User=fuseki +Group=fuseki +WorkingDirectory=/home/fuseki/fuseki_server +Restart=always + +# Java processes exit with status 143 when terminated by SIGTERM, this +# should be considered a successful shutdown +SuccessExitStatus=143 + +[Install] +WantedBy=multi-user.target diff --git a/fuseki_base/configuration/dokk.ttl b/fuseki_base/configuration/dokk.ttl new file mode 100644 index 0000000..e4e476f --- /dev/null +++ b/fuseki_base/configuration/dokk.ttl @@ -0,0 +1,39 @@ +PREFIX : <#> # NOTE(review): IRI lost in extraction; <#> is the Fuseki example default - confirm +PREFIX fuseki: <http://jena.apache.org/fuseki#> +PREFIX ja: <http://jena.hpl.hp.com/2005/11/Assembler#> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX tdb1: <http://jena.hpl.hp.com/2008/tdb#> +PREFIX tdb2: <http://jena.apache.org/2016/tdb#> + +:g1 + ja:graphName :graph:graph ; + ja:graph [ + a tdb2:GraphTDB2 ; + tdb2:location "/home/fuseki/fuseki_base/databases/graph" + # tdb2:graphName ... None, use default unnamed + ] . + +:g2 + ja:graphName :graph:manpages ; + ja:graph [ + a tdb2:GraphTDB2 ; + tdb2:location "/home/fuseki/fuseki_base/databases/manpages" + ] . + +:dataset + a ja:RDFDataset ; + # ja:defaultGraph ... + ja:namedGraph :g1 , :g2 . 
+ +:endpoint + # Without a name: /endpoint?query= + # With a name: /endpoint/name/?query= + # fuseki:name "query" ; + fuseki:operation fuseki:query . + +:service + a fuseki:Service ; + fuseki:name "dokk" ; + fuseki:dataset :dataset ; + fuseki:endpoint :endpoint .