home » zplus/dokk.org.git
Author zPlus <zplus@peers.community> 2025-01-07 12:58:51
Committer zPlus <zplus@peers.community> 2025-01-07 12:58:51
Commit ba6958d (patch)
Tree b6ba162
Parent(s)

Add journals and read plaintext from database.

- Add journals to the library
- Retrieve PDFs plaintext from the database instead of blob.dokk.org


commits diff: e89559e..ba6958d
4 files changed, 81 insertions, 62 deletions (download)


Diffstat
-rw-r--r-- README 7
-rwxr-xr-x app.py 104
-rw-r--r-- templates/library/index.html 19
-rw-r--r-- templates/library/item.html 13

Diff options
View
Side
Whitespace
Context lines
Inter-hunk lines
+0/-7 M   README
index b940d60..723adb2
old size: 1K - new size: 1K
@@ -1,10 +1,3 @@
1 - # Extract plaintext from library PDF files. These texts are used in the library pages.
2 -
3 - cp -R pdf pdf_to_text
4 - cd pdf_to_text
5 - for file in *.pdf; do pdftotext -layout "$file"; done
6 - rm -f *.pdf
7 -
8 1 # Database
9 2
10 3 The tools used here are available for download at

+53/-51 M   app.py
index 9ad73bb..231f070
old size: 16K - new size: 16K
@@ -108,7 +108,7 @@ def library():
108 108 Library index
109 109 """
110 110
111 - # Get a list of authors for searching
111 + # Get a list of authors for search filters
112 112 authors = query("""
113 113 PREFIX library: <dokk:vocab:library:>
114 114
@@ -119,7 +119,7 @@ def library():
119 119 ORDER BY ?name
120 120 """)['results']['bindings']
121 121
122 - # Get a list of licenses for searching
122 + # Get a list of licenses for search filters
123 123 licenses = query("""
124 124 PREFIX library: <dokk:vocab:library:>
125 125 PREFIX license: <dokk:vocab:license:>
@@ -134,18 +134,36 @@ def library():
134 134 ORDER BY ?id
135 135 """)['results']['bindings']
136 136
137 + # Get a list of journals for search filters
138 + journals = query("""
139 + PREFIX library: <dokk:vocab:library:>
140 +
141 + SELECT DISTINCT ?id ?title
142 + WHERE {
143 + ?id a library:Journal;
144 + library:title ?title
145 + }
146 + ORDER BY ?title
147 + """)['results']['bindings']
148 +
137 149 # Retrieve filters selected by the user
138 150 filters_author = []
151 + filters_journal = []
139 152 filters_license = []
140 153 query_filters = ''
141 154 if request.method == 'POST':
142 155 filters_author = request.forms.getall('author')
156 + filters_journal = request.forms.getall('journal')
143 157 filters_license = request.forms.getall('license')
144 158
145 159 if len(filters_author) > 0:
146 160 query_filters_author = ','.join([ '"'+i.replace('"', '\\"')+'"' for i in filters_author ])
147 161 query_filters += f'FILTER(?author IN ({query_filters_author}))'
148 162
163 + if len(filters_journal) > 0:
164 + query_filters_journal = ','.join([ '<'+i+'>' for i in filters_journal ])
165 + query_filters += f'FILTER(?journal IN ({query_filters_journal}))'
166 +
149 167 if len(filters_license) > 0:
150 168 query_filters_license = ','.join([ '"'+i.replace('"', '\\"')+'"' for i in filters_license ])
151 169 query_filters += f'FILTER(?license_id IN ({query_filters_license}))'
@@ -154,42 +172,43 @@ def library():
154 172 PREFIX library: <dokk:vocab:library:>
155 173 PREFIX license: <dokk:vocab:license:>
156 174
157 - CONSTRUCT {{
158 - ?item library:title ?title;
159 - library:author ?author ;
160 - license:licensed_under ?license .
161 - ?license license:id ?license_id ;
162 - license:name ?license_name .
163 - }}
164 - WHERE {{
165 - ?item library:title ?title ;
166 - library:author ?author ;
167 - license:licensed_under ?license .
175 + DESCRIBE *
176 + WHERE
177 + {{
178 + ?item
179 + a library:Item;
180 + library:title ?title ;
181 + library:author ?author .
182 +
183 + OPTIONAL {{ ?item license:licensed_under ?license }}
168 184
169 185 OPTIONAL {{
170 - ?license license:id ?license_id_optional ;
171 - license:name ?license_name_optional .
186 + ?item library:journal ?journal .
187 + ?journal
188 + a library:Journal ;
189 + library:title ?journal_title .
172 190 }}
173 191
174 - BIND(COALESCE(?license_id_optional, SUBSTR(STR(?license), 14)) AS ?license_id)
175 - BIND(COALESCE(?license_name_optional, SUBSTR(STR(?license), 14)) AS ?license_name)
176 -
177 192 {query_filters}
178 193 }}
179 - ORDER BY UCASE(?title)
194 + ORDER BY ?title
180 195 """,
181 196 {
182 197 '@context': {
198 + 'blob': 'dokk:vocab:blob:',
183 199 'library': 'dokk:vocab:library:',
184 200 'license': 'dokk:vocab:license:',
185 201 'library:author': { '@container': '@set' },
186 202 'license:licensed_under': { '@container': '@set' }
187 203 },
188 - 'library:title': {}
204 + '@type': 'library:Item'
189 205 })
190 206
191 - return template('library/index.html', authors=authors, licenses=licenses, items=items,
192 - filters_author=filters_author, filters_license=filters_license)
207 + return template('library/index.html', authors=authors, licenses=licenses,
208 + journals=journals, items=items,
209 + filters_author=filters_author,
210 + filters_journal=filters_journal,
211 + filters_license=filters_license)
193 212
194 213 @bottle.get('/library/<item_id>', name='library_item')
195 214 def library_item(item_id):
@@ -197,53 +216,36 @@ def library_item(item_id):
197 216 Show a single item in the library.
198 217 """
199 218
200 - try:
201 - with open(f'../library_txt/{item_id}.txt', 'r') as file:
202 - item_plaintext = file.read()
203 - except:
204 - item_plaintext = ''
205 -
206 219 data = query(f"""
220 + PREFIX blob: <dokk:vocab:blob:>
207 221 PREFIX library: <dokk:vocab:library:>
208 222 PREFIX license: <dokk:vocab:license:>
209 223
210 - CONSTRUCT {{
211 - ?item library:title ?title ;
212 - library:author ?author ;
213 - license:licensed_under ?license .
214 -
215 - ?license license:id ?license_id ;
216 - license:name ?license_name .
217 - }}
218 - WHERE {{
219 - ?item
220 - library:title ?title ;
221 - library:author ?author ;
222 - license:licensed_under ?license .
223 -
224 - FILTER (?item = <dokk:{item_id}>)
224 + DESCRIBE ?item ?license ?blob ?journal
225 + WHERE
226 + {{
227 + ?item a library:Item ;
228 + blob:at ?blob .
225 229
226 - OPTIONAL {{
227 - ?license license:id ?license_id_optional ;
228 - license:name ?license_name_optional .
229 - }}
230 + FILTER(?item = <dokk:{item_id}>)
230 231
231 - BIND(COALESCE(?license_id_optional, SUBSTR(STR(?license), 14)) AS ?license_id)
232 - BIND(COALESCE(?license_name_optional, SUBSTR(STR(?license), 14)) AS ?license_name)
232 + OPTIONAL {{ ?item license:licensed_under ?license }}
233 + OPTIONAL {{ ?item library:journal ?journal }}
233 234 }}
234 235 """,
235 236 {
236 237 "@context": {
238 + "blob": "dokk:vocab:blob:",
237 239 "library": "dokk:vocab:library:",
238 240 "license": "dokk:vocab:license:",
239 241 "library:author": { "@container": "@set" },
240 242 "license:licensed_under": { "@container": "@set" }
241 243 },
242 - "library:title": {}
244 + "@type": "library:Item",
243 245 }
244 246 )["@graph"][0]
245 247
246 - return template('library/item.html', item_id=item_id, plaintext=item_plaintext, data=data)
248 + return template('library/item.html', data=data)
247 249
248 250 @bottle.get('/license', name='license_list')
249 251 def license_list():

+18/-1 M   templates/library/index.html
index 9f02c5f..e58ce9b
old size: 2K - new size: 2K
@@ -22,6 +22,16 @@
22 22
23 23 <br /><br />
24 24
25 + Journal:
26 + <br />
27 + <select name="journal" multiple size=5>
28 + {% for journal in journals %}
29 + <option value="{{ journal['id']['value'] }}" {{ 'selected' if journal['id']['value'] in filters_journal }}>{{ journal['title']['value'] }}</option>
30 + {% endfor %}
31 + </select>
32 +
33 + <br /><br />
34 +
25 35 License:
26 36 <br />
27 37 <select name="license" multiple size=10>
@@ -48,9 +58,16 @@
48 58 {% endfor%}
49 59 </div>
50 60
61 + {% if 'library:journal' in item %}
62 + <div>
63 + Journal:
64 + {{ item['library:journal']['library:title'] }}
65 + </div>
66 + {% endif %}
67 +
51 68 <div>
52 69 License:
53 - {% for license in item["license:licensed_under"]|sort(attribute="license:id") %}
70 + {% for license in item["license:licensed_under"]|sort(attribute="license\:id") %}
54 71 {{ license['license:id'] }}
55 72 {% endfor%}
56 73 </div>

+10/-3 M   templates/library/item.html
index f7299d9..97f37bd
old size: 962B - new size: 1K
@@ -9,7 +9,7 @@
9 9 <a href="/library">DOKK Library</a>
10 10 </div>
11 11
12 - <object type="application/pdf" data="https://blob.dokk.org/pdf/{{ item_id }}.pdf"></object>
12 + <object type="application/pdf" data="https://blob.dokk.org/{{ data['blob:at']['@id'][8:] }}"></object>
13 13
14 14 <div class="info">
15 15 <h1>{{ data["library:title"] }}</h1>
@@ -17,10 +17,17 @@
17 17 <p>
18 18 <b>Authors</b>
19 19 {% for author in data["library:author"]|sort() %}
20 - {{ author }}
20 + {{ author }},
21 21 {% endfor%}
22 22 </p>
23 23
24 + {% if 'library:journal' in data %}
25 + <p>
26 + <b>Journal</b>
27 + {{ data['library:journal']['library:title'] }}
28 + </p>
29 + {% endif %}
30 +
24 31 <p>
25 32 <b>License</b>
26 33 {% for license in data["license:licensed_under"]|sort(attribute="license:id") %}
@@ -31,7 +38,7 @@
31 38
32 39 <details open>
33 40 <summary>Plaintext</summary>
34 - <pre>{{ plaintext }}</pre>
41 + <pre>{{ data["blob:at"]["blob:pdftotext"] }}</pre>
35 42 </details>
36 43 </div>
37 44