home » dokk/manpages.git
Author zPlus <zplus@peers.community> 2023-11-23 07:33:28
Committer zPlus <zplus@peers.community> 2023-11-23 07:33:28
Commit 7485d93 (patch)
Tree 8db2619
Parent(s)

Replace urllib.parse.quote() with percent_encode().


commits diff: 68485af..7485d93
1 file changed, 15 insertions, 4 deletions (download)


Diffstat
-rwxr-xr-x scripts/rdf.py 19

Diff options
View
Side
Whitespace
Context lines
Inter-hunk lines
+15/-4 M   scripts/rdf.py
index 9048145..8c774e2
old size: 4K - new size: 4K
@@ -4,7 +4,6 @@ import os
4 4 import pathlib
5 5 import rdflib
6 6 import sys
7 - import urllib.parse
8 7
9 8 from rdflib import BNode, Graph, Literal, Namespace, URIRef
10 9 from rdflib.namespace import RDF
@@ -20,6 +19,18 @@ MANPAGE = Namespace('dokk:manpages:')
20 19 # A graph to store all the triples
21 20 g = Graph()
22 21
22 + def percent_encode(string):
23 + """
24 + A few manpages contain special characters that are not valid symbols in a URL's
25 + path. Since we use node URIs like <dokk:manpages:distro/package/name>, we need
26 + to percent-encode these otherwise the URLs are invalid.
27 + The reason for using a custom method instead of urllib.parse.quote() is that quote()
28 + will percent-encode *any* non ASCII character such as non-latin characters.
29 + """
30 +
31 + return string.replace(' ', '_') \
32 + .replace('#', '%23')
33 +
23 34 for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
24 35 if not absolute_file_path.is_file():
25 36 exit('Not a file: {}'.format(absolute_file_path))
@@ -64,7 +75,7 @@ for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
64 75 # because I haven't got enough RAM for storing thousands of pages.
65 76 g_page = Graph()
66 77
67 - page_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}/{filename}'))
78 + page_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}/{distro_package}/{filename}'))
68 79 g_page.add((page_ref, RDF.type, URIRef(MANPAGE.Page)))
69 80 g_page.add((page_ref, URIRef(MANPAGE.filename), Literal(filename)))
70 81 g_page.add((page_ref, URIRef(MANPAGE.name), Literal(name)))
@@ -82,14 +93,14 @@ for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
82 93
83 94 # Create a graph node for this package
84 95 # Link to the page node
85 - package_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}'))
96 + package_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}/{distro_package}'))
86 97 g.add((package_ref, RDF.type, URIRef(MANPAGE.Package)))
87 98 g.add((package_ref, MANPAGE.name, Literal(distro_package)))
88 99 g.add((package_ref, MANPAGE.page, page_ref))
89 100
90 101 # Create a graph node for this distro
91 102 # Link to the package node
92 - distro_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}'))
103 + distro_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}'))
93 104 g.add((distro_ref, RDF.type, URIRef(MANPAGE.Distribution)))
94 105 g.add((distro_ref, MANPAGE.name, Literal('debian')))
95 106 g.add((distro_ref, MANPAGE.codename, Literal(distro_codename)))