4
|
4
|
|
import pathlib
|
5
|
5
|
|
import rdflib
|
6
|
6
|
|
import sys
|
7
|
|
- |
import urllib.parse
|
8
|
7
|
|
|
9
|
8
|
|
from rdflib import BNode, Graph, Literal, Namespace, URIRef
|
10
|
9
|
|
from rdflib.namespace import RDF
|
20
|
19
|
|
# A graph to store all the triples
|
21
|
20
|
|
g = Graph()
|
22
|
21
|
|
|
|
22
|
+ |
def percent_encode(string):
    """
    Make a manpage path safe to embed in a node URI.

    Node URIs look like <dokk:manpages:distro/package/name>, and a few manpage
    names contain characters that are not valid in a URL path. This function
    rewrites only those characters:

        ' '  ->  '_'
        '#'  ->  '%23'
        '%'  ->  '%25'

    Everything else, including '/' and non-ASCII (e.g. non-latin) characters,
    is returned untouched. That is why urllib.parse.quote() is not used here:
    quote() would percent-encode *every* non-ASCII character.

    :param string: the raw path, e.g. 'distro/package/filename'.
    :return: the same path with the characters listed above replaced.
    """

    # A literal '%' has to be escaped as well: otherwise 'a#b' and 'a%23b'
    # would both become 'a%23b' and two different pages would collapse into
    # the same node, and a stray '%' would leave the URI malformed.
    # str.translate() does all substitutions in one pass, so the '%' that is
    # introduced by '%23' is never re-encoded.
    escapes = str.maketrans({
        '%': '%25',
        ' ': '_',
        '#': '%23',
    })

    return string.translate(escapes)
|
|
33
|
+ |
|
23
|
34
|
|
for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
|
24
|
35
|
|
if not absolute_file_path.is_file():
|
25
|
36
|
|
exit('Not a file: {}'.format(absolute_file_path))
|
64
|
75
|
|
# because I haven't got enough RAM for storing thousands of pages.
|
65
|
76
|
|
g_page = Graph()
|
66
|
77
|
|
|
67
|
|
- |
page_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}/{filename}'))
|
|
78
|
+ |
page_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}/{distro_package}/{filename}'))
|
68
|
79
|
|
g_page.add((page_ref, RDF.type, URIRef(MANPAGE.Page)))
|
69
|
80
|
|
g_page.add((page_ref, URIRef(MANPAGE.filename), Literal(filename)))
|
70
|
81
|
|
g_page.add((page_ref, URIRef(MANPAGE.name), Literal(name)))
|
82
|
93
|
|
|
83
|
94
|
|
# Create a graph node for this package
|
84
|
95
|
|
# Link to the page node
|
85
|
|
- |
package_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}'))
|
|
96
|
+ |
package_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}/{distro_package}'))
|
86
|
97
|
|
g.add((package_ref, RDF.type, URIRef(MANPAGE.Package)))
|
87
|
98
|
|
g.add((package_ref, MANPAGE.name, Literal(distro_package)))
|
88
|
99
|
|
g.add((package_ref, MANPAGE.page, page_ref))
|
89
|
100
|
|
|
90
|
101
|
|
# Create a graph node for this distro
|
91
|
102
|
|
# Link to the package node
|
92
|
|
- |
distro_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}'))
|
|
103
|
+ |
distro_ref = URIRef('dokk:manpages:debian/' + percent_encode(f'{distro_number}'))
|
93
|
104
|
|
g.add((distro_ref, RDF.type, URIRef(MANPAGE.Distribution)))
|
94
|
105
|
|
g.add((distro_ref, MANPAGE.name, Literal('debian')))
|
95
|
106
|
|
g.add((distro_ref, MANPAGE.codename, Literal(distro_codename)))
|