4
|
4
|
|
import pathlib
|
5
|
5
|
|
import rdflib
|
6
|
6
|
|
import sys
|
|
7
|
+ |
import urllib.parse
|
7
|
8
|
|
|
8
|
9
|
|
from rdflib import BNode, Graph, Literal, Namespace, URIRef
|
9
|
10
|
|
from rdflib.namespace import RDF
|
14
|
15
|
|
print('envvar DEBIMAN_SERVING_DIR is not defined')
|
15
|
16
|
|
exit()
|
16
|
17
|
|
|
17
|
|
- |
for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff')):
|
|
18
|
+ |
MANPAGE = Namespace('dokk:manpages:')
|
|
19
|
+ |
|
|
20
|
+ |
g = Graph()
|
|
21
|
+ |
|
|
22
|
+ |
for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
|
18
|
23
|
|
if not absolute_file_path.is_file():
|
19
|
|
- |
print('Not a file: {}'.format(absolute_file_path))
|
20
|
|
- |
continue
|
|
24
|
+ |
exit('Not a file: {}'.format(absolute_file_path))
|
21
|
25
|
|
|
22
|
26
|
|
# Remove the DEBIMAN_SERVING_DIR prefix from the path
|
23
|
27
|
|
# Also remove .roff suffix from the filename
|
24
|
|
- |
file = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
|
25
|
|
- |
file_parts = list(file.parts)
|
|
28
|
+ |
file_path = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
|
|
29
|
+ |
file_parts = list(file_path.parts)
|
|
30
|
+ |
|
|
31
|
+ |
distro_codename, distro_package, filename = file_parts[0], file_parts[1], file_parts[2]
|
26
|
32
|
|
|
27
|
33
|
|
# Replace debian codenames with version numbers
|
28
|
|
- |
if file_parts[0] == 'debian':
|
29
|
|
- |
file_parts[1] = file_parts[1].replace('bookworm', '12') \
|
30
|
|
- |
.replace('bullseye', '11')
|
31
|
|
- |
.replace('buster', '10')
|
32
|
|
- |
file = pathlib.Path(*file_parts)
|
|
34
|
+ |
if distro_codename == 'buster': distro_number = 10
|
|
35
|
+ |
elif distro_codename == 'bullseye': distro_number = 11
|
|
36
|
+ |
elif distro_codename == 'bookworm': distro_number = 12
|
|
37
|
+ |
else: exit('Distro codename not recognized.')
|
33
|
38
|
|
|
34
|
|
- |
name, section, language = file.name.rsplit('.', 2)
|
|
39
|
+ |
name, section, language = filename.rsplit('.', 2)
|
35
|
40
|
|
section_number, subsection = int(section[:1]), section[1:]
|
36
|
41
|
|
|
37
|
|
- |
# Fix characters that cannot be used as valid URIs
|
38
|
|
- |
id = str(file).replace(' ', '_') \
|
39
|
|
- |
.replace('[', '%5B') \
|
40
|
|
- |
.replace(']', '%5D') \
|
41
|
|
- |
.replace('#', '%23')
|
42
|
|
- |
|
43
|
42
|
|
# Read files
|
44
|
43
|
|
try:
|
45
|
44
|
|
with open(absolute_file_path, 'r') as f:
|
59
|
58
|
|
except:
|
60
|
59
|
|
html = ''
|
61
|
60
|
|
|
62
|
|
- |
MANPAGE = Namespace('dokk:manpages:')
|
63
|
|
- |
|
64
|
|
- |
turtle = f"""
|
65
|
|
- |
{URIRef(MANPAGE[id]).n3()}
|
66
|
|
- |
{URIRef(MANPAGE.name).n3()} {Literal(name).n3()} ;
|
67
|
|
- |
{URIRef(MANPAGE.name_lowercase).n3()} {Literal(name.lower()).n3()} ;
|
68
|
|
- |
{URIRef(MANPAGE.section).n3()} {Literal(section).n3()} ;
|
69
|
|
- |
{URIRef(MANPAGE.section_lowercase).n3()} {Literal(section.lower()).n3()} ;
|
70
|
|
- |
{URIRef(MANPAGE.section_number).n3()} {Literal(section_number).n3()} ;
|
71
|
|
- |
{URIRef(MANPAGE.subsection).n3()} {Literal(subsection).n3()} ;
|
72
|
|
- |
{URIRef(MANPAGE.language).n3()} {Literal(language).n3()} ;
|
73
|
|
- |
{URIRef(MANPAGE.roff).n3()} {Literal(roff).n3()} ;
|
74
|
|
- |
{URIRef(MANPAGE.plaintext).n3()} {Literal(plaintext).n3()} ;
|
75
|
|
- |
{URIRef(MANPAGE.html).n3()} {Literal(html).n3()} ;
|
76
|
|
- |
"""
|
77
|
|
- |
|
78
|
|
- |
if file_parts[0] == 'debian':
|
79
|
|
- |
turtle += f"""
|
80
|
|
- |
{URIRef(MANPAGE.source).n3()} [
|
81
|
|
- |
{URIRef(MANPAGE.distribution_name).n3()} {Literal(file_parts[0]).n3()} ;
|
82
|
|
- |
{URIRef(MANPAGE.distribution_version).n3()} {Literal(file_parts[1]).n3()} ;
|
83
|
|
- |
{URIRef(MANPAGE.package).n3()} {Literal(file_parts[2]).n3()} ;
|
84
|
|
- |
{URIRef(MANPAGE.filename).n3()} {Literal(file.name).n3()} ;
|
85
|
|
- |
] .
|
86
|
|
- |
"""
|
87
|
|
- |
|
88
|
|
- |
mangraph = Graph().parse(publicID='', format='turtle', data=turtle)
|
89
|
|
- |
|
90
|
|
- |
triples = mangraph.serialize(destination=None, format='nt')
|
91
|
|
- |
sys.stdout.write(triples)
|
|
61
|
+ |
# Create a graph node for this manpage
|
|
62
|
+ |
page_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}/{filename}'))
|
|
63
|
+ |
g.add((page_ref, RDF.type, URIRef(MANPAGE.Page)))
|
|
64
|
+ |
g.add((page_ref, URIRef(MANPAGE.filename), Literal(filename)))
|
|
65
|
+ |
g.add((page_ref, URIRef(MANPAGE.name), Literal(name)))
|
|
66
|
+ |
g.add((page_ref, URIRef(MANPAGE.name_lowercase), Literal(name.lower())))
|
|
67
|
+ |
g.add((page_ref, URIRef(MANPAGE.section), Literal(section)))
|
|
68
|
+ |
g.add((page_ref, URIRef(MANPAGE.section_lowercase), Literal(section.lower())))
|
|
69
|
+ |
g.add((page_ref, URIRef(MANPAGE.section_number), Literal(section_number)))
|
|
70
|
+ |
g.add((page_ref, URIRef(MANPAGE.subsection), Literal(subsection)))
|
|
71
|
+ |
g.add((page_ref, URIRef(MANPAGE.language), Literal(language)))
|
|
72
|
+ |
g.add((page_ref, URIRef(MANPAGE.roff), Literal(roff)))
|
|
73
|
+ |
g.add((page_ref, URIRef(MANPAGE.plaintext), Literal(plaintext)))
|
|
74
|
+ |
g.add((page_ref, URIRef(MANPAGE.html), Literal(html)))
|
|
75
|
+ |
|
|
76
|
+ |
# Create a graph node for this package
|
|
77
|
+ |
# Link to the page node
|
|
78
|
+ |
package_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}'))
|
|
79
|
+ |
g.add((package_ref, RDF.type, URIRef(MANPAGE.Package)))
|
|
80
|
+ |
g.add((package_ref, MANPAGE.name, Literal(distro_package)))
|
|
81
|
+ |
g.add((package_ref, MANPAGE.page, page_ref))
|
|
82
|
+ |
|
|
83
|
+ |
# Create a graph node for this distro
|
|
84
|
+ |
# Link to the package node
|
|
85
|
+ |
distro_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}'))
|
|
86
|
+ |
g.add((distro_ref, RDF.type, URIRef(MANPAGE.Distribution)))
|
|
87
|
+ |
g.add((distro_ref, MANPAGE.name, Literal('debian')))
|
|
88
|
+ |
g.add((distro_ref, MANPAGE.codename, Literal(distro_codename)))
|
|
89
|
+ |
g.add((distro_ref, MANPAGE.number, Literal(distro_number)))
|
|
90
|
+ |
g.add((distro_ref, MANPAGE.package, package_ref))
|
|
91
|
+ |
|
|
92
|
+ |
print(g.serialize(format='nt'))
|