home » dokk/manpages.git
Author zPlus <zplus@peers.community> 2023-11-22 16:51:19
Committer zPlus <zplus@peers.community> 2023-11-22 16:51:19
Commit d38b375 (patch)
Tree 821994b
Parent(s)

Rewrite "source" property with separate nodes.


commits diff: 4d43de4..d38b375
1 file changed, 48 insertions, 47 deletions (download)


Diffstat
-rwxr-xr-x scripts/rdf.py 95

Diff options
View
Side
Whitespace
Context lines
Inter-hunk lines
+48/-47 M   scripts/rdf.py
index 18e2514..3308cc3
old size: 3K - new size: 4K
@@ -4,6 +4,7 @@ import os
4 4 import pathlib
5 5 import rdflib
6 6 import sys
7 + import urllib.parse
7 8
8 9 from rdflib import BNode, Graph, Literal, Namespace, URIRef
9 10 from rdflib.namespace import RDF
@@ -14,32 +15,30 @@ if not DEBIMAN_SERVING_DIR:
14 15 print('envvar DEBIMAN_SERVING_DIR is not defined')
15 16 exit()
16 17
17 - for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff')):
18 + MANPAGE = Namespace('dokk:manpages:')
19 +
20 + g = Graph()
21 +
22 + for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'):
18 23 if not absolute_file_path.is_file():
19 - print('Not a file: {}'.format(absolute_file_path))
20 - continue
24 + exit('Not a file: {}'.format(absolute_file_path))
21 25
22 26 # Remove the DEBIMAN_SERVING_DIR prefix from the path
23 27 # Also remove .roff suffix from the filename
24 - file = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
25 - file_parts = list(file.parts)
28 + file_path = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
29 + file_parts = list(file_path.parts)
30 +
31 + distro_codename, distro_package, filename = file_parts[0], file_parts[1], file_parts[2]
26 32
27 33 # Replace debian codenames with version numbers
28 - if file_parts[0] == 'debian':
29 - file_parts[1] = file_parts[1].replace('bookworm', '12') \
30 - .replace('bullseye', '11')
31 - .replace('buster', '10')
32 - file = pathlib.Path(*file_parts)
34 + if distro_codename == 'buster': distro_number = 10
35 + elif distro_codename == 'bullseye': distro_number = 11
36 + elif distro_codename == 'bookworm': distro_number = 12
37 + else: exit('Distro codename not recognized.')
33 38
34 - name, section, language = file.name.rsplit('.', 2)
39 + name, section, language = filename.rsplit('.', 2)
35 40 section_number, subsection = int(section[:1]), section[1:]
36 41
37 - # Fix characters that cannot be used as valid URIs
38 - id = str(file).replace(' ', '_') \
39 - .replace('[', '%5B') \
40 - .replace(']', '%5D') \
41 - .replace('#', '%23')
42 -
43 42 # Read files
44 43 try:
45 44 with open(absolute_file_path, 'r') as f:
@@ -59,33 +58,35 @@ for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.ro
59 58 except:
60 59 html = ''
61 60
62 - MANPAGE = Namespace('dokk:manpages:')
63 -
64 - turtle = f"""
65 - {URIRef(MANPAGE[id]).n3()}
66 - {URIRef(MANPAGE.name).n3()} {Literal(name).n3()} ;
67 - {URIRef(MANPAGE.name_lowercase).n3()} {Literal(name.lower()).n3()} ;
68 - {URIRef(MANPAGE.section).n3()} {Literal(section).n3()} ;
69 - {URIRef(MANPAGE.section_lowercase).n3()} {Literal(section.lower()).n3()} ;
70 - {URIRef(MANPAGE.section_number).n3()} {Literal(section_number).n3()} ;
71 - {URIRef(MANPAGE.subsection).n3()} {Literal(subsection).n3()} ;
72 - {URIRef(MANPAGE.language).n3()} {Literal(language).n3()} ;
73 - {URIRef(MANPAGE.roff).n3()} {Literal(roff).n3()} ;
74 - {URIRef(MANPAGE.plaintext).n3()} {Literal(plaintext).n3()} ;
75 - {URIRef(MANPAGE.html).n3()} {Literal(html).n3()} ;
76 - """
77 -
78 - if file_parts[0] == 'debian':
79 - turtle += f"""
80 - {URIRef(MANPAGE.source).n3()} [
81 - {URIRef(MANPAGE.distribution_name).n3()} {Literal(file_parts[0]).n3()} ;
82 - {URIRef(MANPAGE.distribution_version).n3()} {Literal(file_parts[1]).n3()} ;
83 - {URIRef(MANPAGE.package).n3()} {Literal(file_parts[2]).n3()} ;
84 - {URIRef(MANPAGE.filename).n3()} {Literal(file.name).n3()} ;
85 - ] .
86 - """
87 -
88 - mangraph = Graph().parse(publicID='', format='turtle', data=turtle)
89 -
90 - triples = mangraph.serialize(destination=None, format='nt')
91 - sys.stdout.write(triples)
61 + # Create a graph node for this manpage
62 + page_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}/{filename}'))
63 + g.add((page_ref, RDF.type, URIRef(MANPAGE.Page)))
64 + g.add((page_ref, URIRef(MANPAGE.filename), Literal(filename)))
65 + g.add((page_ref, URIRef(MANPAGE.name), Literal(name)))
66 + g.add((page_ref, URIRef(MANPAGE.name_lowercase), Literal(name.lower())))
67 + g.add((page_ref, URIRef(MANPAGE.section), Literal(section)))
68 + g.add((page_ref, URIRef(MANPAGE.section_lowercase), Literal(section.lower())))
69 + g.add((page_ref, URIRef(MANPAGE.section_number), Literal(section_number)))
70 + g.add((page_ref, URIRef(MANPAGE.subsection), Literal(subsection)))
71 + g.add((page_ref, URIRef(MANPAGE.language), Literal(language)))
72 + g.add((page_ref, URIRef(MANPAGE.roff), Literal(roff)))
73 + g.add((page_ref, URIRef(MANPAGE.plaintext), Literal(plaintext)))
74 + g.add((page_ref, URIRef(MANPAGE.html), Literal(html)))
75 +
76 + # Create a graph node for this package
77 + # Link to the page node
78 + package_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}'))
79 + g.add((package_ref, RDF.type, URIRef(MANPAGE.Package)))
80 + g.add((package_ref, MANPAGE.name, Literal(distro_package)))
81 + g.add((package_ref, MANPAGE.page, page_ref))
82 +
83 + # Create a graph node for this distro
84 + # Link to the package node
85 + distro_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}'))
86 + g.add((distro_ref, RDF.type, URIRef(MANPAGE.Distribution)))
87 + g.add((distro_ref, MANPAGE.name, Literal('debian')))
88 + g.add((distro_ref, MANPAGE.codename, Literal(distro_codename)))
89 + g.add((distro_ref, MANPAGE.number, Literal(distro_number)))
90 + g.add((distro_ref, MANPAGE.package, package_ref))
91 +
92 + print(g.serialize(format='nt'))