diff --git a/scripts/rdf.py b/scripts/rdf.py index 18e2514..3308cc3 100755 --- a/scripts/rdf.py +++ b/scripts/rdf.py @@ -4,6 +4,7 @@ import os import pathlib import rdflib import sys +import urllib.parse from rdflib import BNode, Graph, Literal, Namespace, URIRef from rdflib.namespace import RDF @@ -14,32 +15,30 @@ if not DEBIMAN_SERVING_DIR: print('envvar DEBIMAN_SERVING_DIR is not defined') exit() -for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff')): +MANPAGE = Namespace('dokk:manpages:') + +g = Graph() + +for absolute_file_path in pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff'): if not absolute_file_path.is_file(): - print('Not a file: {}'.format(absolute_file_path)) - continue + exit('Not a file: {}'.format(absolute_file_path)) # Remove the DEBIMAN_SERVING_DIR prefix from the path # Also remove .roff suffix from the filename - file = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('') - file_parts = list(file.parts) + file_path = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('') + file_parts = list(file_path.parts) + + distro_codename, distro_package, filename = file_parts[0], file_parts[1], file_parts[2] # Replace debian codenames with version numbers - if file_parts[0] == 'debian': - file_parts[1] = file_parts[1].replace('bookworm', '12') \ - .replace('bullseye', '11') - .replace('buster', '10') - file = pathlib.Path(*file_parts) + if distro_codename == 'buster': distro_number = 10 + elif distro_codename == 'bullseye': distro_number = 11 + elif distro_codename == 'bookworm': distro_number = 12 + else: exit('Distro codename not recognized.') - name, section, language = file.name.rsplit('.', 2) + name, section, language = filename.rsplit('.', 2) section_number, subsection = int(section[:1]), section[1:] - # Fix characters that cannot be used as valid URIs - id = str(file).replace(' ', '_') \ - .replace('[', '%5B') \ - .replace(']', '%5D') \ - .replace('#', '%23') - # Read files try: with open(absolute_file_path, 'r') as f: @@ -59,33 +58,35 @@ for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.ro except: html = '' - MANPAGE = Namespace('dokk:manpages:') - - turtle = f""" - {URIRef(MANPAGE[id]).n3()} - {URIRef(MANPAGE.name).n3()} {Literal(name).n3()} ; - {URIRef(MANPAGE.name_lowercase).n3()} {Literal(name.lower()).n3()} ; - {URIRef(MANPAGE.section).n3()} {Literal(section).n3()} ; - {URIRef(MANPAGE.section_lowercase).n3()} {Literal(section.lower()).n3()} ; - {URIRef(MANPAGE.section_number).n3()} {Literal(section_number).n3()} ; - {URIRef(MANPAGE.subsection).n3()} {Literal(subsection).n3()} ; - {URIRef(MANPAGE.language).n3()} {Literal(language).n3()} ; - {URIRef(MANPAGE.roff).n3()} {Literal(roff).n3()} ; - {URIRef(MANPAGE.plaintext).n3()} {Literal(plaintext).n3()} ; - {URIRef(MANPAGE.html).n3()} {Literal(html).n3()} ; - """ - - if file_parts[0] == 'debian': - turtle += f""" - {URIRef(MANPAGE.source).n3()} [ - {URIRef(MANPAGE.distribution_name).n3()} {Literal(file_parts[0]).n3()} ; - {URIRef(MANPAGE.distribution_version).n3()} {Literal(file_parts[1]).n3()} ; - {URIRef(MANPAGE.package).n3()} {Literal(file_parts[2]).n3()} ; - {URIRef(MANPAGE.filename).n3()} {Literal(file.name).n3()} ; - ] . - """ - - mangraph = Graph().parse(publicID='', format='turtle', data=turtle) - - triples = mangraph.serialize(destination=None, format='nt') - sys.stdout.write(triples) + # Create a graph node for this manpage + page_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}/{filename}')) + g.add((page_ref, RDF.type, URIRef(MANPAGE.Page))) + g.add((page_ref, URIRef(MANPAGE.filename), Literal(filename))) + g.add((page_ref, URIRef(MANPAGE.name), Literal(name))) + g.add((page_ref, URIRef(MANPAGE.name_lowercase), Literal(name.lower()))) + g.add((page_ref, URIRef(MANPAGE.section), Literal(section))) + g.add((page_ref, URIRef(MANPAGE.section_lowercase), Literal(section.lower()))) + g.add((page_ref, URIRef(MANPAGE.section_number), Literal(section_number))) + g.add((page_ref, URIRef(MANPAGE.subsection), Literal(subsection))) + g.add((page_ref, URIRef(MANPAGE.language), Literal(language))) + g.add((page_ref, URIRef(MANPAGE.roff), Literal(roff))) + g.add((page_ref, URIRef(MANPAGE.plaintext), Literal(plaintext))) + g.add((page_ref, URIRef(MANPAGE.html), Literal(html))) + + # Create a graph node for this package + # Link to the page node + package_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}/{distro_package}')) + g.add((package_ref, RDF.type, URIRef(MANPAGE.Package))) + g.add((package_ref, MANPAGE.name, Literal(distro_package))) + g.add((package_ref, MANPAGE.page, page_ref)) + + # Create a graph node for this distro + # Link to the package node + distro_ref = URIRef('dokk:manpages:debian/' + urllib.parse.quote(f'{distro_number}')) + g.add((distro_ref, RDF.type, URIRef(MANPAGE.Distribution))) + g.add((distro_ref, MANPAGE.name, Literal('debian'))) + g.add((distro_ref, MANPAGE.codename, Literal(distro_codename))) + g.add((distro_ref, MANPAGE.number, Literal(distro_number))) + g.add((distro_ref, MANPAGE.package, package_ref)) + +print(g.serialize(format='nt'))