#!/usr/bin/env python3
import os
import pathlib
import rdflib
import sys
from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF
# Convert the man pages found under $DEBIMAN_SERVING_DIR into RDF.
#
# Every `*.roff` file below that directory becomes one resource in the
# `dokk:manpages:` namespace. The resource carries the page name, section,
# language and the contents of the `.roff` file plus its `.roff.txt` and
# `.roff.html` siblings (an unreadable or missing sibling yields an empty
# string). The resulting triples are written to stdout as N-Triples, so all
# diagnostics go to stderr to keep that stream parseable.
DEBIMAN_SERVING_DIR = os.getenv('DEBIMAN_SERVING_DIR')
if not DEBIMAN_SERVING_DIR:
    # sys.exit() with a string prints it to stderr and exits with status 1,
    # so callers can tell the run failed.
    sys.exit('envvar DEBIMAN_SERVING_DIR is not defined')

# Namespace shared by the subject URIs and by every predicate.
MANPAGE = Namespace('dokk:manpages:')

# Debian codenames and the version number that replaces them in the path.
DEBIAN_CODENAMES = (
    ('bookworm', '12'),
    ('bullseye', '11'),
    ('buster', '10'),
)

# Characters that cannot be used in the subject URI, and their replacements.
URI_REPLACEMENTS = (
    (' ', '_'),
    ('[', '%5B'),
    (']', '%5D'),
    ('#', '%23'),
)


def _read_text(path):
    """Return the text stored at *path*, or '' if it cannot be read."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeError):
        # Best effort: a missing or undecodable rendering must not abort
        # the whole conversion.
        return ''


def _predicate_object(term, value):
    """Return the Turtle `predicate object` pair for a MANPAGE term."""
    return '{} {}'.format(MANPAGE[term].n3(), Literal(value).n3())


for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff')):
    if not absolute_file_path.is_file():
        print('Not a file: {}'.format(absolute_file_path), file=sys.stderr)
        continue

    # Remove the DEBIMAN_SERVING_DIR prefix from the path
    # Also remove .roff suffix from the filename
    file = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
    file_parts = list(file.parts)
    is_debian = file_parts[0] == 'debian'

    # Replace debian codenames with version numbers
    if is_debian:
        for codename, version in DEBIAN_CODENAMES:
            file_parts[1] = file_parts[1].replace(codename, version)
    file = pathlib.Path(*file_parts)

    # File names look like <name>.<section>.<language>, where the section
    # starts with a digit optionally followed by a subsection (e.g. "3pm").
    try:
        name, section, language = file.name.rsplit('.', 2)
        section_number, subsection = int(section[:1]), section[1:]
    except ValueError:
        print('Skipping file with unexpected name: {}'.format(absolute_file_path),
              file=sys.stderr)
        continue

    # Fix characters that cannot be used as valid URIs
    page_id = str(file)
    for char, replacement in URI_REPLACEMENTS:
        page_id = page_id.replace(char, replacement)

    # Read files
    roff = _read_text(absolute_file_path)
    plaintext = _read_text(f'{absolute_file_path}.txt')
    html = _read_text(f'{absolute_file_path}.html')

    properties = [
        ('name', name),
        ('name_lowercase', name.lower()),
        ('section', section),
        ('section_lowercase', section.lower()),
        ('section_number', section_number),
        ('subsection', subsection),
        ('language', language),
        ('roff', roff),
        ('plaintext', plaintext),
        ('html', html),
    ]
    turtle_lines = [MANPAGE[page_id].n3()]
    turtle_lines.extend(
        '    {} ;'.format(_predicate_object(term, value))
        for term, value in properties
    )

    if is_debian:
        # Describe where the page comes from with an anonymous node.
        source_properties = [
            ('distribution_name', file_parts[0]),
            ('distribution_version', file_parts[1]),
            ('package', file_parts[2]),
            ('filename', file.name),
        ]
        turtle_lines.append('    {} ['.format(MANPAGE['source'].n3()))
        turtle_lines.extend(
            '        {} ;'.format(_predicate_object(term, value))
            for term, value in source_properties
        )
        turtle_lines.append('    ] ;')

    # Always terminate the statement, whether or not a source was added;
    # Turtle allows a trailing ';' before the final '.'.
    turtle_lines.append('    .')
    turtle = '\n'.join(turtle_lines) + '\n'

    mangraph = Graph().parse(publicID='', format='turtle', data=turtle)
    triples = mangraph.serialize(destination=None, format='nt')
    sys.stdout.write(triples)