home » dokk/manpages.git
ID: 18e251498fcd1496133a1a0233b015dbb89a843d
91 lines — 3K — View raw


#!/usr/bin/env python3

import os
import pathlib
import rdflib
import sys

from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF

# Directory that is scanned recursively for *.roff files; taken from the
# environment and required for the script to do anything.
DEBIMAN_SERVING_DIR = os.getenv('DEBIMAN_SERVING_DIR')

if not DEBIMAN_SERVING_DIR:
    # sys.exit() with a string writes the message to stderr and exits with
    # status 1. stdout is reserved for the generated N-Triples, and a missing
    # configuration must not look like a successful run to the caller.
    sys.exit('envvar DEBIMAN_SERVING_DIR is not defined')

# Namespace under which every manpage resource and property is minted.
# Built once: it does not depend on the file being processed.
MANPAGE = Namespace('dokk:manpages:')

# Debian codenames and the version numbers that replace them in the path.
# Applied in this order as plain substring replacements.
DEBIAN_CODENAMES = (
    ('bookworm', '12'),
    ('bullseye', '11'),
    ('buster', '10'),
)

# Characters that cannot be used as-is in a URI, and their replacements.
URI_UNSAFE_CHARS = str.maketrans({
    ' ': '_',
    '[': '%5B',
    ']': '%5D',
    '#': '%23',
})


def _read_text_or_empty(path):
    """Return the text content of *path*, or '' if it cannot be read.

    Reading is best-effort: a missing, unreadable or undecodable file yields
    an empty string. Only I/O and decoding errors are swallowed, so
    KeyboardInterrupt and programming errors still propagate.
    """
    try:
        with open(path, 'r') as f:
            return f.read()
    except (OSError, UnicodeError):
        return ''


for absolute_file_path in sorted(pathlib.Path(DEBIMAN_SERVING_DIR).glob('**/*.roff')):
    if not absolute_file_path.is_file():
        # stdout carries the N-Triples stream, so diagnostics go to stderr.
        print('Not a file: {}'.format(absolute_file_path), file=sys.stderr)
        continue

    # Remove the DEBIMAN_SERVING_DIR prefix from the path
    # Also remove .roff suffix from the filename
    file = absolute_file_path.relative_to(DEBIMAN_SERVING_DIR).with_suffix('')
    file_parts = list(file.parts)
    is_debian = file_parts[0] == 'debian'

    # Replace debian codenames with version numbers
    if is_debian:
        for codename, version in DEBIAN_CODENAMES:
            file_parts[1] = file_parts[1].replace(codename, version)
        file = pathlib.Path(*file_parts)

    # The file name is expected to look like <name>.<section>.<language>,
    # where the section starts with a digit optionally followed by a
    # subsection suffix. Anything else is skipped instead of aborting the
    # whole run.
    try:
        name, section, language = file.name.rsplit('.', 2)
        section_number, subsection = int(section[:1]), section[1:]
    except ValueError:
        print('Unexpected file name: {}'.format(absolute_file_path),
              file=sys.stderr)
        continue

    # Fix characters that cannot be used as valid URIs
    page_id = str(file).translate(URI_UNSAFE_CHARS)

    # Read the roff source and its sibling renderings (<path>.txt and
    # <path>.html); each one falls back to '' when unavailable.
    roff = _read_text_or_empty(absolute_file_path)
    plaintext = _read_text_or_empty(f'{absolute_file_path}.txt')
    html = _read_text_or_empty(f'{absolute_file_path}.html')

    # Build the description as Turtle text. Every term is rendered with
    # .n3() so that literals are quoted and escaped by rdflib.
    turtle = f"""
    {URIRef(MANPAGE[page_id]).n3()}
        {URIRef(MANPAGE.name).n3()} {Literal(name).n3()} ;
        {URIRef(MANPAGE.name_lowercase).n3()} {Literal(name.lower()).n3()} ;
        {URIRef(MANPAGE.section).n3()} {Literal(section).n3()} ;
        {URIRef(MANPAGE.section_lowercase).n3()} {Literal(section.lower()).n3()} ;
        {URIRef(MANPAGE.section_number).n3()} {Literal(section_number).n3()} ;
        {URIRef(MANPAGE.subsection).n3()} {Literal(subsection).n3()} ;
        {URIRef(MANPAGE.language).n3()} {Literal(language).n3()} ;
        {URIRef(MANPAGE.roff).n3()} {Literal(roff).n3()} ;
        {URIRef(MANPAGE.plaintext).n3()} {Literal(plaintext).n3()} ;
        {URIRef(MANPAGE.html).n3()} {Literal(html).n3()} ;
    """

    if is_debian:
        # Attach the provenance as a blank node and close the statement.
        turtle += f"""
        {URIRef(MANPAGE.source).n3()} [
            {URIRef(MANPAGE.distribution_name).n3()} {Literal(file_parts[0]).n3()} ;
            {URIRef(MANPAGE.distribution_version).n3()} {Literal(file_parts[1]).n3()} ;
            {URIRef(MANPAGE.package).n3()} {Literal(file_parts[2]).n3()} ;
            {URIRef(MANPAGE.filename).n3()} {Literal(file.name).n3()} ;
        ] .
        """
    else:
        # Without a source block the statement still needs its final '.',
        # otherwise the Turtle text is incomplete and cannot be parsed.
        turtle += ' .\n'

    mangraph = Graph().parse(publicID='', format='turtle', data=turtle)

    # Emit this page's triples as N-Triples on stdout.
    triples = mangraph.serialize(destination=None, format='nt')
    sys.stdout.write(triples)