# lezzo.org/gamewiki/parser.py
import glob
import re
from typing import TypeVar, Iterator
T = TypeVar("T")

class WikiArticle:
    '''Abstract class that defines an article in the wiki'''
    def __init__(self, filename):
        def tags(content):
            # Parse the org-mode '#+FILETAGS: :tag1:tag2:' line into a list of tag names.
            for line in content:
                if line.startswith('#+FILETAGS:'):
                    tags = line.replace(' ', '').strip().split(':')[1:]
                    tags = list(filter(identity, tags))
                    return tags
            raise Exception(f'No tags in {filename}')

        def title(content):
            # Parse the org-mode '#+TITLE: Some title' line; extra colons are rejected.
            for line in content:
                if line.startswith('#+TITLE:'):
                    res = line.strip().replace(' ', '', 1).split(':')
                    if len(res) > 2:
                        raise Exception(f'Invalid title in {filename}')
                    return res[-1]
            raise Exception(f'No title in {filename}')

        with open(filename, 'r') as f:
            content = f.readlines()
        self.content = list(content)
        self.title = title(content)
        self.tags = tags(content)
        self.filename = filename
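
    # Illustrative input (filename, title, and tag names are invented): an
    # article file is expected to begin with org-mode keywords such as
    #
    #   #+TITLE: Sword fighting
    #   #+FILETAGS: :combat:weapons:
    #
    # which the constructor above parses into title='Sword fighting' and
    # tags=['combat', 'weapons'].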

    def enrich(self, linked_articles: Iterator):
        '''Return a generator of the content that will end up in the document before HTML conversion'''
        content = map(identity, self.content)

        def rep():
            # Yield every line up to (but excluding) the generated header,
            # then rebuild that section: the header followed by one org-mode
            # link per linked article. Anything after the header is dropped.
            for line in content:
                if line.strip() == self.header:
                    break
                else:
                    yield line
            yield self.header + '\n'
            for w in linked_articles:
                yield f'- [[../{w.filename}][{w.title}]]\n'

        return rep()

    def __repr__(self):
        return f'{type(self).__name__}({self.title}, {self.filename}, {self.tags})'
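
# Sketch of what WikiArticle.enrich() yields for a content article whose file
# ends with a '** Related articles' section (the article names are invented):
# every line above that header unchanged, then the header itself, then bullets
# like '- [[../combat.org][Sword fighting]]', one per linked article. The old
# body of the section is regenerated from scratch on every run.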

class ContentArticle(WikiArticle):
    '''The articles we wrote'''
    def __init__(self, filename):
        self.header = '** Related articles'
        super().__init__(filename)

class MetaPage(WikiArticle):
    '''Wiki article containing a section that must be generated by this program'''
    def __init__(self, filename):
        self.header = '** Pages in this category'
        super().__init__(filename)
        if len(self.tags) > 1:
            raise Exception(f'Multiple tags in metapage: {self.filename}')
        self.meta = self.tags[0]
        correct_section = list(filter(lambda l: l.strip() == self.header, self.content))
        if len(correct_section) != 1:
            raise Exception(f'Invalid meta section in {filename}')
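
# Illustrative layout (names invented): a category page such as meta/combat.org
# carries exactly one tag, e.g. '#+FILETAGS: :combat:', and contains a
# '** Pages in this category' heading; the main block below fills that section
# with links to every article tagged :combat:.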

identity = lambda x: x

def merge(d1, d2):
    d1.update(d2)
    return d1

def files():
    # Every article in the wiki root, except the todo list.
    return filter(lambda f: f not in {'todo.org'}, glob.glob('*.org'))

def metafiles():
    # Category pages live in the meta/ subdirectory.
    return glob.glob('meta/*.org')

def invert_map(map_: list[ContentArticle]):
    '''Map each tag to the articles carrying it: {a: [1, 2], b: [2, 3]} becomes {1: [a], 2: [a, b], 3: [b]}'''
    keys = set(i for e in map_ for i in e.tags)
    res: dict[str, list[WikiArticle]] = dict()
    for k in keys:
        res[k] = res.get(k, []) + [w for w in map_ if k in w.tags]
    return res
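
# Illustrative example (article and tag names invented): if combat.org is
# tagged :rules:weapons: and magic.org is tagged :rules:, then
# invert_map([combat, magic]) returns {'rules': [combat, magic], 'weapons': [combat]}.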

def links(article: ContentArticle):
    def haslink(line):
        return re.findall(r'\[\[(.+?)\]\]', line)

    def yield_links(line):
        # Scan the line and yield the target file of every org link of the
        # form [[./something.org][description]].
        i = 0
        while i < len(line) - 1:
            sub = line[i:]
            if sub.startswith('[[./') and ']]' in sub and '.org' in sub:  # org link to one org file
                ridx = sub.index('.org')
                yield sub[4:ridx + 4]  # skip the leading '[[./', keep everything up to '.org'
                i += sub.index(']]')
            i += 1

    for line in article.content:
        if haslink(line):
            yield from yield_links(line)
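
# Illustrative example (the link target is invented): for a line such as
# 'See [[./combat.org][the combat rules]] for details', links() yields
# 'combat.org', which the main block below resolves through the byfilename index.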

def writetodisk(m: WikiArticle, content: Iterator[str]):
    # Overwrite the article on disk with the freshly generated content.
    with open(m.filename, 'w') as f:
        f.writelines(content)
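
# Usage sketch (assuming the layout implied above: article *.org files in the
# wiki root and category pages under meta/): run `python parser.py` from the
# wiki root and the generated sections are rewritten in place, ready for the
# subsequent org-to-HTML conversion step.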

if __name__ == '__main__':
    files_ = [ContentArticle(f) for f in files()]
    byfilename = {c.filename: c for c in files_}
    meta_ = [MetaPage(f) for f in metafiles()]
    meta = {m.meta: m for m in meta_}
    tags = invert_map(files_)
    for t, articles in tags.items():
        if t in meta:
            newcontent = meta[t].enrich(articles)
            writetodisk(meta[t], newcontent)
    for f in files_:
        related = set(links(f))
        if related:
            newcontent = f.enrich(map(lambda name: byfilename[name], related))
            writetodisk(f, newcontent)