Sigma

From:	Georgios Atheridis
Date:	2023-04-01
There's not much going on here; I am just trying out this feature to see how well it works. I would also like to make sure everything works correctly, like the new lines and all.
Hopefully it's good!
#!/bin/python3
# BSD 2-Clause License
#
# Copyright (c) 2023, Georgios Atheridis <georgios@atheridis.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import re
import tomllib
import time
import html

import markdown


# Matches a line that consists solely of an `{% extend "name" %}` directive.
# Group 2 is the name; the closing quote must be the same character as the
# opening one (back-reference \1).
regex_extend = re.compile(r"^{%\s?extend (\"|')(.+)\1\s?%}$")
# Matches a `{{ name }}` placeholder; group 1 is the whitespace-free name.
regex_variable = re.compile(r"{{\s?(\S+)\s?}}")
# NOTE(review): this pattern is the literal text "None" and defines no capture
# group, yet interpret_no_recursion() calls .group(1) on its matches, which
# raises IndexError. The intended placeholder syntax appears to have been
# lost -- confirm and restore it.
regex_variable_no_interpret = re.compile(r"None")
# Matches a `{% file "name" %}` directive anywhere in the text; group 2 is the
# file name, quoted the same way as for the extend directive.
regex_execute = re.compile(r"{%\s?file (\"|')(.+)\1\s?%}")


def md_to_html(md: str) -> str:
    """Render Markdown text to HTML.

    The "extra", "sane_lists" and "meta" extensions of the ``markdown``
    package are enabled for the conversion.
    """
    enabled_extensions = ["extra", "sane_lists", "meta"]
    rendered = markdown.markdown(md, extensions=enabled_extensions)
    return rendered

def md_meta(md: str) -> dict:
    """Return the metadata that the Markdown "meta" extension collects from md.

    The HTML produced by the conversion is discarded; only the ``Meta``
    attribute of the converter is returned.
    """
    converter = markdown.Markdown(extensions=["meta"])
    # Meta is read only after a conversion has been run on the text.
    converter.convert(md)
    return converter.Meta


def get_max_mtime(dir: str) -> float:
    """Return the most recent modification time of any file below dir.

    The whole tree rooted at ``dir`` is walked and the maximum is carried
    across every directory, so the result is the newest file anywhere in the
    tree rather than the newest file of the last directory visited.

    Args:
        dir: Directory to scan recursively.

    Returns:
        The largest ``os.path.getmtime`` value found, or 0.0 when the tree
        contains no files (or ``dir`` does not exist).
    """
    newest = 0.0
    for dirpath, _, filenames in os.walk(dir):
        for filename in filenames:
            # Directories without files contribute nothing instead of making
            # max() fail on an empty sequence.
            newest = max(newest, os.path.getmtime(os.path.join(dirpath, filename)))
    return newest


def create_page_index(data: dict):
    """Store the namespace tuple of every visible page in data["_PAGES"].

    The tree under data["_PAGE_ROOT"] is walked.  Directories whose path
    relative to the page root starts with "_" or "." are skipped.  A directory
    that carries an extension is recorded as one page (extension stripped);
    in any other directory each file not starting with "_" or "." is recorded
    with its extension stripped.
    """
    page_root = data["_PAGE_ROOT"]
    index: list[tuple] = []
    for current_dir, _, filenames in os.walk(page_root):
        rel_dir = os.path.relpath(current_dir, page_root)
        if rel_dir == ".":
            # The page root itself contributes no path component.
            rel_dir = ""
        elif rel_dir.startswith(("_", ".")):
            continue

        dir_stem, dir_suffix = os.path.splitext(rel_dir)
        if dir_suffix:
            index.append(split_path(dir_stem))
            continue

        index.extend(
            split_path(os.path.splitext(os.path.join(rel_dir, name))[0])
            for name in filenames
            if not name.startswith(("_", "."))
        )
    data["_PAGES"] = index


def initialize_values(data: dict):
    """Seed "_name", "_date" and "_EXT" for every page under the page root.

    A directory whose name carries an extension is treated as a single page:
    its date comes from the newest file inside it and its "_EXT" is the
    directory's extension.  Every file in any other directory is a page of its
    own; Markdown files get "_EXT" ".html", other files keep their extension.

    "_name" and "_date" are written with replace=False, so values that
    get_value() already finds are left alone; "_EXT" is always overwritten.

    The extension test uses the path relative to the page root, so a page root
    whose own name contains a dot (for example "example.com") is not mistaken
    for a page directory.

    Args:
        data: Site data; reads "_PAGE_ROOT" and "_DATE_FORMAT" and receives
            the seeded values.
    """
    page_root = data["_PAGE_ROOT"]

    def seed(namespace: tuple, mtime: float, ext: str) -> None:
        # Shared by both page kinds; keeps the original write order.
        update_value(data, namespace, "_name", namespace[-1], False)
        update_value(
            data,
            namespace,
            "_date",
            time.strftime(data["_DATE_FORMAT"], time.localtime(mtime)),
            False,
        )
        update_value(data, namespace, "_EXT", ext)

    for dirpath, _, filenames in os.walk(page_root):
        rel_dir = os.path.relpath(dirpath, page_root)
        rel_dir_no_type, dir_ext = os.path.splitext(rel_dir)
        if dir_ext:
            seed(split_path(rel_dir_no_type), get_max_mtime(dirpath), dir_ext)
            continue
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            rel_path_no_type, file_ext = os.path.splitext(
                os.path.relpath(path, page_root)
            )
            seed(
                split_path(rel_path_no_type),
                os.path.getmtime(path),
                ".html" if file_ext == ".md" else file_ext,
            )


def get_value_from_file(
    data: dict, namespace: tuple, key: str, interpret_ok: bool
) -> str | None:
    path = os.path.join(data["_PAGE_ROOT"], *namespace[:-1])
    for dir in os.listdir(path):
        dir_name, dir_ext = os.path.splitext(dir)
        if namespace[-1] == dir_name:
            break
    else:
        return

    if not os.path.isdir(os.path.join(path, dir_name + dir_ext)):
        if dir_ext != ".md":
            return
        with open(os.path.join(path, dir_name + dir_ext), "r") as file_in:
            m = md_meta(file_in.read()).get(key[1:])
            if not m:
                return
            if len(m) == 1:
                return m[0]
            return m

    try:
        with open(os.path.join(path, dir_name + dir_ext, "_value.md"), "r") as file_in:
            m = md_meta(file_in.read()).get(key[1:])
            if m:
                if len(m) == 1:
                    return m[0]
                return m
    except FileNotFoundError:
        pass

    for file in os.listdir(os.path.join(path, dir_name + dir_ext)):
        file_name, file_ext = os.path.splitext(file)
        if key == file_name:
            with open(os.path.join(path, dir_name + dir_ext, file), "r") as file_in:
                if interpret_ok:
                    _value = interpret(file_in.read(), data, namespace)
                else:
                    _value = file_in.read()
                if file_ext == ".md":
                    _value = md_to_html(_value)
            return _value


def get_value(data: dict, namespace: tuple, key: str, interpret_ok=True):
    """Resolve key for namespace, preferring values stored in page files.

    Unless key is "_value", the page files are consulted first through
    get_value_from_file(); a truthy result wins.  A missing file or an empty
    namespace during that lookup is ignored.  Otherwise the nested data
    dictionary is walked along namespace and the deepest level that defines
    key supplies the value; levels that do not exist are skipped.
    """
    if key != "_value":
        try:
            from_file = get_value_from_file(data, namespace, key, interpret_ok)
        except (FileNotFoundError, IndexError):
            from_file = None
        if from_file:
            return from_file

    scope = data
    found = scope.get(key)
    for part in namespace:
        # Stay on the current level when the namespace part is absent.
        scope = scope.get(part, scope)
        found = scope.get(key, found)
    return found


def update_value(data: dict, namespace: tuple, key: str, value, replace=True):
    """Store value under key at the nested position given by namespace.

    Missing intermediate dictionaries are created.  With replace=False the
    write is skipped when get_value() already yields a truthy value for the
    same namespace and key.
    """
    keep_existing = not replace and get_value(data, namespace, key)
    if keep_existing:
        return

    node = data
    for part in namespace:
        node = node.setdefault(part, {})
    node[key] = value


def split_path(path: str) -> tuple:
    """Split path into a tuple of its components, dropping a leading root.

    ``"a/b/c"`` becomes ``("a", "b", "c")`` and ``"/a"`` becomes ``("a",)``.
    A path with no directory part yields a one-element tuple, so ``""`` gives
    ``("",)``.

    The split is done iteratively and stops as soon as ``os.path.split`` can
    no longer shorten the remainder.  Roots such as ``"//"`` or a Windows
    drive root are returned unchanged by ``os.path.split`` and would
    otherwise be split forever.
    """
    parts = []
    while True:
        rest, tail = os.path.split(path)
        if parts and rest == path:
            # An unsplittable root was reached; it is not a component.
            break
        parts.append(tail)
        if rest in ("", os.path.sep) or rest == path:
            break
        path = rest
    parts.reverse()
    return tuple(parts)


def interpret_no_recursion(file_value: str, data: dict, namespace: tuple) -> str:
    """Substitute non-interpreted placeholders with HTML-escaped values.

    Each match of ``regex_variable_no_interpret`` is replaced by the escaped
    string form of the value it names.  The value is looked up with
    interpret_ok=False and the search resumes after the inserted text, so
    placeholder syntax inside a substituted value is never expanded.

    A dotted name containing "self" is resolved relative to namespace (the
    first "self" part is dropped); any other name is absolute.

    NOTE(review): group 1 of the match is used as the variable name, but the
    module-level pattern as currently defined has no capture group, so any
    match raises IndexError -- confirm the intended pattern.

    Args:
        file_value: Text to process.
        data: Site data used for lookups.
        namespace: Namespace of the page being rendered.

    Returns:
        The text with every placeholder replaced.
    """
    start_pos = 0
    while variable := regex_variable_no_interpret.search(file_value, start_pos):
        parts = variable.group(1).split(".")
        if "self" in parts:
            parts.remove("self")
            varspace = namespace + tuple(parts)
        else:
            varspace = tuple(parts)
        repl_value = html.escape(
            str(get_value(data, varspace[:-1], varspace[-1], False))
        )
        # Splice at the matched span.  str.replace(..., 1) would instead hit
        # the first identical text in the string, which may sit inside an
        # earlier substituted value.
        file_value = (
            file_value[: variable.start()] + repl_value + file_value[variable.end() :]
        )
        start_pos = variable.start() + len(repl_value)
    return file_value


def interpret(file_value: str, data: dict, namespace: tuple) -> str:
    """Expand file directives and variable placeholders until none remain.

    Each round first replaces every ``{% file "name" %}`` directive with the
    content of that file from data["_TEMPLATES"]: ".py" files are executed
    and contribute the ``_value`` they define, ".md" files are converted to
    HTML, and anything else is inserted verbatim.  It then replaces every
    ``{{ name }}`` placeholder with the value get_value() yields; list values
    are joined with ", ".  A dotted name containing "self" is resolved
    relative to namespace.  Rounds repeat while either construct is present.
    """
    while regex_execute.search(file_value) or regex_variable.search(file_value):
        # Pass 1: file directives.
        while (directive := regex_execute.search(file_value)) is not None:
            template_name = directive.group(2)
            suffix = os.path.splitext(template_name)[1]
            template_path = os.path.join(data["_TEMPLATES"], template_name)
            with open(template_path, "r") as template:
                if suffix == ".py":
                    # NOTE: exec runs the template with full privileges;
                    # template files must be trusted.
                    scope = {
                        "data": data,
                        "namespace": namespace,
                        "get_value": get_value,
                    }
                    exec(template.read(), scope)
                    rendered = scope["_value"]
                elif suffix == ".md":
                    rendered = md_to_html(template.read())
                else:
                    rendered = template.read()
            # Backslashes are doubled so re.sub inserts the text literally.
            escaped = rendered.replace("\\", "\\\\")
            file_value = regex_execute.sub(escaped, file_value, 1)

        # Pass 2: variable placeholders.
        while (placeholder := regex_variable.search(file_value)) is not None:
            parts = placeholder.group(1).split(".")
            if "self" in parts:
                parts.remove("self")
                varspace = namespace + tuple(parts)
            else:
                varspace = tuple(parts)
            resolved = get_value(data, varspace[:-1], varspace[-1])
            if isinstance(resolved, list):
                resolved = ", ".join(resolved)
            escaped = str(resolved).replace("\\", "\\\\")
            file_value = regex_variable.sub(escaped, file_value, 1)

    return file_value


def dir_to_file(file: str) -> dict[str, str]:
    """Map value names to the files that hold them.

    For a directory, every entry is keyed by its name without extension and
    mapped to its path inside the directory.  Any other path is returned as
    the single "_value" entry.
    """
    if os.path.isdir(file):
        return {
            os.path.splitext(entry)[0]: os.path.join(file, entry)
            for entry in os.listdir(file)
        }
    return {"_value": file}


def generate_output(file: str, data: dict, namespace: tuple) -> str:
    """Render the page stored at file and return the final text.

    Steps:
      1. Read the page's "_value" file (the file itself, or the "_value"
         entry when file is a directory).
      2. If its first line is an ``{% extend "template" %}`` directive,
         record the template as "_extend" and drop that line.
      3. Expand directives and placeholders with interpret(), convert
         Markdown to HTML, and store the result as the namespace's "_value".
      4. If "_extend" is set, clear it and render that template from
         data["_TEMPLATES"] under the same namespace.
      5. Run interpret_no_recursion() over the stored "_value", store it
         again and return it.

    An empty page file is rendered as empty text instead of failing on the
    first-line lookup, and an extend directive that is the only line of a
    file without a trailing newline is removed as well.

    Args:
        file: Path of the page file or page directory.
        data: Site data; updated in place.
        namespace: Namespace under which the page's values are stored.

    Returns:
        The rendered page text.

    Raises:
        KeyError: If file is a directory without a "_value" entry.
    """
    files = dir_to_file(file)
    _, _value_file_type = os.path.splitext(files["_value"])
    with open(files["_value"], "r") as in_file:
        _value = in_file.read()

    # splitlines() is empty for an empty file, so fall back to "".
    first_line = next(iter(_value.splitlines()), "")
    if result := regex_extend.search(first_line):
        update_value(data, namespace, "_extend", result.group(2))
        # The directive spans the whole first line; strip it, then the line
        # break after it if there is one.
        _value = _value.removeprefix(result.group(0)).removeprefix("\n")

    _value = interpret(_value, data, namespace)
    if _value_file_type == ".md":
        _value = md_to_html(_value)
    update_value(data, namespace, "_value", _value)

    if _extend := get_value(data, namespace, "_extend"):
        # Cleared before recursing so the template is not extended again
        # unless it declares its own extend directive.
        update_value(data, namespace, "_extend", "")
        generate_output(os.path.join(data["_TEMPLATES"], _extend), data, namespace)

    update_value(
        data,
        namespace,
        "_value",
        interpret_no_recursion(
            str(get_value(data, namespace, "_value")), data, namespace
        ),
    )

    return str(get_value(data, namespace, "_value"))


def generate_builds(build_file: str, data: dict) -> list[tuple[tuple, str]]:
    """Execute a "_build.py" script and register the pages it declares.

    The script is executed with ``data``, ``namespace`` (the directory of the
    build file relative to the page root, as a tuple) and ``get_value`` in
    scope, and must define ``_value`` as an iterable of ``(path, content)``
    pairs where path is a sequence of name parts.

    Every path is prefixed with the build file's directory relative to the
    page root.  The prefix is added in full, so build files nested more than
    one directory deep work too.  For each page "_EXT", "_name" and "_date"
    (the build file's modification time) are seeded without replacing
    existing values, and the page is appended to data["_PAGES"].

    NOTE: exec runs the build script with full privileges; build files must
    be trusted.

    Args:
        build_file: Path to the build script.
        data: Site data; updated in place.

    Returns:
        A list of ``(namespace_tuple, content)`` pairs, one per declared page.
    """
    page_root = data["_PAGE_ROOT"]
    rel_path_no_type, _ = os.path.splitext(os.path.relpath(build_file, page_root))
    namespace = split_path(rel_path_no_type)[:-1]

    scope = {"data": data, "namespace": namespace, "get_value": get_value}
    with open(build_file, "r") as f:
        exec(f.read(), scope)

    # Both values are the same for every page of this build file.
    rel_dir = os.path.relpath(os.path.split(build_file)[0], page_root)
    prefix = () if rel_dir == "." else split_path(rel_dir)
    build_date = time.strftime(
        data["_DATE_FORMAT"], time.localtime(os.path.getmtime(build_file))
    )

    builds = []
    for path, content in scope["_value"]:
        full_path = prefix + tuple(path)
        update_value(data, full_path, "_EXT", ".html", False)
        update_value(data, full_path, "_name", full_path[-1], False)
        update_value(data, full_path, "_date", build_date, False)
        data["_PAGES"].append(full_path)
        builds.append((full_path, content))
    return builds


def interpret_builds(build_data: list[tuple[tuple, str]], data: dict):
    """Render every generated page and write it below data["_OUT"].

    For each ``(namespace, content)`` pair the content is expanded with
    interpret() and stored as the namespace's "_value".  A set "_extend"
    value is cleared and the named template rendered through
    generate_output().  The stored value is then passed through
    interpret_no_recursion() and written to ``<namespace path>.html`` in the
    output directory, creating parent directories as needed.
    """
    for namespace, content in build_data:
        target_parts = namespace[:-1] + (namespace[-1] + ".html",)

        expanded = interpret(content, data, namespace)
        update_value(data, namespace, "_value", expanded)

        template = get_value(data, namespace, "_extend")
        if template:
            update_value(data, namespace, "_extend", "")
            generate_output(
                os.path.join(data["_TEMPLATES"], template), data, namespace
            )

        current = str(get_value(data, namespace, "_value"))
        update_value(
            data,
            namespace,
            "_value",
            interpret_no_recursion(current, data, namespace),
        )

        out_dir = os.path.join(data["_OUT"], *target_parts[:-1])
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(data["_OUT"], *target_parts)
        with open(out_path, "w") as out_file:
            out_file.write(get_value(data, namespace, "_value"))


def main(args):
    """Build the site described by the parsed command-line arguments.

    The TOML data file is loaded, the "_TEMPLATES", "_PAGE_ROOT", "_OUT" and
    "_DATE_FORMAT" settings are filled in (command-line value first, then the
    data file, then a built-in default), page values and the page index are
    initialised, every "_build.py" is executed and its pages written, and
    finally each page is rendered into the output directory.

    Directories are filtered by their path relative to the page root, using
    the same rule as create_page_index(): a relative path starting with "_"
    or "." is skipped.  This makes the result independent of how the page
    root itself is spelled (for example "./pages" or a root name with a dot).

    Args:
        args: Namespace with ``data`` (an open binary file), ``templates``,
            ``page_root``, ``out``, ``date_format`` and ``pages``.
    """
    # Load toml file; close it even when parsing fails.
    try:
        data = tomllib.load(args.data)
    finally:
        args.data.close()

    # Assign default values if not set
    settings = (
        ("_TEMPLATES", args.templates, "templates"),
        ("_PAGE_ROOT", args.page_root, "pages"),
        ("_OUT", args.out, "out"),
        ("_DATE_FORMAT", args.date_format, "%Y-%m-%d"),
    )
    for key, override, default in settings:
        if override:
            data[key] = override
        elif not data.get(key):
            data[key] = default

    initialize_values(data)

    create_page_index(data)

    page_root = data["_PAGE_ROOT"]

    def is_hidden_dir(dirpath: str) -> bool:
        # The page root itself is "." and must never count as hidden.
        rel_dir = os.path.relpath(dirpath, page_root)
        return rel_dir != "." and rel_dir.startswith(("_", "."))

    builds = []
    for root, _, files in os.walk(page_root):
        if is_hidden_dir(root) or "_build.py" not in files:
            continue
        builds.extend(generate_builds(os.path.join(root, "_build.py"), data))

    interpret_builds(builds, data)

    if args.pages:
        pages = args.pages
    else:
        pages = []
        for dirpath, _, filenames in os.walk(page_root):
            if is_hidden_dir(dirpath):
                continue
            _, ext = os.path.splitext(os.path.relpath(dirpath, page_root))
            if ext:
                # A directory with an extension is one page.
                pages.append(dirpath)
                continue
            pages.extend(
                os.path.join(dirpath, filename)
                for filename in filenames
                if not filename.startswith(("_", "."))
            )

    for page in pages:
        rel_path = os.path.relpath(page, page_root)
        rel_path_no_type, file_type = os.path.splitext(rel_path)
        namespace = split_path(rel_path_no_type)
        os.makedirs(
            os.path.join(data["_OUT"], os.path.split(rel_path)[0]), exist_ok=True
        )
        if file_type == ".md" and not os.path.isdir(page):
            # Markdown files are written out as HTML.
            rel_path = rel_path_no_type + ".html"
        with open(os.path.join(data["_OUT"], rel_path), "w") as out_file:
            out_file.write(generate_output(page, data, namespace))


if __name__ == "__main__":
    # Command-line entry point: every option is optional, and main() fills in
    # defaults for whatever is left unset.
    cli = argparse.ArgumentParser()
    cli.add_argument("--pages", nargs="*", type=str)
    cli.add_argument("--page-root", type=str)
    cli.add_argument("--templates", type=str)
    # The data file is opened in binary mode.
    cli.add_argument("--data", type=argparse.FileType("rb"), default="data.toml")
    cli.add_argument("--out", type=str)
    cli.add_argument("--date-format", type=str)
    main(cli.parse_args())