There's not much going on here; I am just trying out this feature to see how well it works. I would also like to make sure everything works correctly, such as the newlines.
Hopefully it's good!
#!/bin/python3
# BSD 2-Clause License
#
# Copyright (c) 2023, Georgios Atheridis <georgios@atheridis.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import re
import tomllib
import time
import html
import markdown
# Matches a whole line of the form {% extend "name" %} (single or double
# quotes); group 2 is the quoted name.
regex_extend = re.compile(r"^{%\s?extend (\"|')(.+)\1\s?%}$")
# Matches {{ token }} where the token contains no whitespace; group 1 is the
# token.
regex_variable = re.compile(r"{{\s?(\S+)\s?}}")
# NOTE(review): this pattern is the literal text "None" and defines no capture
# group, yet interpret_no_recursion() reads group(1) from its matches, which
# raises IndexError on any match -- confirm the intended pattern.
regex_variable_no_interpret = re.compile(r"None")
# Matches {% file "name" %} anywhere in the text (single or double quotes);
# group 2 is the quoted name.
regex_execute = re.compile(r"{%\s?file (\"|')(.+)\1\s?%}")
def md_to_html(md: str) -> str:
    """Render Markdown source text to HTML.

    The conversion is done by ``markdown.markdown`` with the ``extra``,
    ``sane_lists`` and ``meta`` extensions enabled.
    """
    enabled_extensions = ["extra", "sane_lists", "meta"]
    rendered = markdown.markdown(md, extensions=enabled_extensions)
    return rendered
def md_meta(md: str) -> dict:
    """Return the metadata block of a Markdown document.

    A ``markdown.Markdown`` instance with only the ``meta`` extension is run
    over the text; the rendered HTML is discarded and the instance's ``Meta``
    attribute is returned.
    """
    converter = markdown.Markdown(extensions=["meta"])
    # convert() is called for its side effect only; Meta is read afterwards.
    converter.convert(md)
    return converter.Meta
def get_max_mtime(dir: str) -> float:
    """Return the newest modification time of any file below ``dir``.

    The whole tree is walked recursively and the maximum is taken across
    every directory, not just the last one visited.  Directories that
    contain no files are skipped instead of raising ``ValueError``.

    Args:
        dir: Root directory to scan.

    Returns:
        The largest ``os.path.getmtime`` value found, or ``0`` when the tree
        holds no files (or ``dir`` does not exist).
    """
    mtime = 0.0
    for root, _, files in os.walk(dir):
        for name in files:
            mtime = max(mtime, os.path.getmtime(os.path.join(root, name)))
    return mtime
def create_page_index(data: dict):
    """Build the list of page namespaces and store it in ``data["_PAGES"]``.

    The tree under ``data["_PAGE_ROOT"]`` is walked.  A directory whose
    relative path starts with ``_`` or ``.`` is skipped.  A directory whose
    name carries an extension is indexed as a single page (extension
    removed) and its files are not listed.  In every other directory each
    file not starting with ``_`` or ``.`` is indexed by its relative path
    without extension.  Each entry is the tuple returned by ``split_path``.
    """
    page_root = data["_PAGE_ROOT"]
    index: list[tuple] = []
    for current, _, filenames in os.walk(page_root):
        rel = os.path.relpath(current, page_root)
        if rel == ".":
            # The page root itself contributes no path prefix.
            rel = ""
        elif rel.startswith(("_", ".")):
            continue
        stem, suffix = os.path.splitext(rel)
        if suffix:
            # Directory page: the directory as a whole is one entry.
            index.append(split_path(stem))
            continue
        index.extend(
            split_path(os.path.splitext(os.path.join(rel, name))[0])
            for name in filenames
            if not name.startswith(("_", "."))
        )
    data["_PAGES"] = index
def initialize_values(data: dict):
    """Seed ``_name``, ``_date`` and ``_EXT`` for everything under the page root.

    A directory whose path has an extension is treated as one page: its date
    comes from ``get_max_mtime`` and its ``_EXT`` is the directory's
    extension; its files are not visited individually.  Every file in any
    other directory becomes a page of its own, dated by its own mtime, with
    ``.md`` mapped to ``.html`` and other extensions kept.

    ``_name`` and ``_date`` are written with ``replace=False``, so an
    existing truthy value wins; ``_EXT`` is always overwritten.
    """
    page_root = data["_PAGE_ROOT"]

    def _seed(namespace: tuple, mtime: float, ext: str) -> None:
        # Same write order as always: name, then date, then extension.
        update_value(data, namespace, "_name", namespace[-1], False)
        stamp = time.strftime(data["_DATE_FORMAT"], time.localtime(mtime))
        update_value(data, namespace, "_date", stamp, False)
        update_value(data, namespace, "_EXT", ext)

    for current, _, filenames in os.walk(page_root):
        dir_ext = os.path.splitext(current)[1]
        if dir_ext:
            rel = os.path.relpath(current, page_root)
            newest = get_max_mtime(current)
            _seed(split_path(os.path.splitext(rel)[0]), newest, dir_ext)
            continue
        for filename in filenames:
            full = os.path.join(current, filename)
            rel = os.path.relpath(full, page_root)
            modified = os.path.getmtime(full)
            stem, suffix = os.path.splitext(rel)
            out_ext = ".html" if suffix == ".md" else suffix
            _seed(split_path(stem), modified, out_ext)
def get_value_from_file(
    data: dict, namespace: tuple, key: str, interpret_ok: bool
) -> str | None:
    """Look up ``key`` for the page ``namespace`` directly on disk.

    The page is the first entry in the namespace's parent directory (under
    ``data["_PAGE_ROOT"]``) whose name without extension equals
    ``namespace[-1]``.

    * Plain file: only ``.md`` files are consulted; the value comes from the
      Markdown metadata under ``key`` with its first character dropped.
    * Directory: the metadata of ``_value.md`` is tried first (a missing
      file is ignored); otherwise the first file whose stem equals ``key``
      is read, passed through ``interpret`` when ``interpret_ok`` is true,
      and converted to HTML when it is a ``.md`` file.

    A metadata entry with exactly one item is returned as that item;
    otherwise the whole list is returned.  ``None`` is returned when nothing
    matches.  ``IndexError`` (empty namespace) and ``FileNotFoundError``
    (missing parent directory) are not handled here.
    """
    parent = os.path.join(data["_PAGE_ROOT"], *namespace[:-1])

    # Locate the directory entry that represents this page.
    entry = None
    for candidate in os.listdir(parent):
        if os.path.splitext(candidate)[0] == namespace[-1]:
            entry = candidate
            break
    if entry is None:
        return None

    target = os.path.join(parent, entry)
    meta_key = key[1:]

    def _read_meta(md_path):
        with open(md_path, "r") as handle:
            return md_meta(handle.read()).get(meta_key)

    def _unwrap(values):
        return values[0] if len(values) == 1 else values

    if not os.path.isdir(target):
        if os.path.splitext(entry)[1] != ".md":
            return None
        values = _read_meta(target)
        return _unwrap(values) if values else None

    try:
        values = _read_meta(os.path.join(target, "_value.md"))
    except FileNotFoundError:
        values = None
    if values:
        return _unwrap(values)

    for name in os.listdir(target):
        stem, suffix = os.path.splitext(name)
        if stem != key:
            continue
        with open(os.path.join(target, name), "r") as handle:
            text = handle.read()
            if interpret_ok:
                text = interpret(text, data, namespace)
        if suffix == ".md":
            text = md_to_html(text)
        return text
    return None
def get_value(data: dict, namespace: tuple, key: str, interpret_ok=True):
    """Resolve ``key`` for ``namespace``, preferring files over ``data``.

    Unless ``key`` is ``"_value"``, ``get_value_from_file`` is consulted
    first and a truthy result is returned immediately; ``FileNotFoundError``
    and ``IndexError`` from that lookup are ignored.

    Otherwise the nested ``data`` dict is descended one namespace component
    at a time (a missing component leaves the current level unchanged), and
    the value found at the deepest level that defines ``key`` is returned.
    ``None`` is returned when no level defines it.
    """
    if key != "_value":
        try:
            from_file = get_value_from_file(data, namespace, key, interpret_ok)
        except (FileNotFoundError, IndexError):
            from_file = None
        if from_file:
            return from_file

    scope = data
    result = scope.get(key)
    for part in namespace:
        scope = scope.get(part, scope)
        # Deeper levels override what outer levels provided.
        result = scope.get(key, result)
    return result
def update_value(data: dict, namespace: tuple, key: str, value, replace=True):
    """Store ``value`` under ``key`` at the nested position ``namespace``.

    Intermediate dicts are created as needed.  With ``replace=False`` the
    write is skipped when ``get_value`` already yields a truthy value for
    the same namespace and key.
    """
    if replace or not get_value(data, namespace, key):
        node = data
        for part in namespace:
            node = node.setdefault(part, {})
        node[key] = value
def split_path(path: str) -> tuple:
    """Split ``path`` into a tuple of its components.

    A leading single separator is dropped (``"/a/b"`` -> ``("a", "b")``) and
    a path with no separator yields a one-element tuple.  The walk is
    iterative and stops when ``os.path.split`` can no longer shorten the
    head, so roots other than a single separator (for example ``"//"``)
    terminate instead of recursing without bound.

    Args:
        path: Path string to split.

    Returns:
        The components from outermost to innermost.
    """
    rest, tail = os.path.split(path)
    parts = [tail]
    while rest not in ("", os.path.sep):
        head, tail = os.path.split(rest)
        if head == rest:
            # ``rest`` is a bare root that split() cannot reduce any further.
            break
        parts.append(tail)
        rest = head
    parts.reverse()
    return tuple(parts)
def interpret_no_recursion(file_value: str, data: dict, namespace: tuple) -> str:
    """Substitute each ``regex_variable_no_interpret`` match exactly once.

    Group 1 of every match is read as a dotted name.  If one component is
    ``self`` it is removed and ``namespace`` is prepended.  The name is
    resolved with ``get_value(..., interpret_ok=False)``, converted with
    ``str`` and HTML-escaped.  Scanning resumes after the inserted text, so
    substituted values are never scanned again.

    The matched span itself is replaced.  Replacing the first textual
    occurrence instead would rewrite an identical token inside an earlier
    substitution.

    NOTE(review): this relies on ``regex_variable_no_interpret`` defining
    capture group 1; with a pattern that has no group, ``group(1)`` raises
    ``IndexError``.

    Args:
        file_value: Text to process.
        data: Site data dictionary.
        namespace: Namespace of the page being rendered.

    Returns:
        The text with every match substituted.
    """
    start_pos = 0
    while variable := regex_variable_no_interpret.search(file_value, start_pos):
        varspace = variable.group(1).split(".")
        try:
            varspace.remove("self")
        except ValueError:
            varspace = tuple(varspace)
        else:
            varspace = namespace + tuple(varspace)
        repl_value = html.escape(
            str(get_value(data, varspace[:-1], varspace[-1], False))
        )
        # Splice by position so only the span that was matched is replaced.
        file_value = (
            file_value[: variable.start()]
            + repl_value
            + file_value[variable.end() :]
        )
        # Resume after the inserted text so it is not interpreted again.
        start_pos = variable.start() + len(repl_value)
    return file_value
def interpret(file_value: str, data: dict, namespace: tuple) -> str:
    """Expand ``{% file "..." %}`` and ``{{ name }}`` directives in a text.

    The text is rewritten until neither pattern matches any more.  Each
    pass first resolves every file directive, then every variable:

    * File directive: the named file is opened under ``data["_TEMPLATES"]``.
      A ``.py`` file is executed with ``data``, ``namespace`` and
      ``get_value`` in its globals and its ``_value`` global is inserted.
      A ``.md`` file is converted to HTML.  Anything else is inserted as-is.
    * Variable: the dotted name is resolved with ``get_value``.  If one
      component is ``self`` it is removed and ``namespace`` is prepended.
      A list value is joined with ``", "``; any other value goes through
      ``str``.

    Inserted text is scanned again, so directives inside it are expanded
    too.
    """

    def _render_include(template_name: str):
        suffix = os.path.splitext(template_name)[1]
        with open(os.path.join(data["_TEMPLATES"], template_name), "r") as src:
            if suffix == ".py":
                # NOTE: exec runs the template script with the full
                # privileges of this process; templates must be trusted.
                scope = {"data": data, "namespace": namespace, "get_value": get_value}
                exec(src.read(), scope)
                return scope["_value"]
            if suffix == ".md":
                return md_to_html(src.read())
            return src.read()

    def _resolve(dotted: str) -> str:
        parts = dotted.split(".")
        if "self" in parts:
            parts.remove("self")
            target = namespace + tuple(parts)
        else:
            target = tuple(parts)
        found = get_value(data, target[:-1], target[-1])
        if isinstance(found, list):
            found = ", ".join(found)
        return str(found)

    while regex_execute.search(file_value) or regex_variable.search(file_value):
        while include := regex_execute.search(file_value):
            rendered = _render_include(include.group(2))
            # Backslashes are doubled so re.sub inserts the text literally.
            file_value = regex_execute.sub(
                rendered.replace("\\", "\\\\"), file_value, 1
            )
        while reference := regex_variable.search(file_value):
            resolved = _resolve(reference.group(1))
            file_value = regex_variable.sub(
                resolved.replace("\\", "\\\\"), file_value, 1
            )
    return file_value
def dir_to_file(file: str) -> dict[str, str]:
    """Map value names to file paths for a page.

    A path that is not a directory is returned as ``{"_value": file}``.
    For a directory, every entry is mapped from its name without extension
    to its joined path.
    """
    if os.path.isdir(file):
        return {
            os.path.splitext(entry)[0]: os.path.join(file, entry)
            for entry in os.listdir(file)
        }
    return {"_value": file}
def generate_output(file: str, data: dict, namespace: tuple) -> str:
    """Render a page (or template) and return the final text.

    Steps:

    1. The ``_value`` file of ``file`` (see ``dir_to_file``) is read.
    2. If its first line is an ``{% extend "..." %}`` directive, the name is
       stored as ``_extend`` for ``namespace`` and the directive line is
       removed from the text.
    3. The text is expanded with ``interpret``, converted to HTML when the
       file is ``.md``, and stored as ``_value``.
    4. If an ``_extend`` value resolves, it is cleared and the template of
       that name under ``data["_TEMPLATES"]`` is rendered recursively with
       the same namespace.
    5. The stored ``_value`` is passed through ``interpret_no_recursion``
       once more and returned.

    An empty file is treated as having no extend directive, and the
    directive is removed even when it is the only line and has no trailing
    newline.

    Args:
        file: Path of the page file or page directory.
        data: Site data dictionary; updated in place.
        namespace: Namespace under which the values are stored.

    Returns:
        The rendered text stored as ``_value`` for ``namespace``.

    Raises:
        KeyError: If ``file`` is a directory without a ``_value`` entry.
    """
    files = dir_to_file(file)
    _, _value_file_type = os.path.splitext(files["_value"])
    with open(files["_value"], "r") as in_file:
        _value = in_file.read()

    # splitlines() is empty for an empty file; fall back to an empty line.
    first_line = next(iter(_value.splitlines()), "")
    if result := regex_extend.search(first_line):
        update_value(data, namespace, "_extend", result.group(2))
        # Drop the directive, then at most one newline that follows it.
        _value = _value.removeprefix(result.group(0)).removeprefix("\n")

    _value = interpret(_value, data, namespace)
    if _value_file_type == ".md":
        _value = md_to_html(_value)
    update_value(data, namespace, "_value", _value)

    if _extend := get_value(data, namespace, "_extend"):
        # Clear first so the template's own directive decides further nesting.
        update_value(data, namespace, "_extend", "")
        generate_output(os.path.join(data["_TEMPLATES"], _extend), data, namespace)

    update_value(
        data,
        namespace,
        "_value",
        interpret_no_recursion(
            str(get_value(data, namespace, "_value")), data, namespace
        ),
    )
    return str(get_value(data, namespace, "_value"))
def generate_builds(build_file: str, data: dict) -> list[tuple[tuple, str]]:
    """Run a ``_build.py`` script and register the pages it produces.

    The script is executed with ``data``, ``namespace`` (the namespace of
    the directory holding the script) and ``get_value`` in its globals.  It
    must leave a list of ``(path, value)`` pairs in ``_value``, where each
    ``path`` is a list of path components relative to the script's
    directory.

    Each path is prefixed with the script directory's components relative
    to ``data["_PAGE_ROOT"]``, at any nesting depth.  ``_EXT``, ``_name``
    and ``_date`` (the script's mtime) are seeded without overwriting
    existing values, and the path is appended to ``data["_PAGES"]``.

    Args:
        build_file: Path of the build script.
        data: Site data dictionary; updated in place.

    Returns:
        The script's list with every path converted to a tuple.
    """
    rel_path = os.path.relpath(build_file, data["_PAGE_ROOT"])
    rel_path_no_type, _ = os.path.splitext(rel_path)
    namespace = split_path(rel_path_no_type)[:-1]
    d = {"data": data, "namespace": namespace, "get_value": get_value}
    with open(build_file, "r") as f:
        # NOTE: exec runs the build script with the full privileges of this
        # process; build scripts must be trusted.
        exec(f.read(), d)
    builds = d["_value"]

    # These do not depend on the individual page, so compute them once.
    build_dir = os.path.relpath(os.path.split(build_file)[0], data["_PAGE_ROOT"])
    prefix = () if build_dir == "." else split_path(build_dir)
    build_date = time.strftime(
        data["_DATE_FORMAT"], time.localtime(os.path.getmtime(build_file))
    )

    for path, _ in builds:
        # Slice assignment prepends every component; list.insert() accepts
        # only a single item and failed for nested build directories.
        path[:0] = prefix
        update_value(data, path, "_EXT", ".html", False)
        update_value(data, path, "_name", path[-1], False)
        update_value(data, path, "_date", build_date, False)
        data["_PAGES"].append(tuple(path))

    builds[:] = [(tuple(path), value) for path, value in builds]
    return builds
def interpret_builds(build_data: list[tuple[tuple, str]], data: dict):
    """Render every generated page and write it to the output directory.

    For each ``(path, value)`` pair the value is expanded with
    ``interpret`` and stored as ``_value``.  If an ``_extend`` value
    resolves, it is cleared and that template is rendered with
    ``generate_output``.  The stored ``_value`` is then passed through
    ``interpret_no_recursion`` and written to ``<_OUT>/<path>.html``,
    creating parent directories as needed.
    """
    for path, body in build_data:
        out_parts = path[:-1] + (path[-1] + ".html",)

        update_value(data, path, "_value", interpret(body, data, path))

        parent_template = get_value(data, path, "_extend")
        if parent_template:
            update_value(data, path, "_extend", "")
            generate_output(
                os.path.join(data["_TEMPLATES"], parent_template), data, path
            )

        current = str(get_value(data, path, "_value"))
        update_value(
            data, path, "_value", interpret_no_recursion(current, data, path)
        )

        os.makedirs(os.path.join(data["_OUT"], *out_parts[:-1]), exist_ok=True)
        with open(os.path.join(data["_OUT"], *out_parts), "w") as out_file:
            out_file.write(get_value(data, path, "_value"))
def main(args):
    """Generate the site described by the parsed command-line arguments.

    The TOML data file is loaded, the ``_TEMPLATES``, ``_PAGE_ROOT``,
    ``_OUT`` and ``_DATE_FORMAT`` settings are filled in (command line
    first, then the data file, then a built-in default), the page values
    and index are initialised, every ``_build.py`` is run, and finally each
    page is rendered into the output directory.

    Directories are treated as hidden when their path *relative to the page
    root* starts with ``_`` or ``.``.  This matches ``create_page_index``
    and keeps a page root such as ``./pages`` from hiding the whole tree.

    Args:
        args: Namespace with ``data`` (open binary file), ``templates``,
            ``page_root``, ``out``, ``date_format`` and ``pages``.
    """
    # Load toml file; the handle is closed even if parsing fails.
    with args.data:
        data = tomllib.load(args.data)

    # Assign default values if not set
    settings = (
        ("_TEMPLATES", args.templates, "templates"),
        ("_PAGE_ROOT", args.page_root, "pages"),
        ("_OUT", args.out, "out"),
        ("_DATE_FORMAT", args.date_format, "%Y-%m-%d"),
    )
    for key, cli_value, fallback in settings:
        if cli_value:
            data[key] = cli_value
        elif not data.get(key):
            data[key] = fallback

    def _is_hidden(directory: str) -> bool:
        # Judge the path relative to the page root, never the root prefix.
        rel = os.path.relpath(directory, data["_PAGE_ROOT"])
        return rel != "." and rel.startswith(("_", "."))

    initialize_values(data)
    create_page_index(data)

    builds = []
    for root, _, files in os.walk(data["_PAGE_ROOT"]):
        if _is_hidden(root) or "_build.py" not in files:
            continue
        builds.extend(generate_builds(os.path.join(root, "_build.py"), data))
    interpret_builds(builds, data)

    if args.pages:
        pages = args.pages
    else:
        pages = []
        for root, _, files in os.walk(data["_PAGE_ROOT"]):
            if _is_hidden(root):
                continue
            if os.path.splitext(root)[1]:
                # A directory with an extension is one page.
                pages.append(root)
                continue
            pages.extend(
                os.path.join(root, name)
                for name in files
                if not name.startswith(("_", "."))
            )

    for page in pages:
        rel_path = os.path.relpath(page, data["_PAGE_ROOT"])
        rel_path_no_type, file_type = os.path.splitext(rel_path)
        namespace = split_path(rel_path_no_type)
        os.makedirs(
            os.path.join(data["_OUT"], os.path.split(rel_path)[0]), exist_ok=True
        )
        if file_type == ".md" and not os.path.isdir(page):
            rel_path = rel_path_no_type + ".html"
        with open(os.path.join(data["_OUT"], rel_path), "w") as out_file:
            out_file.write(generate_output(page, data, namespace))
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("--pages", nargs="*", type=str)
    cli.add_argument("--page-root", type=str)
    cli.add_argument("--templates", type=str)
    # FileType opens the data file in binary mode while parsing arguments.
    cli.add_argument("--data", type=argparse.FileType("rb"), default="data.toml")
    cli.add_argument("--out", type=str)
    cli.add_argument("--date-format", type=str)
    main(cli.parse_args())