From ea974b0aa571d976764ecd593298392f8a41e35d Mon Sep 17 00:00:00 2001 From: Jordan Robinson Date: Mon, 15 Sep 2025 22:28:09 +0100 Subject: [PATCH] change markdown library to markdown-it-python and add Graphviz support --- main/templates/main/landing.html | 38 +++++------ main/util.py | 113 ++++++++++++++++++++++++------- pyproject.toml | 3 + uv.lock | 48 +++++++++++++ 4 files changed, 159 insertions(+), 43 deletions(-) diff --git a/main/templates/main/landing.html b/main/templates/main/landing.html index f6f6af16eefc1e6a20142b501d3b67ebe5f822c0..bdce77cf5b36bfb06f06d50ce7c9e334505a1b94 100644 --- a/main/templates/main/landing.html +++ b/main/templates/main/landing.html @@ -170,17 +170,16 @@
- Why doesn't strikethrough markdown notation work? + Why doesn't strikethrough markdown notation work? (update: now available!)

- The markdown library we use - (Python-Markdown) - supports - - John Gruber’s markdown spec, - which does not define strikethrough text. Discussions have been had on migrating to another - markdown library at some point in the future, though, this is not - yet planned. + We now use the markdown-it-py library + for parsing markdown. This parser supports a rich set of features beyond the original + John Gruber specification, including tables, footnotes, fenced code blocks, and more. + Strikethrough text is fully supported using the ~~text~~ syntax. + If something isn’t rendering as expected, please ensure your markdown follows the + + markdown-it-py syntax extensions.

@@ -197,7 +196,7 @@
- Would you add support for LaTeX-style math expressions? + Would you add support for LaTeX-style math expressions? (update: now available!)

This was recently introduced in V1.3.1. Please see the pull request for more information or give it a try yourself. @@ -206,17 +205,18 @@

- What about supporting Mermaid flowcharts? + What about supporting Mermaid flowcharts? (update: Graphviz now available!)

- We would not add - mermaid-js - as it is - - 2.7MB minified - - JavaScript which is far too heavy for BōcPress' mission. However, we would be interested - in implementing something similar if it was rendered on the backend. + We do not include mermaid-js, + as it requires a hefty 2.7MB of JavaScript, + which would weigh down BōcPress’ mission of speed and simplicity. +

+

+ In its place, we offer support for Graphviz, + allowing you to render diagrams elegantly and safely, entirely server-side, without + adding any extra JavaScript. You may read more about this approach in our + blog post.

diff --git a/main/util.py b/main/util.py index 365b4b24ea26361621b78e7d3909945eb3c090e1..815e3b1926adaf44887d4a90c0544e33304fc570 100644 --- a/main/util.py +++ b/main/util.py @@ -18,6 +18,39 @@ from l2m4m import LaTeX2MathMLExtension from main import denylist, models +from markdown_it import MarkdownIt +from mdit_py_plugins.footnote import footnote_plugin +from mdit_py_plugins.tasklists import tasklists_plugin +from graphviz import Source + +md = ( + MarkdownIt("commonmark", {"html": True}) + .enable("strikethrough") + .enable("table") + .use(footnote_plugin) + .use(tasklists_plugin) +) + +# Define allowed CSS properties and SVG attributes +ALLOWED_CSS_PROPERTIES = frozenset([ + "azimuth", "background-color", "border-bottom-color", "border-collapse", + "border-color", "border-left-color", "border-right-color", "border-top-color", + "clear", "color", "cursor", "direction", "display", "elevation", "float", + "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", + "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", + "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", + "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", + "text-decoration", "text-indent", "text-transform", "visibility", "white-space", + "widows", "width", "word-spacing", "z-index" +]) +SVG_TAGS = ["svg","g","path","line","polygon","polyline","circle","ellipse","rect","text","defs","title","desc"] +SVG_ATTRS = ["width","height","viewBox","fill","stroke","stroke-width","d","x","y","cx","cy","r","points","transform","style","id","class"] + +# Allow MathML tags +MATHML_TAGS = [ + "math","mrow","mi","mo","mn","msup","msub","mfrac","msqrt","mstyle", + "mtable","mtr","mtd","mfenced","ms","mspace","menclose","mover","munder" +] def is_disallowed(username): """Return true if username is not allowed to be registered.""" @@ -131,37 +164,69 @@ def syntax_highlight(text): def clean_html(dirty_html, strip_tags=False): - """Clean potentially evil HTML. - - - strip_tags: true will strip everything, false will escape. - """ + allowed_tags = list(bleach.sanitizer.ALLOWED_TAGS) + denylist.ALLOWED_HTML_ELEMENTS + SVG_TAGS + MATHML_TAGS + denylist_attrs_dict = {"*": denylist.ALLOWED_HTML_ATTRS} + allowed_attrs = {**bleach.sanitizer.ALLOWED_ATTRIBUTES, **{tag: SVG_ATTRS for tag in SVG_TAGS}, **denylist_attrs_dict } + if strip_tags: return bleach.clean(dirty_html, strip=True) - css_sanitizer = CSSSanitizer(allowed_css_properties=denylist.ALLOWED_CSS_STYLES) - return bleach.clean( - dirty_html, - tags=denylist.ALLOWED_HTML_ELEMENTS, - attributes=denylist.ALLOWED_HTML_ATTRS, - css_sanitizer=css_sanitizer, - ) + css_sanitizer = CSSSanitizer(allowed_css_properties=ALLOWED_CSS_PROPERTIES) + return bleach.clean(dirty_html, tags=allowed_tags, attributes=allowed_attrs, css_sanitizer=css_sanitizer) + +default_fence = md.renderer.rules.get("fence") + + +def fence_override(tokens, idx, options, env): + token = tokens[idx] + code = token.content + lang = token.info.strip() + + if lang == "dot": + try: + svg_bytes = Source(code, format="svg").pipe() + svg_str = svg_bytes.decode() + # Remove XML declaration + svg_str = svg_str.replace('', '').strip() + return svg_str + except Exception: + return f"
{code}
" + + # fallback to default renderer + if default_fence: + return default_fence(tokens, idx, options, env) + return f"
{code}
" +md.renderer.rules["fence"] = fence_override -def md_to_html(markdown_string, strip_tags=False): - """Return HTML formatted string, given a markdown one.""" + +def replace_latex_with_mathml(md_text: str) -> str: + # Replace all inline ($...$) and display ($$...$$) LaTeX with MathML + def repl(match): + latex = match.group(0) + # Use l2m4m to convert LaTeX to MathML + mathml_html = markdown.markdown(latex, extensions=[LaTeX2MathMLExtension()]) + return mathml_html + + # Display math first + md_text = re.sub(r"\$\$.*?\$\$", repl, md_text, flags=re.S) + # Inline math + md_text = re.sub(r"\$.*?\$", repl, md_text, flags=re.S) + return md_text + + +def md_to_html(markdown_string: str, strip_tags=False) -> str: if not markdown_string: return "" - dirty_html = markdown.markdown( - syntax_highlight(markdown_string), - extensions=[ - "markdown.extensions.fenced_code", - "markdown.extensions.tables", - "markdown.extensions.footnotes", - "markdown.extensions.toc", - LaTeX2MathMLExtension(), - ], - ) - return clean_html(dirty_html, strip_tags) + + # Convert LaTeX to MathML first + mathml_html = replace_latex_with_mathml(markdown_string) + + # Pass through Markdown-It for strikethrough, tables, footnotes, and Graphviz diagrams + intermediate_html = md.render(mathml_html) + + # Clean the HTML but preserve SVG and MathML + return clean_html(intermediate_html, strip_tags=strip_tags) def remove_control_chars(text): diff --git a/pyproject.toml b/pyproject.toml index 2664974ea423baa8ec066837f6b8ee2a92d4ae54..a8908789dbf452dcd6fc6046ea7e7c7e41e1fa5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,12 @@ requires-python = ">=3.13" dependencies = [ "bleach[css]>=6.2.0", "django>=5.2.5", + "graphviz>=0.21", "gunicorn>=23.0.0", "l2m4m>=1.0.4", "markdown>=3.8.2", + "markdown-it-py>=4.0.0", + "mdit-py-plugins>=0.5.0", "psycopg[binary]>=3.2.9", "pygments>=2.19.2", "python-dotenv>=1.1.1", diff --git a/uv.lock b/uv.lock index 741a4a0b199f54f29196a81342c061109986e8a4..98e1c39fbe36798c5b7abadfbe44717cfc372382 100644 --- a/uv.lock +++ b/uv.lock @@ -239,6 +239,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/6e/98a1d23648e0085bb5825326af17612ecd8fc76be0ce96ea4dc35e17b926/django-5.2.5-py3-none-any.whl", hash = "sha256:2b2ada0ee8a5ff743a40e2b9820d1f8e24c11bac9ae6469cd548f0057ea6ddcd", size = 8302999, upload-time = "2025-08-06T08:26:23.562Z" }, ] +[[package]] +name = "graphviz" +version = "0.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, +] + [[package]] name = "gunicorn" version = "23.0.0" @@ -303,6 +312,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827, upload-time = "2025-06-19T17:12:42.994Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.2" @@ -338,9 +359,12 @@ source = { virtual = "." } dependencies = [ { name = "bleach", extra = ["css"] }, { name = "django" }, + { name = "graphviz" }, { name = "gunicorn" }, { name = "l2m4m" }, { name = "markdown" }, + { name = "markdown-it-py" }, + { name = "mdit-py-plugins" }, { name = "psycopg", extra = ["binary"] }, { name = "pygments" }, { name = "python-dotenv" }, @@ -359,9 +383,12 @@ dev = [ requires-dist = [ { name = "bleach", extras = ["css"], specifier = ">=6.2.0" }, { name = "django", specifier = ">=5.2.5" }, + { name = "graphviz", specifier = ">=0.21" }, { name = "gunicorn", specifier = ">=23.0.0" }, { name = "l2m4m", specifier = ">=1.0.4" }, { name = "markdown", specifier = ">=3.8.2" }, + { name = "markdown-it-py", specifier = ">=4.0.0" }, + { name = "mdit-py-plugins", specifier = ">=0.5.0" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.9" }, { name = "pygments", specifier = ">=2.19.2" }, { name = "python-dotenv", specifier = ">=1.1.1" }, @@ -376,6 +403,27 @@ dev = [ { name = "ruff", specifier = ">=0.12.8" }, ] +[[package]] +name = "mdit-py-plugins" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "packaging" version = "25.0"