~linuxgoose/bocpress

ea974b0aa571d976764ecd593298392f8a41e35d — Jordan Robinson 4 months ago 559db65
change markdown library to markdown-it-py and add Graphviz support
4 files changed, 159 insertions(+), 43 deletions(-)

M main/templates/main/landing.html
M main/util.py
M pyproject.toml
M uv.lock
M main/templates/main/landing.html => main/templates/main/landing.html +19 -19
@@ 170,17 170,16 @@
    </details>

    <details>
        <summary>Why doesn't strikethrough markdown notation work?</summary>
        <summary>Why doesn't strikethrough markdown notation work? (<b>update:</b> now available!)</summary>
        <div>
            <p>
                The markdown library we use
                (<a href="https://github.com/Python-Markdown/markdown">Python-Markdown</a>)
                supports
                <a href="http://daringfireball.net/projects/markdown/syntax">
                    John Gruber’s markdown spec</a>,
                which does not define strikethrough text. Discussions have been had on migrating to another
                markdown library at some point in the future, though, this is not
                yet planned.
                We now use the <a href="https://markdown-it-py.readthedocs.io/">markdown-it-py</a> library
                for parsing markdown. This parser supports a rich set of features beyond the original
                John Gruber specification, including tables, footnotes, fenced code blocks, and more.
                Strikethrough text is fully supported using the <code>~~text~~</code> syntax.
                If something isn’t rendering as expected, please check your markdown against the
                <a href="https://markdown-it-py.readthedocs.io/en/latest/README.html#syntax-extensions">
                markdown-it-py syntax extensions</a> documentation.
            </p>
        </div>
    </details>


@@ 197,7 196,7 @@
    </details>

    <details>
        <summary>Would you add support for LaTeX-style math expressions?</summary>
        <summary>Would you add support for LaTeX-style math expressions? (<b>update:</b> now available!)</summary>
        <div>
            <p>
                This was recently introduced in V1.3.1. Please see the <a href="https://github.com/linuxgoose/bocpress/pull/20">pull request</a> for more information or give it a try yourself.


@@ 206,17 205,18 @@
    </details>

    <details>
        <summary>What about supporting Mermaid flowcharts?</summary>
        <summary>What about supporting Mermaid flowcharts? (<b>update:</b> Graphviz now available!)</summary>
        <div>
            <p>
                We would not add
                <a href="https://github.com/mermaid-js/mermaid">mermaid-js</a>
                as it is
                <a href="https://github.com/mermaid-js/mermaid/blob/develop/dist/mermaid.min.js">
                    2.7MB minified
                </a>
                JavaScript which is far too heavy for BōcPress' mission. However, we would be interested
                in implementing something similar if it was rendered on the backend.
                We do not include <a href="https://github.com/mermaid-js/mermaid">mermaid-js</a>,
                as it requires a hefty <a href="https://github.com/mermaid-js/mermaid/blob/develop/dist/mermaid.min.js">2.7MB of JavaScript</a>,
                which runs counter to BōcPress’ mission of speed and simplicity.
            </p>
            <p>
                In its place, we offer support for <a href="https://graphviz.org/">Graphviz</a>,
                allowing you to render diagrams elegantly and safely, entirely server-side, without
                adding any extra JavaScript. You may read more about this approach in our
                <a href="https://jjrobinson.bocpress.co.uk/blog/diagrams-markdown-and-the-quiet-joy-of-graphviz-now-available-on-bocpress/">blog post</a>.
            </p>
        </div>
    </details>

M main/util.py => main/util.py +89 -24
@@ 18,6 18,39 @@ from l2m4m import LaTeX2MathMLExtension

from main import denylist, models

from markdown_it import MarkdownIt
from mdit_py_plugins.footnote import footnote_plugin
from mdit_py_plugins.tasklists import tasklists_plugin
from graphviz import Source

# Module-level markdown-it parser shared by the rendering helpers below:
# the "commonmark" preset with the ``html`` option switched on, the
# strikethrough and table rules enabled, and the footnote and task-list
# plugins attached.
md = MarkdownIt("commonmark", {"html": True})
md.enable("strikethrough")
md.enable("table")
md.use(footnote_plugin)
md.use(tasklists_plugin)

# Define allowed CSS properties and SVG attributes
ALLOWED_CSS_PROPERTIES = frozenset([
    "azimuth", "background-color", "border-bottom-color", "border-collapse",
    "border-color", "border-left-color", "border-right-color", "border-top-color",
    "clear", "color", "cursor", "direction", "display", "elevation", "float",
    "font", "font-family", "font-size", "font-style", "font-variant", "font-weight",
    "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after",
    "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header",
    "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align",
    "text-decoration", "text-indent", "text-transform", "visibility", "white-space",
    "widows", "width", "word-spacing", "z-index"
])
SVG_TAGS = ["svg","g","path","line","polygon","polyline","circle","ellipse","rect","text","defs","title","desc"]
SVG_ATTRS = ["width","height","viewBox","fill","stroke","stroke-width","d","x","y","cx","cy","r","points","transform","style","id","class"]

# Allow MathML tags
MATHML_TAGS = [
    "math","mrow","mi","mo","mn","msup","msub","mfrac","msqrt","mstyle",
    "mtable","mtr","mtd","mfenced","ms","mspace","menclose","mover","munder"
]

def is_disallowed(username):
    """Return true if username is not allowed to be registered."""


@@ 131,37 164,69 @@ def syntax_highlight(text):


def clean_html(dirty_html, strip_tags=False):
    """Clean potentially evil HTML.

    - dirty_html: the untrusted HTML string to sanitise.
    - strip_tags: when true, run bleach with its default allow-lists and
      ``strip=True`` (disallowed markup is removed rather than escaped).
      When false, sanitise against the extended allow-lists described below.

    The extended allow-lists are bleach's defaults plus the project denylist
    module's elements/attributes, plus the SVG and MathML elements declared
    at module level. ``style`` values are filtered through a CSS sanitiser
    limited to ALLOWED_CSS_PROPERTIES.
    """
    # Early exit first: the stripping path never uses the extended lists.
    if strip_tags:
        return bleach.clean(dirty_html, strip=True)

    allowed_tags = [
        *bleach.sanitizer.ALLOWED_TAGS,
        *denylist.ALLOWED_HTML_ELEMENTS,
        *SVG_TAGS,
        *MATHML_TAGS,
    ]
    # Per-tag SVG attributes are layered over bleach's defaults; the "*"
    # entry applies the project's attribute list to every element.
    allowed_attrs = {
        **bleach.sanitizer.ALLOWED_ATTRIBUTES,
        **{tag: SVG_ATTRS for tag in SVG_TAGS},
        "*": denylist.ALLOWED_HTML_ATTRS,
    }

    css_sanitizer = CSSSanitizer(allowed_css_properties=ALLOWED_CSS_PROPERTIES)
    return bleach.clean(
        dirty_html,
        tags=allowed_tags,
        attributes=allowed_attrs,
        css_sanitizer=css_sanitizer,
    )

# Keep a handle on the renderer's stock fence rule so non-diagram code
# blocks can still be delegated to it.
default_fence = md.renderer.rules.get("fence")


def fence_override(tokens, idx, options, env):
    """Render a fenced code block, turning ``dot`` fences into inline SVG.

    Parameters mirror the markdown-it renderer rule signature:

    - tokens: the token list being rendered.
    - idx: index of the fence token within ``tokens``.
    - options, env: passed through untouched to the default fence rule.

    Returns an HTML string. For a ``dot`` fence this is the ``<svg>`` element
    produced by Graphviz; if rendering fails for any reason the DOT source is
    shown, HTML-escaped, inside ``<pre>``. Any other language is delegated to
    the renderer's original fence rule.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    token = tokens[idx]
    code = token.content
    # Only the first word of the info string names the language; anything
    # after it is extra metadata and must not prevent a match.
    info_words = token.info.split()
    lang = info_words[0] if info_words else ""

    if lang == "dot":
        # NOTE(review): DOT source here is user-supplied and is handed to the
        # Graphviz executable with no time or size limit — confirm that is
        # acceptable for this deployment.
        try:
            svg_str = Source(code, format="svg").pipe().decode()
        except Exception:
            # Deliberate best-effort: a broken diagram (or a missing Graphviz
            # binary) must not break rendering of the whole post.
            return f"<pre>{escape(code)}</pre>"

        # Graphviz prefixes its output with an XML declaration, a DOCTYPE and
        # comments; keep only the <svg> element so none of that leaks into
        # the page.
        svg_start = svg_str.find("<svg")
        if svg_start == -1:
            return f"<pre>{escape(code)}</pre>"
        return svg_str[svg_start:].strip()

    # fallback to default renderer
    if default_fence:
        return default_fence(tokens, idx, options, env)
    # Escape so code containing "<" or "&" is displayed, not parsed as markup.
    return f"<pre><code>{escape(code)}</code></pre>"

md.renderer.rules["fence"] = fence_override

def md_to_html(markdown_string, strip_tags=False):
    """Return HTML formatted string, given a markdown one."""

def replace_latex_with_mathml(md_text: str) -> str:
    """Replace ``$$...$$`` and ``$...$`` spans in *md_text* with converted HTML.

    Every matched span, delimiters included, is run through Python-Markdown
    with the LaTeX2MathML extension and the resulting HTML is substituted in
    place. Text containing no such span is returned unchanged.
    """

    def _convert_span(found):
        # The whole match (with its dollar delimiters) is what gets converted.
        return markdown.markdown(found.group(0), extensions=[LaTeX2MathMLExtension()])

    # Display math goes first so a ``$$`` pair is not consumed by the
    # single-dollar inline pattern; both patterns may span newlines.
    for pattern in (r"\$\$.*?\$\$", r"\$.*?\$"):
        md_text = re.sub(pattern, _convert_span, md_text, flags=re.S)
    return md_text


def md_to_html(markdown_string: str, strip_tags=False) -> str:
    """Return HTML formatted string, given a markdown one.

    Pipeline: LaTeX spans are converted first (replace_latex_with_mathml),
    the result is rendered by the module-level markdown-it parser ``md``
    (strikethrough, tables, footnotes, Graphviz fences), and the output is
    sanitised by clean_html.

    - markdown_string: markdown source; a falsy value yields "".
    - strip_tags: forwarded to clean_html.
    """
    if not markdown_string:
        return ""

    # NOTE(review): the previous Python-Markdown pipeline also applied
    # syntax_highlight() and the toc extension; neither is part of this
    # path — confirm that is intentional.

    # Convert LaTeX to MathML first so markdown-it sees the finished markup.
    with_mathml = replace_latex_with_mathml(markdown_string)

    rendered_html = md.render(with_mathml)

    # Clean the HTML but preserve SVG and MathML
    return clean_html(rendered_html, strip_tags=strip_tags)


def remove_control_chars(text):

M pyproject.toml => pyproject.toml +3 -0
@@ 7,9 7,12 @@ requires-python = ">=3.13"
dependencies = [
    "bleach[css]>=6.2.0",
    "django>=5.2.5",
    "graphviz>=0.21",
    "gunicorn>=23.0.0",
    "l2m4m>=1.0.4",
    "markdown>=3.8.2",
    "markdown-it-py>=4.0.0",
    "mdit-py-plugins>=0.5.0",
    "psycopg[binary]>=3.2.9",
    "pygments>=2.19.2",
    "python-dotenv>=1.1.1",

M uv.lock => uv.lock +48 -0
@@ 240,6 240,15 @@ wheels = [
]

[[package]]
name = "graphviz"
version = "0.21"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" },
]

[[package]]
name = "gunicorn"
version = "23.0.0"
source = { registry = "https://pypi.org/simple" }


@@ 304,6 313,18 @@ wheels = [
]

[[package]]
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]

[[package]]
name = "markupsafe"
version = "3.0.2"
source = { registry = "https://pypi.org/simple" }


@@ 338,9 359,12 @@ source = { virtual = "." }
dependencies = [
    { name = "bleach", extra = ["css"] },
    { name = "django" },
    { name = "graphviz" },
    { name = "gunicorn" },
    { name = "l2m4m" },
    { name = "markdown" },
    { name = "markdown-it-py" },
    { name = "mdit-py-plugins" },
    { name = "psycopg", extra = ["binary"] },
    { name = "pygments" },
    { name = "python-dotenv" },


@@ 359,9 383,12 @@ dev = [
requires-dist = [
    { name = "bleach", extras = ["css"], specifier = ">=6.2.0" },
    { name = "django", specifier = ">=5.2.5" },
    { name = "graphviz", specifier = ">=0.21" },
    { name = "gunicorn", specifier = ">=23.0.0" },
    { name = "l2m4m", specifier = ">=1.0.4" },
    { name = "markdown", specifier = ">=3.8.2" },
    { name = "markdown-it-py", specifier = ">=4.0.0" },
    { name = "mdit-py-plugins", specifier = ">=0.5.0" },
    { name = "psycopg", extras = ["binary"], specifier = ">=3.2.9" },
    { name = "pygments", specifier = ">=2.19.2" },
    { name = "python-dotenv", specifier = ">=1.1.1" },


@@ 377,6 404,27 @@ dev = [
]

[[package]]
name = "mdit-py-plugins"
version = "0.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "markdown-it-py" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" },
]

[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]

[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }