Coverage for markdown_middleware / middleware.py: 100%
54 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 08:40 -0500
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 08:40 -0500
1import hashlib
2import uuid
4from django.conf import settings
5from django.core.cache import cache
6from html_to_markdown import convert
8CACHE_KEY_PREFIX = "markdown_middleware"
def invalidate_cache(path):
    """Drop the cached path token for *path*.

    The token is stored under a key derived from the SHA-256 hex digest of
    the path. Deleting it covers every query-string variant of the path at
    once, because the token is looked up by path alone.
    """
    hasher = hashlib.sha256()
    hasher.update(path.encode())
    token_key = f"{CACHE_KEY_PREFIX}:path:{hasher.hexdigest()}"
    cache.delete(token_key)
class MarkdownMiddleware:
    """Serve HTML responses as Markdown to clients whose Accept header asks for it.

    A request is converted only when its ``Accept`` header contains
    ``text/markdown`` and the downstream response is a non-streaming
    ``200`` with a ``text/html`` content type. Everything else is passed
    through untouched.

    Settings read via ``getattr(settings, ...)``:

    * ``MARKDOWN_MIDDLEWARE_ANONYMOUS_ONLY`` (default ``True``): when truthy,
      requests whose ``request.user.is_authenticated`` is truthy are passed
      through without conversion.
    * ``MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT`` (default ``None``): when not
      ``None``, converted responses are stored in the cache with this
      timeout. ``None`` disables caching entirely.
    """

    # Methods whose converted responses may be cached and replayed. Replaying
    # a cached response for any other method would skip the view (and its side
    # effects), and would let e.g. a POST result be served to later GETs.
    _cacheable_methods = ("GET", "HEAD")

    def __init__(self, get_response):
        """Store the next callable in the middleware chain."""
        self.get_response = get_response

    def get_path_cache_key(self, request):
        """Return the cache key under which this path's token is stored."""
        digest = hashlib.sha256(request.path.encode()).hexdigest()
        return f"{CACHE_KEY_PREFIX}:path:{digest}"

    def get_full_cache_key(self, request):
        """Return the response cache key for this request.

        Ensures a path token exists, creating one if necessary. The key combines
        a per-path token with a hash of the query string, so that invalidating
        the path token makes all query string variants unreachable.
        """
        path_key = self.get_path_cache_key(request)
        path_token = cache.get(path_key)
        if path_token is None:
            # No token yet (or it was deleted/expired): mint a fresh one so
            # keys built from any previous token can no longer be produced.
            cache_timeout = getattr(settings, "MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT", None)
            path_token = uuid.uuid4().hex
            cache.set(path_key, path_token, cache_timeout)

        qs_digest = hashlib.sha256(request.META.get("QUERY_STRING", "").encode()).hexdigest()

        return f"{CACHE_KEY_PREFIX}:{path_token}:{qs_digest}"

    def __call__(self, request):
        """Process *request*, converting an HTML response to Markdown if eligible.

        Returns the downstream response unchanged unless the client asked for
        ``text/markdown`` and the response is a non-streaming ``200``
        ``text/html`` response. Converted responses get a
        ``text/markdown; charset=utf-8`` content type, an updated
        ``Content-Length`` and an ``X-Markdown-Tokens`` estimate header.
        """
        accept = request.META.get("HTTP_ACCEPT", "")
        if "text/markdown" not in accept:
            return self.get_response(request)

        anonymous_only = getattr(settings, "MARKDOWN_MIDDLEWARE_ANONYMOUS_ONLY", True)
        if anonymous_only and getattr(
            getattr(request, "user", None), "is_authenticated", False
        ):
            return self.get_response(request)

        cache_timeout = getattr(settings, "MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT", None)
        # Only consult/populate the cache for safe methods; other methods must
        # always reach the view.
        use_cache = cache_timeout is not None and request.method in self._cacheable_methods
        cache_key = self.get_full_cache_key(request) if use_cache else None

        if cache_key is not None:
            cached = cache.get(cache_key)
            if cached is not None:
                return cached

        response = self.get_response(request)

        if response.status_code != 200:
            return response

        # Streaming responses expose no ``content`` attribute; reading it
        # below would raise, so hand them back unconverted.
        if getattr(response, "streaming", False):
            return response

        content_type = response.get("Content-Type", "")
        if "text/html" not in content_type:
            return response

        html = response.content.decode(response.charset or "utf-8")
        result = convert(html)
        markdown = result.content

        # To calculate the number of tokens needed for Markdown,
        # you can estimate that one token generally corresponds to
        # about 4 characters of text.
        token_count = len(markdown) // 4

        response.content = markdown.encode("utf-8")
        response["Content-Type"] = "text/markdown; charset=utf-8"
        response["Content-Length"] = len(response.content)
        response["X-Markdown-Tokens"] = str(token_count)

        if cache_key is not None:
            cache.set(cache_key, response, cache_timeout)

        return response