Coverage for markdown_middleware / middleware.py: 100%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 08:40 -0500

1import hashlib 

2import uuid 

3 

4from django.conf import settings 

5from django.core.cache import cache 

6from html_to_markdown import convert 

7 

# Namespace prepended to every cache key this module reads, writes or deletes.
8CACHE_KEY_PREFIX = "markdown_middleware" 

9 

10 

def invalidate_cache(path):
    """Forget every cached markdown response stored for ``path``.

    Cached responses are keyed by a per-path token, so removing that token
    from the cache leaves all query-string variants of the path unreachable
    in a single delete.
    """
    hasher = hashlib.sha256()
    hasher.update(path.encode())
    digest = hasher.hexdigest()

    token_key = f"{CACHE_KEY_PREFIX}:path:{digest}"
    cache.delete(token_key)

15 

16 

class MarkdownMiddleware:
    """Serve a Markdown rendition of HTML pages to clients that ask for it.

    When the request's ``Accept`` header contains ``text/markdown``, a
    successful (status 200), non-streaming ``text/html`` response is converted
    to Markdown with ``html_to_markdown.convert``. Every other request or
    response is passed through untouched.

    Settings read:

    ``MARKDOWN_MIDDLEWARE_ANONYMOUS_ONLY`` (default ``True``)
        When true, requests from authenticated users are never converted.
    ``MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT`` (default ``None``)
        When not ``None``, converted responses are stored in the Django cache
        with this timeout. ``None`` disables caching entirely.
    """

    def __init__(self, get_response):
        """Store the next callable in the middleware chain."""
        self.get_response = get_response

    def get_path_cache_key(self, request):
        """Return the cache key under which this path's token is stored."""
        digest = hashlib.sha256(request.path.encode()).hexdigest()
        return f"{CACHE_KEY_PREFIX}:path:{digest}"

    def get_full_cache_key(self, request):
        """Return the response cache key for this request.

        Ensures a path token exists, creating one if necessary. The key combines
        a per-path token with a hash of the query string, so that invalidating
        the path token makes all query string variants unreachable.
        """
        path_key = self.get_path_cache_key(request)
        path_token = cache.get(path_key)
        if path_token is None:
            cache_timeout = getattr(settings, "MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT", None)
            path_token = uuid.uuid4().hex
            cache.set(path_key, path_token, cache_timeout)

        qs_digest = hashlib.sha256(request.META.get("QUERY_STRING", "").encode()).hexdigest()

        return f"{CACHE_KEY_PREFIX}:{path_token}:{qs_digest}"

    def __call__(self, request):
        """Return the downstream response, converted to Markdown when applicable.

        The response is returned unchanged when the client did not ask for
        ``text/markdown``, when the user is authenticated and anonymous-only
        mode is on, when the status is not 200, when the response is
        streaming, or when its content type is not ``text/html``.

        Converted responses carry ``Content-Type: text/markdown``, a refreshed
        ``Content-Length`` and an ``X-Markdown-Tokens`` header holding a rough
        token estimate.
        """
        accept = request.META.get("HTTP_ACCEPT", "")
        if "text/markdown" not in accept:
            return self.get_response(request)

        anonymous_only = getattr(settings, "MARKDOWN_MIDDLEWARE_ANONYMOUS_ONLY", True)
        if anonymous_only and getattr(
            getattr(request, "user", None), "is_authenticated", False
        ):
            return self.get_response(request)

        # Caching is opt-in: without a configured timeout no cache key is built.
        cache_timeout = getattr(settings, "MARKDOWN_MIDDLEWARE_CACHE_TIMEOUT", None)
        cache_key = self.get_full_cache_key(request) if cache_timeout is not None else None

        if cache_key is not None:
            cached = cache.get(cache_key)
            if cached is not None:
                return cached

        response = self.get_response(request)

        if response.status_code != 200:
            return response

        # A streaming response has no ``content`` attribute; reading it below
        # would raise AttributeError, so hand the stream back as-is.
        if getattr(response, "streaming", False):
            return response

        content_type = response.get("Content-Type", "")
        if "text/html" not in content_type:
            return response

        html = response.content.decode(response.charset or "utf-8")
        result = convert(html)
        markdown = result.content

        # Rough token estimate: about one token per 4 characters of text.
        token_count = len(markdown) // 4

        response.content = markdown.encode("utf-8")
        response["Content-Type"] = "text/markdown; charset=utf-8"
        response["Content-Length"] = len(response.content)
        response["X-Markdown-Tokens"] = str(token_count)

        if cache_key is not None:
            cache.set(cache_key, response, cache_timeout)

        return response