# Source code for coaster.gfm
# -*- coding: utf-8 -*-
"""
GitHub flavoured markdown: because normal markdown has some vicious
gotchas.
Further reading on the gotchas:
http://blog.stackoverflow.com/2009/10/markdown-one-year-later/
This is a Python port of GitHub code, taken from
https://gist.github.com/901706
To run the tests, install nose ($ easy_install nose) then:
$ nosetests coaster/gfm.py
This version from https://gist.github.com/Wilfred/901706
"""
from markdown import Markdown
import re
__all__ = ['gfm', 'markdown']

# A single parser is configured at import time and shared by every call to
# markdown_convert(), so the extension setup is only done once.
_markdown_parser = Markdown(
    safe_mode='escape',
    output_format='html5',
    extensions=['codehilite'],
    extension_configs={'codehilite': {'css_class': 'syntax'}},
)


def markdown_convert(source):
    """
    Convert Markdown ``source`` to HTML using the shared parser.

    The parser is reset before every conversion. Python-Markdown documents
    that a reused ``Markdown`` instance may need ``reset()`` between calls to
    ``convert()``; without it, state gathered while parsing one document can
    still be present when the next one is converted.
    """
    _markdown_parser.reset()
    return _markdown_parser.convert(source)
def remove_pre_blocks(markdown_source):
    """
    Replace every ``<pre>...</pre>`` block with a ``{placeholder}`` token.

    This keeps the contents of the blocks out of reach of the other text
    transformations. Matching is non-greedy and spans newlines; an opening
    ``<pre>`` with no closing ``</pre>`` is left in place.

    Returns a ``(text, blocks)`` tuple: the source with one token per removed
    block, and the removed blocks in the order they appeared.
    """
    original_blocks = []

    def stash(matchobj):
        # save the original block and put in a placeholder
        original_blocks.append(matchobj.group(0))
        return '{placeholder}'

    # One left-to-right pass finds the same non-overlapping matches as
    # searching and replacing the first match over and over, without
    # rescanning the text from the start for every block.
    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    markdown_source = pattern.sub(stash, markdown_source)
    return (markdown_source, original_blocks)
def remove_inline_code_blocks(markdown_source):
    """
    Replace every backtick-delimited span with a ``{placeholder}`` token.

    A span runs from one backtick to the next one (non-greedy, newlines
    included), so backticks are paired left to right and an unpaired
    trailing backtick is left in place.

    Returns a ``(text, blocks)`` tuple: the source with one token per removed
    span, and the removed spans in the order they appeared.
    """
    original_blocks = []

    def stash(matchobj):
        # save the original block and put in a placeholder
        original_blocks.append(matchobj.group(0))
        return '{placeholder}'

    # One left-to-right pass finds the same non-overlapping matches as
    # searching and replacing the first match over and over, without
    # rescanning the text from the start for every span.
    pattern = re.compile(r'`.*?`', re.DOTALL)
    markdown_source = pattern.sub(stash, markdown_source)
    return (markdown_source, original_blocks)
# Token written by remove_pre_blocks() and remove_inline_code_blocks().
_INLINE_TOKEN = '{placeholder}'
# Token gfm() renames the <pre> placeholders to, so the two kinds of removed
# block can be told apart when they are put back.
_PRE_TOKEN = '{pre-placeholder}'

# A line that is not indented as a code block (four spaces or a tab) and
# contains a word with underscores in the middle, e.g. foo_bar_baz.
_MULTI_UNDERSCORE_RE = re.compile(
    r'^(?! {4}|\t).*\w+(?<!_)_\w+_\w[\w_]*', re.MULTILINE | re.UNICODE)

_NAKED_URL_RE = re.compile(r"""
    (^|\s) # start of string or has whitespace before it
    (https?://[:/.?=&;a-zA-Z0-9_-]+) # the URL itself, http or https only
    (\s|$) # trailing whitespace or end of string
    """, re.VERBOSE | re.MULTILINE | re.UNICODE)

# A line starting with a word character or '<', plus the newlines after it.
_TEXT_LINE_RE = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE | re.UNICODE)


def _escape_underscores(matchobj):
    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    s = matchobj.group(0)
    # don't mess with URLs:
    if 'http:' in s or 'https:' in s:
        return s
    return s.replace('_', '\\_')


def _hard_break(matchobj):
    # Only a single newline becomes a line break; a run of two or more
    # already separates paragraphs and is left alone.
    if len(matchobj.group(1)) == 1:
        # Markdown needs two trailing spaces to emit a <br /> tag.
        return matchobj.group(0).rstrip() + '  \n'
    return matchobj.group(0)


def _restore_blocks(text, token, blocks):
    # Fill the tokens with the saved blocks, in order, in one pass so that
    # text inserted for one block is never scanned for further tokens.
    pending = iter(blocks)
    return re.sub(re.escape(token),
                  lambda matchobj: next(pending, matchobj.group(0)), text)


def gfm(text):
    """
    Pre-process ``text`` so that it renders like GitHub flavoured markdown.

    ``<pre>`` blocks and backtick spans are taken out first and put back
    unchanged at the end. On the remaining text:

    * underscores inside words such as ``foo_bar_baz`` are escaped, except
      on lines indented as code or containing ``http:`` / ``https:``;
    * naked ``http(s)://`` URLs surrounded by whitespace are turned into
      ``[url](url)`` links;
    * a single newline after a line starting with a word character or ``<``
      becomes a hard line break.

    Returns the transformed markdown source. Input that already contains a
    literal placeholder token cannot be told apart from a removed block.
    """
    text, code_blocks = remove_pre_blocks(text)
    # Both helpers write the same token. Rename the <pre> ones now, otherwise
    # the two kinds are indistinguishable once the inline spans are removed.
    text = text.replace(_INLINE_TOKEN, _PRE_TOKEN)
    text, inline_blocks = remove_inline_code_blocks(text)

    # fix italics outside code blocks
    text = _MULTI_UNDERSCORE_RE.sub(_escape_underscores, text)
    # wrap the URL in brackets: http://foo -> [http://foo](http://foo)
    text = _NAKED_URL_RE.sub(r'\1[\2](\2)\3', text)
    # In very clear cases, let newlines become <br /> tags.
    text = _TEXT_LINE_RE.sub(_hard_break, text)

    # Restore in reverse order of removal: a backtick span may have swallowed
    # a <pre> token, which only reappears once the span itself is back.
    text = _restore_blocks(text, _INLINE_TOKEN, inline_blocks)
    return _restore_blocks(text, _PRE_TOKEN, code_blocks)
def markdown(text):
    """
    Return Markdown rendered text using GitHub-flavoured Markdown.

    ``text`` is first pre-processed by :func:`gfm` and the result is then
    passed to ``markdown_convert``, whose return value is returned as is.
    """
    return markdown_convert(gfm(text))