Coverage for crateweb/research/html_functions.py: 30%

142 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2026-02-05 06:46 -0600

1""" 

2crate_anon/crateweb/research/html_functions.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Helper functions for low-level HTML, used in the "research" section of the 

27CRATE web site.** 

28 

29""" 

30 

31import logging 

32import re 

33import textwrap 

34from typing import Any, Dict, Iterable, List, Optional, Pattern 

35 

36from cardinal_pythonlib.django.function_cache import django_cache_function 

37from django.templatetags.static import static 

38from django.utils.html import escape 

39from django.template.defaultfilters import linebreaksbr 

40from pygments import highlight 

41from pygments.lexers.sql import SqlLexer 

42from pygments.formatters.html import HtmlFormatter 

43import sqlparse 

44 

log = logging.getLogger(__name__)

# Number of highlight CSS classes available; they are named highlight0,
# highlight1, ... highlight<n-1>.
N_CSS_HIGHLIGHT_CLASSES = 3

# Characters with a special meaning in regular expressions; see
# http://www.regular-expressions.info/characters.html
# The backslash comes first, so that it is the first character replaced.
REGEX_METACHARS = list("\\^$.|?*+()[{")

52 

53 

54# ============================================================================= 

55# Collapsible div, etc. 

56# ============================================================================= 

57 

58 

def visibility_button(
    tag: str,
    small: bool = True,
    title_html: str = "",
    as_span: bool = False,
    as_visibility: bool = True,
) -> str:
    """
    Builds the HTML for a clickable "(+)/(-)" button.

    The same button markup serves two purposes:

    - "visibility" mode, which shows/hides the associated content;
    - "collapse" mode, which expands/collapses large cells in query results.

    Args:
        tag: identifier shared by this button and its content element; it is
            embedded in the element IDs passed to the Javascript
            ``toggleVisible`` or ``toggleCollapsed`` functions (see
            ``crate_anon/crateweb/static/collapse.js``)
        small: start in the small (or invisible) state, showing the "plus"
            image, rather than the big (or visible) state?
        title_html: HTML placed inside the button element, after the image
        as_span: produce a ``<span>`` element instead of a ``<div>``?
        as_visibility: use "visibility" behaviour (``toggleVisible``) rather
            than "collapse" behaviour (``toggleCollapsed``)?

    Returns:
        str: HTML

    The matching content element comes from :func:`visibility_contentdiv`.
    """
    tag = str(tag)
    if as_span:
        element = "span"
    else:
        element = "div"
    if as_visibility:
        js_function = "toggleVisible"
    else:
        js_function = "toggleCollapsed"
    # "plus" invites expansion of something small; "minus" the reverse.
    icon_url = static("plus.gif" if small else "minus.gif")
    return f"""
<{element} class="expandcollapse" onclick="{js_function}('collapsible_{tag}', 'collapse_img_{tag}');">
    <img class="plusminus_image" id="collapse_img_{tag}" alt="" src="{icon_url}">
    {title_html}
</{element}>
    """  # noqa: E501

97 

98 

def visibility_contentdiv(
    tag: str,
    contents: str,
    extra_div_classes: Optional[Iterable[str]] = None,
    small: bool = True,
    as_visibility: bool = True,
) -> str:
    """
    Returns HTML for a content ``<div>`` that can be collapsed by a button
    (for which, see :func:`visibility_button`).

    Args:
        tag: tag used for this set of elements; used as part of the parameters
            to Javascript ``toggleVisible`` or ``toggleCollapsed`` functions;
            see ``crate_anon/crateweb/static/collapse.js``
        contents: HTML contents of the ``div``
        extra_div_classes: extra CSS classes to add to the ``div``; any
            iterable of strings is accepted (list, tuple, generator, ...),
            and a single bare string is treated as one class name
        small: start small (or invisible) rather than big (or visible)?
        as_visibility: "visibility" style, rather than "collapse" style?

    Returns:
        str: HTML

    """
    div_classes = ["collapsible"]
    if isinstance(extra_div_classes, str):
        # A bare string is itself an iterable of characters; treat it as a
        # single class name rather than splitting it up.
        div_classes.append(extra_div_classes)
    elif extra_div_classes:
        # extend() copes with any iterable, whereas "list + iterable" raises
        # TypeError for anything that is not a list.
        div_classes.extend(extra_div_classes)
    if as_visibility:
        div_classes.append("collapse_invisible" if small else "collapse_visible")
    else:
        div_classes.append("collapse_small" if small else "collapse_big")
    class_attr = " ".join(div_classes)
    tag = str(tag)
    return f"""
<div class="{class_attr}" id="collapsible_{tag}">
    {contents}
</div>
    """

141 

142 

def visibility_div_with_divbutton(
    tag: str,
    contents: str,
    title_html: str = "",
    extra_div_classes: Iterable[str] = None,
    small: bool = True,
) -> str:
    """
    Builds an HTML ``<div>`` holding a show/hide button followed by the
    content it controls.

    Args:
        tag: identifier shared by the button and the content ``div``; used as
            part of the parameters to the Javascript ``toggleVisible`` or
            ``toggleCollapsed`` functions (see
            ``crate_anon/crateweb/static/collapse.js``)
        contents: HTML contents of the content ``div``
        title_html: HTML to put inside the button element
        extra_div_classes: extra CSS classes to add to the content ``div``
        small: start invisible rather than visible?

    Returns:
        str: HTML

    - The initial hidden/visible state is written into the HTML itself (via
      CSS classes), rather than being applied by an onload method.
    """
    toggle_button = visibility_button(
        tag=tag,
        as_visibility=True,
        small=small,
        title_html=title_html,
    )
    content_div = visibility_contentdiv(
        tag=tag,
        as_visibility=True,
        small=small,
        contents=contents,
        extra_div_classes=extra_div_classes,
    )
    return "".join(("<div>", toggle_button, content_div, "</div>"))

179 

180 

def overflow_div(
    tag: str,
    contents: str,
    extra_div_classes: Iterable[str] = None,
    small: bool = True,
) -> str:
    """
    Builds an HTML wrapper ``<div>`` holding an expand/collapse button and
    the content it controls.

    Args:
        tag: identifier shared by the button and the content ``div``; used as
            part of the parameters to the Javascript ``toggleVisible`` or
            ``toggleCollapsed`` functions (see
            ``crate_anon/crateweb/static/collapse.js``)
        contents: HTML contents of the content ``div``
        extra_div_classes: extra CSS classes to add to the content ``div``
        small: start collapsed rather than expanded?

    Returns:
        str: HTML
    """
    # Both parts use "collapse" (not "visibility") behaviour.
    collapse_button = visibility_button(
        tag=tag, as_visibility=False, small=small
    )
    collapsible_content = visibility_contentdiv(
        tag=tag,
        as_visibility=False,
        small=small,
        contents=contents,
        extra_div_classes=extra_div_classes,
    )
    return f"""
<div class="expandcollapsewrapper">
    {collapse_button}
    {collapsible_content}
</div>
    """

215 

216 

217# ============================================================================= 

218# HtmlElementCounter 

219# ============================================================================= 

220 

221 

class HtmlElementCounter:
    """
    Keeps a running element number so that pages containing many collapsible
    divs (or other HTML elements needing individual numbering) can give each
    one a distinct tag.
    """

    def __init__(self, prefix: str = "") -> None:
        """
        Args:
            prefix: text placed in front of the element number to form the
                tag used for HTML elements
        """
        self.prefix = prefix
        self.elementnum = 0

    def next(self) -> None:
        """
        Moves the ``elementnum`` counter on by one.
        """
        self.elementnum = self.elementnum + 1

    def tag(self) -> str:
        """
        Returns the current tag: the prefix followed by the current element
        number.
        """
        number = str(self.elementnum)
        return self.prefix + number

    def _then_advance(self, html: str) -> str:
        """
        Advances the counter and hands back ``html`` unchanged. The HTML is
        built by the caller (using the current tag) before this runs.
        """
        self.next()
        return html

    def visibility_div_with_divbutton(
        self,
        contents: str,
        title_html: str = "",
        extra_div_classes: Iterable[str] = None,
        small: bool = True,
    ) -> str:
        """
        Returns a "visibility" ``<div>`` with a show/hide button, using the
        current tag, and then advances the counter.

        Args:
            contents: HTML contents of the content ``div``
            title_html: HTML to put inside the button element
            extra_div_classes: extra CSS classes to add to the content ``div``
            small: start invisible, rather than visible?

        Returns:
            str: HTML
        """
        return self._then_advance(
            visibility_div_with_divbutton(
                tag=self.tag(),
                contents=contents,
                title_html=title_html,
                extra_div_classes=extra_div_classes,
                small=small,
            )
        )

    def visibility_div_spanbutton(self, small: bool = True) -> str:
        """
        Returns a visibility button in an HTML ``<span>``, using the current
        tag. The counter is NOT advanced by this method.

        Args:
            small: start in "hidden" rather than "visible" mode?

        Returns:
            str: HTML
        """
        return visibility_button(
            tag=self.tag(), small=small, as_span=True, as_visibility=True
        )

    def visibility_div_contentdiv(
        self,
        contents: str,
        extra_div_classes: Iterable[str] = None,
        small: bool = True,
    ) -> str:
        """
        Returns a "visibility" content ``<div>``, using the current tag, and
        then advances the counter.

        Args:
            contents: HTML contents of the content ``div``
            extra_div_classes: extra CSS classes to add to the ``div``
            small: start invisible, rather than visible?

        Returns:
            str: HTML
        """
        return self._then_advance(
            visibility_contentdiv(
                tag=self.tag(),
                contents=contents,
                extra_div_classes=extra_div_classes,
                small=small,
                as_visibility=True,
            )
        )

    def collapsible_div_contentdiv(
        self,
        contents: str,
        extra_div_classes: Iterable[str] = None,
        small: bool = True,
    ) -> str:
        """
        Returns a "collapsible" content ``<div>``, using the current tag, and
        then advances the counter.

        Args:
            contents: HTML contents of the ``div``
            extra_div_classes: extra CSS classes to add to the content ``div``
            small: start collapsed, rather than expanded?

        Returns:
            str: HTML
        """
        return self._then_advance(
            visibility_contentdiv(
                tag=self.tag(),
                contents=contents,
                extra_div_classes=extra_div_classes,
                small=small,
                as_visibility=False,
            )
        )

    def overflow_div(
        self,
        contents: str,
        extra_div_classes: Iterable[str] = None,
        small: bool = True,
    ) -> str:
        """
        Returns an "overflow" ``<div>`` with content and an expand/collapse
        button, using the current tag, and then advances the counter.

        Args:
            contents: HTML contents of the ``div``
            extra_div_classes: extra CSS classes to add to the content ``div``
            small: start collapsed, rather than expanded?

        Returns:
            str: HTML
        """
        return self._then_advance(
            overflow_div(
                tag=self.tag(),
                contents=contents,
                extra_div_classes=extra_div_classes,
                small=small,
            )
        )

373 

374 

375# ============================================================================= 

376# Highlighting of query results 

377# ============================================================================= 

378 

# Name of the Highlight class (documented in this file as
# crate_anon.crateweb.research.models.Highlight), held as a string so that it
# can be used as a forward reference in type annotations below; presumably
# this avoids importing the models module here -- TODO confirm.
HIGHLIGHT_FWD_REF = "Highlight"

380 

381 

def escape_literal_string_for_regex(s: str) -> str:
    """
    Escapes a literal string so that it can be embedded in a regular
    expression and will match only itself.

    Every character that is special to the ``re`` module (backslash, ``.``,
    ``*``, brackets, and so on) is backslash-escaped.

    Args:
        s: literal text to search for

    Returns:
        str: regex source text that matches ``s`` literally
    """
    # The previous hand-rolled loop called s.replace() but threw the result
    # away (strings are immutable), so nothing was ever escaped. re.escape()
    # is the standard-library tool for this job.
    return re.escape(s)

392 

393 

def get_regex_from_highlights(
    highlight_list: Iterable[HIGHLIGHT_FWD_REF],
    at_word_boundaries_only: bool = False,
) -> Pattern:
    """
    Takes a list of the user's chosen highlights to apply to results, and
    builds a compiled regular expression for (any of) them.

    Args:
        highlight_list: list of
            :class:`crate_anon.crateweb.research.models.Highlight` objects,
            which represent text to find and a colour to highlight it with;
            only their ``text`` attribute is read here
        at_word_boundaries_only: match at word boundaries only?

    Returns:
        a compiled regular expression (case-insensitive) with exactly one
        capturing group (group 1), which is the text found. If there is
        nothing to search for (no highlights, or only empty text), the
        expression never matches anything.

    """
    wb = r"\b"  # word boundary; escape the slash if not using a raw string
    elements = []  # type: List[str]
    for hl in highlight_list:
        if not hl.text:
            # Empty text would contribute an empty alternative, which matches
            # at every position of the target string.
            continue
        h = escape_literal_string_for_regex(hl.text)
        elements.append(wb + h + wb if at_word_boundaries_only else h)
    if elements:
        # Group required, so that the replacement can refer to it.
        regexstring = "(" + "|".join(elements) + ")"
    else:
        # "(?!)" can never match. The surrounding group is kept so that a
        # replacement template referring to group 1 remains valid.
        regexstring = "((?!))"
    return re.compile(regexstring, re.IGNORECASE | re.UNICODE)

422 

423 

def highlight_text(x: str, n: int = 0) -> str:
    """
    Wraps text (from a query result) in an HTML ``<span>`` that highlights it.

    Args:
        x: original text; it is inserted as-is
        n: highlight colour number to use (as per our ``static/base.css``);
            values wrap around modulo ``N_CSS_HIGHLIGHT_CLASSES``

    Returns:
        str: HTML of the form ``<span class="highlight<n>">...</span>``

    """
    colour_index = n % N_CSS_HIGHLIGHT_CLASSES
    return f'<span class="highlight{colour_index}">{x}</span>'

437 

438 

def make_highlight_replacement_regex(n: int = 0) -> str:
    r"""
    Builds the replacement string for a regex substitution that wraps the
    first "found" group in a highlight of a specific colour.

    Args:
        n: highlight colour number to use (as per our ``static/base.css``)

    Returns:
        str: regex text like ``<span class="highlight1">\1</span>``

    """
    first_group_backreference = r"\1"
    return highlight_text(first_group_backreference, n=n)

452 

453 

def make_result_element(
    x: Any,
    element_counter: HtmlElementCounter,
    highlight_dict: Dict[int, List[HIGHLIGHT_FWD_REF]] = None,
    collapse_at_len: int = None,
    collapse_at_n_lines: int = None,
    line_length: int = None,
    keep_existing_newlines: bool = True,
    collapsed: bool = True,
    null: str = "<i>NULL</i>",
) -> str:
    """
    Converts one result value into HTML for a result cell: escaped, optionally
    word-wrapped and highlighted, and placed in a collapsible ``<div>`` if it
    is long enough.

    Args:
        x: the value
        element_counter: a :class:`HtmlElementCounter`, used for
            distinguishing multiple elements; it will be modified if a
            collapsible ``<div>`` is produced
        highlight_dict: an optional dictionary mapping highlight colour to all
            the :class:`crate_anon.crateweb.research.models.Highlight` objects
            that use it (e.g.: ``2`` maps to highlight objects for all the
            separate pieces of text to be highlighted in colour 2)
        collapse_at_len: if specified, the string length (of the original
            value, before wrapping/escaping) at or beyond which the cell will
            be collapsible
        collapse_at_n_lines: if specified, the number of lines (after any
            wrapping) at or beyond which the cell will be collapsible
        line_length: if specified, the line length to word-wrap at
        keep_existing_newlines: retain existing newlines from the source?
        collapsed: start cells collapsed rather than expanded?
        null: HTML string to use for database NULL values

    Returns:
        str: HTML

    """
    if x is None:
        return null
    highlights_by_colour = highlight_dict or {}
    text = str(x)
    original_length = len(text)  # measured before any wrapping/escaping

    # textwrap.wrap() absorbs existing newlines, so split on them first if
    # they are to be preserved.
    source_lines = text.split("\n") if keep_existing_newlines else [text]
    if line_length:
        display_lines = []  # type: List[str]
        for source_line in source_lines:
            if not source_line:
                # textwrap.wrap() would swallow a blank line; keep it.
                display_lines.append("")
                continue
            display_lines.extend(textwrap.wrap(source_line, width=line_length))
    else:
        display_lines = source_lines
    line_count = len(display_lines)

    html = linebreaksbr(escape("\n".join(display_lines)))
    # NOTE(review): highlighting is applied to the already-escaped HTML, so a
    # search term can also match inside entities or "<br>" tags.
    for colour, highlight_list in highlights_by_colour.items():
        pattern = get_regex_from_highlights(highlight_list)
        html = pattern.sub(make_highlight_replacement_regex(colour), html)

    exceeds_length = collapse_at_len and original_length >= collapse_at_len
    if not exceeds_length:
        exceeds_lines = (
            collapse_at_n_lines and line_count >= collapse_at_n_lines
        )
        if not exceeds_lines:
            return html
    collapsible = element_counter.overflow_div(contents=html, small=collapsed)
    # NOTE(review): HtmlElementCounter.overflow_div() has already advanced the
    # counter, so this advances it a second time.
    element_counter.next()
    return collapsible

526 

527 

def pre(x: str = "") -> str:
    """
    Wraps the input in an HTML ``<pre>...</pre>`` element.

    Args:
        x: input; it is inserted as-is

    Returns:
        the input within a ``pre`` tag

    """
    return "<pre>{}</pre>".format(x)

540 

541 

542# ============================================================================= 

543# SQL formatting 

544# ============================================================================= 

545 

# CSS class passed to the Pygments HTML formatter below.
SQL_BASE_CSS_CLASS = "sq"  # brief is good
# Module-level Pygments HTML formatter, shared by the SQL helpers below.
SQL_FORMATTER = HtmlFormatter(cssclass=SQL_BASE_CSS_CLASS)
# Module-level Pygments lexer for SQL, shared by the SQL helpers below.
SQL_LEXER = SqlLexer()

549 

550 

def prettify_sql_html(
    sql: str, reformat: bool = False, indent_width: int = 4
) -> str:
    """
    Highlights SQL as HTML using Pygments, optionally re-laying it out with
    ``sqlparse`` first.

    Args:
        sql: raw SQL text
        reformat: reformat the layout?
        indent_width: if reformatting, what indent should we use?

    Returns:
        str: HTML

    """
    sql_to_highlight = (
        sqlparse.format(sql, reindent=True, indent_width=indent_width)
        if reformat
        else sql
    )
    return highlight(sql_to_highlight, SQL_LEXER, SQL_FORMATTER)

569 

570 

@django_cache_function(timeout=None)
def prettify_sql_css() -> str:
    """
    Returns the CSS style definitions of the Pygments formatter used for SQL.
    The call is wrapped by ``django_cache_function`` with ``timeout=None``.
    """
    style_definitions = SQL_FORMATTER.get_style_defs()
    return style_definitions

577 

578 

def prettify_sql_and_args(
    sql: str,
    args: List[Any] = None,
    sql_not_formatted: bool = True,
    reformat: bool = False,
    indent_width: int = 4,
) -> str:
    """
    Returns HTML for both some SQL and its arguments.

    Args:
        sql: SQL text; raw SQL if ``sql_not_formatted`` is true, or HTML that
            has already been highlighted if it is false
        args: optional list of arguments
        sql_not_formatted: does ``sql`` still need to be highlighted (and
            optionally reformatted)? If true, it is passed through
            :func:`prettify_sql_html`; if false, it is used unchanged.
        reformat: reformat the layout? (Only relevant if
            ``sql_not_formatted`` is true.)
        indent_width: if reformatting, what indent should we use?

    Returns:
        str: HTML; the SQL, followed (if there are any arguments) by an
        "Args:" heading and the ``repr()`` of the arguments in a ``<pre>``
        element

    """
    if sql_not_formatted:
        sql = prettify_sql_html(
            sql, reformat=reformat, indent_width=indent_width
        )
    if not args:
        return sql
    # Wrap first, so that line widths reflect the text as displayed; then
    # HTML-escape, because repr() of the arguments can contain characters such
    # as "<" and "&" that must not be interpreted as markup.
    formatted_args = escape("\n".join(textwrap.wrap(repr(args))))
    return sql + f"<div>Args:</div><pre>{formatted_args}</pre>"

609 

610 

def make_collapsible_sql_query(
    sql: Optional[str],
    element_counter: HtmlElementCounter,
    sql_not_formatted: bool = False,
    args: List[Any] = None,
    collapse_at_len: int = 400,
    collapse_at_n_lines: int = 5,
) -> str:
    """
    Formats an SQL query (and its arguments, if any) as HTML, placing it in a
    collapsible ``<div>`` if the query is long enough.

    Args:
        sql: SQL text; ``None`` is treated as an empty string
        element_counter: a :class:`HtmlElementCounter` whose ``overflow_div``
            method is used if a collapsible ``<div>`` is produced
        sql_not_formatted: does ``sql`` still need to be highlighted? (Passed
            on to :func:`prettify_sql_and_args`.)
        args: optional list of arguments
        collapse_at_len: if specified, the length of the SQL text at or beyond
            which the result will be collapsible
        collapse_at_n_lines: if specified, the number of lines of SQL text at
            or beyond which the result will be collapsible

    Returns:
        str: HTML

    """
    sql_text = str(sql or "")
    html = prettify_sql_and_args(
        sql_text, args, sql_not_formatted=sql_not_formatted, reformat=False
    )
    # Length and line count are those of the SQL text itself, not of the
    # generated HTML.
    exceeds_length = collapse_at_len and len(sql_text) >= collapse_at_len
    if not exceeds_length:
        exceeds_lines = (
            collapse_at_n_lines
            and sql_text.count("\n") + 1 >= collapse_at_n_lines
        )
        if not exceeds_lines:
            return html
    return element_counter.overflow_div(contents=html)