Coverage for cc_modules/cc_spreadsheet.py: 29%
266 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
1"""
2camcops_server/cc_modules/cc_spreadsheet.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Helper functions/classes for spreadsheet-style tab-separated value (TSV)
27(and related) exports.**
29"""
31from collections import OrderedDict
32import csv
33import io
34import logging
35import os
36import random
37import re
38from typing import (
39 Any,
40 BinaryIO,
41 Callable,
42 Container,
43 Dict,
44 Iterable,
45 List,
46 Optional,
47 Sequence,
48 Union,
49)
50import zipfile
52from cardinal_pythonlib.datetimefunc import (
53 format_datetime,
54 get_now_localtz_pendulum,
55)
56from cardinal_pythonlib.excel import (
57 convert_for_openpyxl,
58 convert_for_pyexcel_ods3,
59)
60from cardinal_pythonlib.logs import BraceStyleAdapter
61from sqlalchemy.engine import Result
63from camcops_server.cc_modules.cc_constants import DateFormat
65ODS_VIA_PYEXCEL = True # significantly faster
66XLSX_VIA_PYEXCEL = True
68if ODS_VIA_PYEXCEL:
69 import pyexcel_ods3 # e.g. pip install pyexcel-ods3==0.5.3
71 ODSWriter = ODSSheet = None
72else:
73 from odswriter import ODSWriter, Sheet as ODSSheet # type: ignore[no-redef] # noqa: E501
75 pyexcel_ods3 = None
77if XLSX_VIA_PYEXCEL:
78 import pyexcel_xlsx # e.g. pip install pyexcel-xlsx==0.5.7
80 openpyxl = XLWorkbook = XLWorksheet = None
81else:
82 from openpyxl.workbook.workbook import Workbook as XLWorkbook # type: ignore[no-redef] # noqa: E501
83 from openpyxl.worksheet.worksheet import Worksheet as XLWorksheet # type: ignore[no-redef] # noqa: E501
85 pyexcel_xlsx = None
87log = BraceStyleAdapter(logging.getLogger(__name__))
90# =============================================================================
91# Spreadsheet output holding structures
92# =============================================================================
95class SpreadsheetPage(object):
96 """
97 Represents a single "spreadsheet" page, e.g. for TSV/Excel/ODS output.
98 """
100 def __init__(
101 self, name: str, rows: List[Union[Dict[str, Any], OrderedDict]]
102 ) -> None:
103 """
104 Args:
105 name: name for the whole sheet
106 rows: list of rows, where each row is a dictionary mapping
107 column name to value
108 """
109 assert name, "Missing name"
110 self.name = name
111 self.rows = rows
112 self.headings = [] # type: List[str]
113 for row in rows:
114 self._add_headings_if_absent(row.keys())
116 def __str__(self) -> str:
117 return f"SpreadsheetPage: name={self.name}\n{self.get_tsv()}"
119 @classmethod
120 def from_headings_rows(
121 cls, name: str, headings: List[str], rows: List[Sequence[Any]]
122 ) -> "SpreadsheetPage":
123 """
124 Creates a SpreadsheetPage object using a list of headings and the row
125 data as a list of lists.
126 """
127 page = cls(name=name, rows=[])
128 n_cols = len(headings)
129 page.headings = headings
130 for row in rows:
131 assert len(row) == n_cols
132 page.rows.append(dict(zip(headings, row)))
133 return page
135 @classmethod
136 def from_result(cls, name: str, rp: Result) -> "SpreadsheetPage":
137 """
138 Creates a SpreadsheetPage object from an SQLAlchemy Result.
140 Args:
141 rp:
142 A :class:` sqlalchemy.engine.Result`.
143 name:
144 Name for this sheet.
145 """
146 column_names = rp.keys()
147 rows = rp.fetchall()
148 return cls.from_headings_rows(
149 name=name, headings=column_names, rows=rows # type: ignore[arg-type] # noqa: E501
150 )
152 @property
153 def empty(self) -> bool:
154 """
155 Do we have zero rows?
156 """
157 return len(self.rows) == 0
159 def _add_headings_if_absent(self, headings: Iterable[str]) -> None:
160 """
161 Add any headings we've not yet seen to our list of headings.
162 """
163 for h in headings:
164 if h not in self.headings:
165 self.headings.append(h)
167 def add_or_set_value(self, heading: str, value: Any) -> None:
168 """
169 If we contain only a single row, this function will set the value
170 for a given column (``heading``) to ``value``.
172 Raises:
173 :exc:`AssertionError` if we don't have exactly 1 row
174 """
175 assert len(self.rows) == 1, "add_value can only be used if #rows == 1"
176 self._add_headings_if_absent([heading])
177 self.rows[0][heading] = value
179 def add_or_set_column(self, heading: str, values: List[Any]) -> None:
180 """
181 Set the column labelled ``heading`` so it contains the values specified
182 in ``values``. The length of ``values`` must equal the number of rows
183 that we already contain.
185 Raises:
186 :exc:`AssertionError` if the number of values doesn't match
187 the number of existing rows
188 """
189 assert len(values) == len(self.rows), "#values != #existing rows"
190 self._add_headings_if_absent([heading])
191 for i, row in enumerate(self.rows):
192 row[heading] = values[i]
194 def add_or_set_columns_from_page(self, other: "SpreadsheetPage") -> None:
195 """
196 This function presupposes that ``self`` and ``other`` are two pages
197 ("spreadsheets") with *matching* rows.
199 It updates values or creates columns in ``self`` such that the values
200 from all columns in ``other`` are written to the corresponding rows of
201 ``self``.
203 Raises:
204 :exc:`AssertionError` if the two pages (sheets) don't have
205 the same number of rows.
206 """
207 assert len(self.rows) == len(other.rows), "Mismatched #rows"
208 self._add_headings_if_absent(other.headings)
209 for i, row in enumerate(self.rows):
210 for k, v in other.rows[i].items():
211 row[k] = v
213 def add_rows_from_page(self, other: "SpreadsheetPage") -> None:
214 """
215 Add all rows from ``other`` to ``self``.
216 """
217 self._add_headings_if_absent(other.headings)
218 self.rows.extend(other.rows)
220 def sort_headings(self) -> None:
221 """
222 Sort our headings internally.
223 """
224 self.headings.sort()
226 def delete_columns(self, headings: Container[str]) -> None:
227 """
228 Removes columns with the specified heading names.
229 Used to simplify spreadsheets.
231 Since our rows are a dictionary, and our export functions are based on
232 the headings, all we have to do is to delete the unwanted headings.
233 """
234 self.headings = [h for h in self.headings if h not in headings]
236 @property
237 def plainrows(self) -> List[List[Any]]:
238 """
239 Returns a list of rows, where each row is a list of values.
240 Does not include a "header" row.
242 Compare :attr:`rows`, which is a list of dictionaries.
243 """
244 rows = []
245 for row in self.rows:
246 rows.append([row.get(h) for h in self.headings])
247 return rows
249 def spreadsheetrows(
250 self, converter: Callable[[Any], Any]
251 ) -> List[List[Any]]:
252 """
253 Like :meth:`plainrows`, but (a) ensures every cell is converted to a
254 value that can be sent to a spreadsheet converted (e.g. ODS, XLSX), and
255 (b) includes a header row.
256 """
257 rows = [self.headings.copy()]
258 for row in self.rows:
259 rows.append([converter(row.get(h)) for h in self.headings])
260 return rows
262 def get_tsv(self, dialect: str = "excel-tab") -> str:
263 r"""
264 Returns the entire page (sheet) as TSV: one header row and then
265 lots of data rows.
267 For the dialect, see
268 https://docs.python.org/3/library/csv.html#csv.excel_tab.
270 For CSV files, see RGC 4180: https://tools.ietf.org/html/rfc4180.
272 For TSV files, see
273 https://www.iana.org/assignments/media-types/text/tab-separated-values.
275 Test code:
277 .. code-block:: python
279 import io
280 import csv
281 from typing import List
283 def test(row: List[str], dialect: str = "excel-tab") -> str:
284 f = io.StringIO()
285 writer = csv.writer(f, dialect=dialect)
286 writer.writerow(row)
287 return f.getvalue()
289 test(["hello", "world"])
290 test(["hello\ttab", "world"]) # actual tab within double quotes
291 test(["hello\nnewline", "world"]) # actual newline within double quotes
292 test(['hello"doublequote', "world"]) # doubled double quote within double quotes
294 """ # noqa
295 f = io.StringIO()
296 writer = csv.writer(f, dialect=dialect)
297 writer.writerow(self.headings)
298 for row in self.rows:
299 writer.writerow([row.get(h) for h in self.headings])
300 return f.getvalue()
302 def write_to_openpyxl_xlsx_worksheet(self, ws: "XLWorksheet") -> None: # type: ignore[valid-type] # noqa: E501
303 """
304 Writes data from this page to an existing ``openpyxl`` XLSX worksheet.
305 """
306 ws.append(self.headings) # type: ignore[attr-defined]
307 for row in self.rows:
308 ws.append( # type: ignore[attr-defined]
309 [convert_for_openpyxl(row.get(h)) for h in self.headings]
310 )
312 def write_to_odswriter_ods_worksheet(self, ws: "ODSSheet") -> None: # type: ignore[valid-type] # noqa: E501
313 """
314 Writes data from this page to an existing ``odswriter`` ODS sheet.
315 """
316 # noinspection PyUnresolvedReferences
317 ws.writerow(self.headings) # type: ignore[attr-defined]
318 for row in self.rows:
319 # noinspection PyUnresolvedReferences
320 ws.writerow([row.get(h) for h in self.headings]) # type: ignore[attr-defined] # noqa: E501
322 def r_object_name(self) -> str:
323 """
324 Name of the object when imported into R.
325 The main thing: no leading underscores.
326 """
327 n = self.name
328 n = n[1:] if n.startswith("_") else n
329 return f"camcops_{n}" # less chance of conflict within R
331 def r_data_table_definition(self) -> str:
332 """
333 Returns a string to define this object as a ``data.table`` in R.
335 See also:
337 - https://stackoverflow.com/questions/32103639/read-csv-file-in-r-with-double-quotes
338 """ # noqa
339 object_name = self.r_object_name()
340 csv_text = self.get_tsv(dialect="excel")
341 csv_text = csv_text.replace('"', r"\"")
342 definition = (
343 f'data.table::fread(sep=",", header=TRUE, text="{csv_text}"\n)'
344 )
345 return f"{object_name} <- {definition}"
348class SpreadsheetCollection(object):
349 """
350 A collection of
351 :class:`camcops_server.cc_modules.cc_spreadsheet.SpreadsheetPage` pages
352 (spreadsheets), like an Excel workbook.
353 """
355 def __init__(self) -> None:
356 self.pages = [] # type: List[SpreadsheetPage]
358 def __str__(self) -> str:
359 return "SpreadsheetCollection:\n" + "\n\n".join(
360 page.get_tsv() for page in self.pages
361 )
363 # -------------------------------------------------------------------------
364 # Pages
365 # -------------------------------------------------------------------------
367 def page_with_name(self, page_name: str) -> Optional[SpreadsheetPage]:
368 """
369 Returns the page with the specific name, or ``None`` if no such
370 page exists.
371 """
372 return next(
373 (page for page in self.pages if page.name == page_name), None
374 )
376 def add_page(self, page: SpreadsheetPage) -> None:
377 """
378 Adds a new page to our collection. If the new page has the same name
379 as an existing page, rows from the new page are added to the existing
380 page. Does nothing if the new page is empty.
381 """
382 if page.empty:
383 return
384 existing_page = self.page_with_name(page.name)
385 if existing_page:
386 # Blend with existing page
387 existing_page.add_rows_from_page(page)
388 else:
389 # New page
390 self.pages.append(page)
392 def add_pages(self, pages: List[SpreadsheetPage]) -> None:
393 """
394 Adds all ``pages`` to our collection, via :func:`add_page`.
395 """
396 for page in pages:
397 self.add_page(page)
399 def sort_headings_within_all_pages(self) -> None:
400 """
401 Sort headings within each of our pages.
402 """
403 for page in self.pages:
404 page.sort_headings()
406 def sort_pages(self) -> None:
407 """
408 Sort our pages by their page name.
409 """
410 self.pages.sort(key=lambda p: p.name)
412 def get_page_names(self) -> List[str]:
413 """
414 Return a list of the names of all our pages.
415 """
416 return [p.name for p in self.pages]
418 def delete_page(self, page_name: str) -> None:
419 """
420 Delete any page with the name specified.
421 """
422 self.pages = [p for p in self.pages if p.name != page_name]
424 def delete_pages(self, page_names: Container[str]) -> None:
425 """
426 Delete pages with the names specified.
427 """
428 self.pages = [p for p in self.pages if p.name not in page_names]
430 def delete_columns(self, headings: Container[str]) -> None:
431 """
432 Across all pages, removes columns with the specified heading names.
433 Used to simplify spreadsheets.
434 """
435 for p in self.pages:
436 p.delete_columns(headings)
438 # -------------------------------------------------------------------------
439 # TSV
440 # -------------------------------------------------------------------------
442 def get_tsv_file(self, page_name: str) -> str:
443 """
444 Returns a TSV file for a named page.
446 Raises:
447 :exc:`AssertionError` if the named page does not exist
449 """
450 page = self.page_with_name(page_name)
451 assert page is not None, f"No such page with name {page_name}"
452 return page.get_tsv()
454 # -------------------------------------------------------------------------
455 # ZIP of TSVs
456 # -------------------------------------------------------------------------
458 def write_zip(
459 self,
460 file: Union[str, BinaryIO],
461 encoding: str = "utf-8",
462 compression: int = zipfile.ZIP_DEFLATED,
463 ) -> None:
464 """
465 Writes data to a file, as a ZIP file of TSV files.
467 Args:
468 file: filename or file-like object
469 encoding: encoding to use when writing the TSV files
470 compression: compression method to use
472 Choice of compression method: see
474 - https://docs.python.org/3/library/zipfile.html
475 - https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
476 - https://en.wikipedia.org/wiki/Zip_(file_format)#Compression_methods
478 Note also that ``openpyxl`` uses ``ZIP_DEFLATED``, which seems to be
479 the most portable if not the best compression.
480 """
481 if isinstance(file, str): # it's a filename
482 with open(file, "wb") as binaryfile:
483 return self.write_zip(binaryfile, encoding) # recurse once
484 with zipfile.ZipFile(file, mode="w", compression=compression) as z:
485 # Write to ZIP.
486 # If there are no valid task instances, there'll be no TSV;
487 # that's OK.
488 for filename_stem in self.get_page_names():
489 tsv_filename = filename_stem + ".tsv"
490 tsv_contents = self.get_tsv_file(page_name=filename_stem)
491 z.writestr(tsv_filename, tsv_contents.encode(encoding))
493 def as_zip(self, encoding: str = "utf-8") -> bytes:
494 """
495 Returns the TSV collection as a ZIP file containing TSV files.
497 Args:
498 encoding: encoding to use when writing the TSV files
499 """
500 with io.BytesIO() as memfile:
501 self.write_zip(memfile, encoding)
502 zip_contents = memfile.getvalue()
503 return zip_contents
505 # -------------------------------------------------------------------------
506 # XLSX, ODS
507 # -------------------------------------------------------------------------
509 def write_xlsx(self, file: Union[str, BinaryIO]) -> None:
510 """
511 Write the contents in XLSX (Excel) format to a file.
513 Args:
514 file: filename or file-like object
515 """
516 if XLSX_VIA_PYEXCEL: # use pyexcel_xlsx
517 data = self._get_pyexcel_data(convert_for_openpyxl)
518 pyexcel_xlsx.save_data(file, data)
519 else: # use openpyxl
520 # Marginal performance gain with write_only. Does not automatically
521 # add a blank sheet
522 wb = XLWorkbook(write_only=True) # type: ignore[misc]
523 valid_name_dict = self.get_pages_with_valid_sheet_names()
524 for page, title in valid_name_dict.items():
525 ws = wb.create_sheet(title=title)
526 page.write_to_openpyxl_xlsx_worksheet(ws)
527 wb.save(file)
529 def as_xlsx(self) -> bytes:
530 """
531 Returns the TSV collection as an XLSX (Excel) file.
532 """
533 with io.BytesIO() as memfile:
534 self.write_xlsx(memfile)
535 contents = memfile.getvalue()
536 return contents
538 @staticmethod
539 def get_sheet_title(page: SpreadsheetPage) -> str:
540 r"""
541 Returns a worksheet name for a :class:`SpreadsheetPage`.
543 See ``openpyxl/workbook/child.py``.
545 - Excel prohibits ``\``, ``*``, ``?``, ``:``, ``/``, ``[``, ``]``
546 - LibreOffice also prohibits ``'`` as first or last character but let's
547 just replace that globally.
548 """
549 title = re.sub(r"[\\*?:/\[\]']", "_", page.name)
551 if len(title) > 31:
552 title = f"{title[:28]}..."
554 return title
556 def _get_pyexcel_data(
557 self, converter: Callable[[Any], Any]
558 ) -> Dict[str, List[List[Any]]]:
559 """
560 Returns data in the format expected by ``pyexcel``, which is an ordered
561 dictionary mapping sheet names to a list of rows, where each row is a
562 list of cell values.
563 """
564 data = OrderedDict()
565 for page in self.pages:
566 data[self.get_sheet_title(page)] = page.spreadsheetrows(converter)
567 return data
569 def write_ods(self, file: Union[str, BinaryIO]) -> None:
570 """
571 Writes an ODS (OpenOffice spreadsheet document) to a file.
573 Args:
574 file: filename or file-like object
575 """
576 if ODS_VIA_PYEXCEL: # use pyexcel_ods3
577 data = self._get_pyexcel_data(convert_for_pyexcel_ods3)
578 pyexcel_ods3.save_data(file, data)
579 else: # use odswriter
580 if isinstance(file, str): # it's a filename
581 with open(file, "wb") as binaryfile:
582 return self.write_ods(binaryfile) # recurse once
583 # noinspection PyCallingNonCallable
584 with ODSWriter(file) as odsfile: # type: ignore[misc]
585 valid_name_dict = self.get_pages_with_valid_sheet_names()
586 for page, title in valid_name_dict.items():
587 sheet = odsfile.new_sheet(name=title)
588 page.write_to_odswriter_ods_worksheet(sheet)
590 def as_ods(self) -> bytes:
591 """
592 Returns the TSV collection as an ODS (OpenOffice spreadsheet document)
593 file.
594 """
595 with io.BytesIO() as memfile:
596 self.write_ods(memfile)
597 contents = memfile.getvalue()
598 return contents
600 def get_pages_with_valid_sheet_names(self) -> Dict[SpreadsheetPage, str]:
601 """
602 Returns an ordered mapping from :class:`SpreadsheetPage` objects to
603 their sheet names.
604 """
605 name_dict = OrderedDict()
607 for page in self.pages:
608 name_dict[page] = self.get_sheet_title(page)
610 self.make_sheet_names_unique(name_dict)
612 return name_dict
614 @staticmethod
615 def make_sheet_names_unique(name_dict: Dict[SpreadsheetPage, str]) -> None:
616 """
617 Modifies (in place) a mapping from :class:`SpreadsheetPage` to
618 worksheet names, such that all page names are unique.
620 - See also :func:`avoid_duplicate_name` in
621 ``openpxl/workbook/child.py``
622 - We keep the 31 character restriction
623 """
624 unique_names = [] # type: List[str]
626 for page, name in name_dict.items():
627 attempt = 0
629 while name.lower() in unique_names:
630 attempt += 1
632 if attempt > 1000:
633 # algorithm failure, better to let Excel deal with the
634 # consequences than get stuck in a loop
635 log.debug(
636 f"Failed to generate a unique sheet name from {name}"
637 )
638 break
640 match = re.search(r"\d+$", name)
641 count = 0
642 if match is not None:
643 count = int(match.group())
645 new_suffix = str(count + 1)
646 name = name[: -len(new_suffix)] + new_suffix
647 name_dict[page] = name
648 unique_names.append(name.lower())
650 # -------------------------------------------------------------------------
651 # R
652 # -------------------------------------------------------------------------
654 def as_r(self) -> str:
655 """
656 Returns data as an R script.
658 This could be more sophisticated, e.g. creating factors with
659 appropriate levels (etc.).
660 """
661 now = format_datetime(
662 get_now_localtz_pendulum(),
663 DateFormat.ISO8601_HUMANIZED_TO_SECONDS_TZ,
664 )
665 table_definition_str = "\n\n".join(
666 page.r_data_table_definition() for page in self.pages
667 )
668 script = f"""#!/usr/bin/env Rscript
670# R script generated by CamCOPS at {now}
672# =============================================================================
673# Libraries
674# =============================================================================
676library(data.table)
678# =============================================================================
679# Data
680# =============================================================================
682{table_definition_str}
684"""
685 return script
687 def write_r(self, filename: str, encoding: str = "utf-8") -> None:
688 """
689 Write the contents in R format to a file.
691 Args:
692 filename: filename or file-like object
693 encoding: encoding to use
694 """
695 with open(filename, "wt", encoding=encoding) as f:
696 f.write(self.as_r())
699def _make_benchmarking_collection(
700 nsheets: int = 100,
701 nrows: int = 200,
702 ncols: int = 30,
703 mindata: int = 0,
704 maxdata: int = 1000000,
705) -> SpreadsheetCollection:
706 log.info(
707 f"Creating SpreadsheetCollection with nsheets={nsheets}, "
708 f"nrows={nrows}, ncols={ncols}..."
709 )
710 coll = SpreadsheetCollection()
711 for sheetnum in range(1, nsheets + 1):
712 rows = [
713 {
714 f"c{colnum}": str(random.randint(mindata, maxdata))
715 for colnum in range(1, ncols + 1)
716 }
717 for _ in range(1, nrows + 1)
718 ]
719 page = SpreadsheetPage(name=f"sheet{sheetnum}", rows=rows)
720 coll.add_page(page)
721 log.info("... done.")
722 return coll
725def file_size(filename: str) -> int:
726 """
727 Returns a file's size in bytes.
728 """
729 return os.stat(filename).st_size
732def benchmark_save(
733 xlsx_filename: str = "test.xlsx",
734 ods_filename: str = "test.ods",
735 tsv_zip_filename: str = "test.zip",
736 r_filename: str = "test.R",
737) -> None:
738 """
739 Use with:
741 .. code-block:: python
743 from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
744 from camcops_server.cc_modules.cc_spreadsheet import benchmark_save
745 main_only_quicksetup_rootlogger()
746 benchmark_save()
748 Args:
749 xlsx_filename: XLSX file to create
750 ods_filename: ODS file to create
751 tsv_zip_filename: TSV ZIP file to create
752 r_filename: R script to create
754 Problem in Nov 2019 is that ODS is extremely slow. Rough timings:
756 - TSV ZIP: about 4.1 Mb, about 0.2 s. Good.
757 - XLSX (via openpyxl): about 4.6 Mb, 16 seconds.
758 - XLSX (via pyexcel_xlsx): about 4.6 Mb, 16 seconds.
759 - ODS (via odswriter): about 53 Mb, 56 seconds.
760 - ODS (via pyexcel_ods3): about 2.8 Mb, 29 seconds.
761 """
762 coll = _make_benchmarking_collection()
764 log.info("Writing TSV ZIP...")
765 coll.write_zip(tsv_zip_filename)
766 log.info(f"... done. File size {file_size(tsv_zip_filename)}")
768 log.info("Writing XLSX...")
769 coll.write_xlsx(xlsx_filename)
770 log.info(f"... done. File size {file_size(xlsx_filename)}")
772 log.info("Writing ODS...")
773 coll.write_ods(ods_filename)
774 log.info(f"... done. File size {file_size(ods_filename)}")
776 log.info("Writing R...")
777 coll.write_r(r_filename)
778 log.info(f"... done. File size {file_size(r_filename)}")