# SPDX-License-Identifier: GPL-3.0-or-later
#
# Copyright (C) 2025 The Project Authors
# See pyproject.toml for authors/maintainers.
# See LICENSE for license details.
"""
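Tools for reading, editing and writing Markdown notes.

Defines the ``Note`` class (a metadata header plus ``Head``/``Body``/``Tail``
text sections split by ``---`` separators), template-based note types and
collections of notes.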
"""
# IMPORTS
# ***********************************************************************
# import modules from other libs
# Native imports
# =======================================================================
import glob, re
from pathlib import Path
from datetime import datetime
# ... {develop}
# External imports
# =======================================================================
import pandas as pd
# ... {develop}
# Project-level imports
# =======================================================================
from losalamos.root import MbaE, Collection
from losalamos.paths import FOLDER_TEMPLATES_NOTES
# ... {develop}
# CONSTANTS
# ***********************************************************************
# define constants in uppercase
# CONSTANTS -- Project-level
# =======================================================================
# ... {develop}
# Metadata fields holding text values that need harmonization.
# NOTE(review): presumably consumed together with Note.harmonize_entry_text;
# no usage is visible in this module -- confirm against callers.
HARMONIZE_TEXT_FIELDS = [
    "name",
    "title",
    "subtitle",
    "subject",
    "abstract",
    "contract",
    "client",
    "project",
    "comment",
    "caption",
    "alttext",
    "source",
    "document",
    "file",
    "file_draft",
    "publisher",
]

# Metadata fields holding date or timestamp values that need harmonization.
# NOTE(review): presumably consumed together with Note.harmonize_entry_date;
# no usage is visible in this module -- confirm against callers.
HARMONIZE_DATE_FIELDS = [
    "timestamp",
    "date",
    "datetime",
    "date_start",
    "date_end",
]
# Subsubsection example
# -----------------------------------------------------------------------
# CONSTANTS -- Module-level
# =======================================================================
# ... {develop}
# FUNCTIONS
# ***********************************************************************
# FUNCTIONS -- Project-level
# =======================================================================
# ... {develop}
# FUNCTIONS -- Module-level
# =======================================================================
# ... {develop}
# CLASSES
# ***********************************************************************
# CLASSES -- Project-level
# =======================================================================
# NOTES
# -----------------------------------------------------------------------
[docs]
class Note(MbaE):
    """
    Markdown note made of a metadata header and ``Head``/``Body``/``Tail`` text sections.

    .. note::

        ``load_metadata`` and ``metadata_to_list`` are called by this class but
        are not defined in it; they are expected to be provided by the parent
        class or by downstream classes.

    """

    def __init__(self, name="MyNote", alias="Nt1"):
        """
        Initialize the note with empty file, metadata and data slots.

        :param name: Name forwarded to the parent initializer. Default value = ``"MyNote"``
        :type name: str
        :param alias: Alias forwarded to the parent initializer. Default value = ``"Nt1"``
        :type alias: str
        """
        # attributes are declared before the parent initializer runs
        self.file_note = None
        self.metadata = None
        self.data = None

        super().__init__(name=name, alias=alias)
        # ... continues in downstream objects ... #

    def _set_fields(self):
        """
        Extend the parent field names with the note-specific ones.
        """
        super()._set_fields()
        # Attribute fields
        self.field_name_note = "note_name"
        self.field_alias_note = "note_alias"
        self.field_file_note = "note_file"
        # Metadata fields

        # ... continues in downstream objects ... #

    def load_data(self):
        """
        Parse the file in ``file_note`` and store its sections in ``data``.
        """
        self.data = Note.parse_note(self.file_note)

    def load(self):
        """
        Load the metadata and then the data of the note.
        """
        self.load_metadata()
        self.load_data()

    def save(self):
        """
        Write the note back to the file in ``file_note``.
        """
        self.to_file(file_path=self.file_note)

    def export(self, folder, filename, export_metadata=False):
        """
        Export to a markdown file with optional metadata handling.

        :param folder: The directory path where the file will be saved.
        :type folder: str
        :param filename: The name of the file without the extension.
        :type filename: str
        :param export_metadata: Toggle to trigger the parent class metadata export. Default value = ``False``
        :type export_metadata: bool
        :return: Absolute path to the exported file
        :rtype: Path

        .. note::

            The ``.md`` extension is appended to ``filename`` and the file is
            written by ``to_file`` with ``cleanup`` enabled. If ``export_metadata``
            is ``True`` the ``export`` method of the superclass runs first.

        """
        if export_metadata:
            super().export(folder=folder, filename=filename)
        fpath = Path(folder) / f"{filename}.md"
        self.to_file(fpath, cleanup=True)
        return fpath.absolute()

    def to_file(self, file_path, cleanup=True):
        """
        Write the note object's metadata and data content to a specified file.

        :param file_path: The destination path where the note will be written.
        :type file_path: str
        :param cleanup: Toggle to remove excessive blank lines after writing. Default value = ``True``
        :type cleanup: bool
        :return: None
        :rtype: NoneType

        .. note::

            Lines come from ``metadata_to_list`` followed by ``data_to_list``.
            Every ``None`` substring found in the metadata lines is replaced by
            an empty string, and each line is written with a trailing newline.
            If ``cleanup`` is enabled the file is post-processed by
            ``remove_excessive_blank_lines``.

        """
        # metadata
        # ------------------------------------
        # clear "None" values
        ls_lines = [
            line.replace("None", "")
            for line in Note.metadata_to_list(self.metadata)
        ]

        # data
        # ------------------------------------
        ls_lines.extend(Note.data_to_list(self.data))

        with open(file_path, "w", encoding="utf-8") as file:
            file.writelines([line + "\n" for line in ls_lines])

        # clean up excessive lines
        if cleanup:
            Note.remove_excessive_blank_lines(file_path)

        return None

    @staticmethod
    def harmonize_entry_text(entry):
        """
        Normalize a text entry into a double-quoted string free of inner quote characters.

        :param entry: Text to harmonize, or ``None``.
        :type entry: str or None
        :return: The harmonized text wrapped in double quotes, or ``None`` if ``entry`` is ``None``.
        :rtype: str or None

        .. note::

            Single quotes become one backtick and double quotes become two
            backticks. One backtick touching the start and one touching the end
            of the text are then dropped, so an entry that was wrapped in single
            quotes loses its wrapper while an entry that was wrapped in double
            quotes keeps a single backtick on each side.

        """
        if entry is None:
            return None
        text = entry.replace("'", "`").replace('"', "``")
        # Trim on the text itself (not on the wrapped string) so that entries
        # shorter than two characters are not duplicated by overlapping slices.
        if text.startswith("`"):
            text = text[1:]
        if text.endswith("`"):
            text = text[:-1]
        return '"' + text + '"'

    @staticmethod
    def harmonize_entry_date(entry, key="date"):
        """
        Format a date-like entry as a standard date or datetime string.

        :param entry: Value to be parsed by ``pandas.to_datetime``, or ``None``.
        :type entry: str or None
        :param key: Field name; when it contains ``datetime`` or ``timestamp`` the time is included. Default value = ``"date"``
        :type key: str
        :return: ``%Y-%m-%d`` or ``%Y-%m-%d %H:%M:%S`` string, or ``None`` if ``entry`` is ``None`` or cannot be parsed.
        :rtype: str or None
        """
        if entry is None:
            return None

        if "datetime" in key or "timestamp" in key:
            format_mask = "%Y-%m-%d %H:%M:%S"
        else:
            format_mask = "%Y-%m-%d"

        # unparseable values are coerced to NaT and reported as None
        dt = pd.to_datetime(entry, errors="coerce")
        if pd.isna(dt):
            return None

        return dt.strftime(format_mask)

    @staticmethod
    def harmonize_entry_date_old(entry, key="date"):
        """
        Legacy variant of ``harmonize_entry_date``.

        :param entry: Value to be parsed by ``pandas.to_datetime``, or ``None``.
        :type entry: str or None
        :param key: Field name; when it contains ``datetime`` or ``timestamp`` the time is included. Default value = ``"date"``
        :type key: str
        :return: Formatted date string, or ``None`` if ``entry`` is ``None``.
        :rtype: str or None

        .. note::

            Unlike ``harmonize_entry_date``, parsing is done without
            ``errors="coerce"``, so unparseable entries are not turned into ``None``.

        """
        if entry is None:
            return None
        format_mask = "%Y-%m-%d"
        if "datetime" in key or "timestamp" in key:
            format_mask = "%Y-%m-%d %H:%M:%S"
        # Convert string back into a Python object
        dt_object = pd.to_datetime([entry])
        # return string
        return str(dt_object.strftime(format_mask).values[0])

    @staticmethod
    def remove_excessive_blank_lines(file_path):
        """
        Remove consecutive blank lines from a file to ensure only single blank lines remain.

        :param file_path: The path to the target text file to be processed.
        :type file_path: str
        :return: None
        :rtype: NoneType

        .. note::

            The file is rewritten in place. A line counts as blank when it is
            empty after ``strip()``; only the first line of each blank run is kept.

        """
        with open(file_path, "r", encoding="utf-8") as file:
            lines = file.readlines()

        cleaned_lines = []
        previous_line_blank = False
        for line in lines:
            is_blank = line.strip() == ""
            # keep every text line and only the first blank of a run
            if not (is_blank and previous_line_blank):
                cleaned_lines.append(line)
            previous_line_blank = is_blank

        with open(file_path, "w", encoding="utf-8") as file:
            file.writelines(cleaned_lines)

    @staticmethod
    def parse_yaml(yaml_content):
        """
        Parse simple YAML front matter content into a dictionary.

        :param yaml_content: YAML content as string
        :type yaml_content: str
        :return: Parsed content; values are strings, lists of strings, or ``None`` for keys without a value.
        :rtype: dict

        .. note::

            Only flat ``key: value`` pairs and ``- item`` lists under a key with
            an empty value are supported. An inline value of the ``tags`` key is
            split on ``-``. Blank lines and ``---`` delimiter lines are ignored,
            and double quotes wrapping a string value are removed.

        """
        metadata = {}
        current_list = None

        for raw_line in yaml_content.split("\n"):
            line = raw_line.strip()
            # blank lines and document delimiters carry no data
            if line == "" or line == "---":
                continue
            # List items are checked before keys so that an item holding a
            # colon (e.g. a URL) is not mistaken for a new key.
            if current_list is not None and line.startswith("-"):
                current_list.append(line[1:].strip())
                continue
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            key = key.strip()
            value = value.strip()
            if value == "":  # start of a list
                current_list = []
                metadata[key] = current_list
            else:
                # a scalar key closes any list that was being collected
                current_list = None
                if key == "tags":
                    metadata[key] = [
                        v.strip() for v in value.split("-") if v.strip()
                    ]
                else:
                    metadata[key] = value

        for key, value in metadata.items():
            if len(value) == 0:
                # fix empty lists
                metadata[key] = None
            elif (
                isinstance(value, str)
                and len(value) >= 2
                and value[0] == '"'
                and value[-1] == '"'
            ):
                # fix text fields: drop the wrapping quotes
                metadata[key] = value[1:-1]

        return metadata

    @staticmethod
    def data_to_list(data_dict):
        """
        Flatten a dictionary of line lists into a single list with ``---`` separators.

        :param data_dict: A dictionary where each key maps to a list of strings to be aggregated.
        :type data_dict: dict
        :return: A concatenated list of all values with separators between sections.
        :rtype: list

        .. note::

            Sections are appended in the iteration order of ``data_dict``. After
            every section except the one keyed ``Tail`` an empty string and a
            ``---`` separator are inserted.

        """
        ls_out = []
        for level, level_lines in data_dict.items():
            ls_out.extend(level_lines)
            if level != "Tail":
                ls_out.extend(["", "---"])
        return ls_out

    @staticmethod
    def parse_note(file_path):
        """
        Extract and categorize note content into head, body, and tail sections based on separators.

        :param file_path: The path to the note file to be parsed.
        :type file_path: str
        :return: A dictionary containing the stripped lines for ``Head``, ``Body``, and ``Tail``.
        :rtype: dict

        .. note::

            If the first line is ``---`` the block up to the next ``---`` line is
            taken as a YAML header and skipped; when no closing line exists
            nothing is skipped. The remaining ``---`` lines act as separators:
            with none everything is ``Body``; with one, ``Head`` is before it and
            ``Body`` after it; with two or more, ``Body`` lies between the first
            and the last one and ``Tail`` follows the last one. Separator
            detection ignores surrounding whitespace and an empty file yields
            three empty sections.

        """
        with open(file_path, "r", encoding="utf-8") as file:
            lines = [line.strip() for line in file]

        # Skip YAML header if present
        if lines and lines[0] == "---":
            yaml_end = next(
                (i for i in range(1, len(lines)) if lines[i] == "---"), None
            )
            if yaml_end is not None:
                lines = lines[yaml_end + 1 :]

        # Find all separator positions (lines with "---")
        separators = [i for i, line in enumerate(lines) if line == "---"]

        if not separators:
            # No separators, the whole content is the Body
            head, body, tail = [], lines, []
        elif len(separators) == 1:
            # One separator: Head is before, Body is after
            first = separators[0]
            head, body, tail = lines[:first], lines[first + 1 :], []
        else:
            # Two or more separators: Body spans from the first to the last one
            first, last = separators[0], separators[-1]
            head = lines[:first]
            body = lines[first + 1 : last]
            tail = lines[last + 1 :]

        return {"Head": head, "Body": body, "Tail": tail}

    @staticmethod
    def list_by_pattern(md_dict, patt_type="tag"):
        """
        Retrieve a list of patterns from the note dictionary.

        :param md_dict: Dictionary of sections; each value must hold a ``Content`` list of text lines.
        :type md_dict: dict
        :param patt_type: Type of pattern to search for, either "tag" or "related". Any other value is handled as "tag". Defaults to "tag".
        :type patt_type: str
        :return: List of found patterns or None if no patterns are found.
        :rtype: list or None
        """
        if patt_type == "related":
            # wikilinks such as [[Other note]]
            pattern = re.compile(r"\[\[.*?\]\]")
        else:
            # hashtags such as #topic
            pattern = re.compile(r"#\w+")

        patts = []
        # run over all sections
        for section in md_dict.values():
            for line in section["Content"]:
                patts.extend(pattern.findall(line))

        return patts or None
[docs]
class NoteBasic(Note):
    """
    Note backed by a Markdown template file.

    .. note::

        ``load_metadata_standard`` is called by the initializer but is not
        defined in this class; it is expected to be provided by a parent class.

    """

    # template file returned by get_template_file()
    TEMPLATE_FILE = FOLDER_TEMPLATES_NOTES / "_basic.md"
    # size written in the thumbnail wikilink; None disables update_thumbnail()
    THUMBNAIL_SIZE = None

    def __init__(self, name="MyNote", alias="Nt1"):
        """
        Initialize the note and load the standard metadata and data from the template.

        :param name: Name forwarded to the parent initializer. Default value = ``"MyNote"``
        :type name: str
        :param alias: Alias forwarded to the parent initializer. Default value = ``"Nt1"``
        :type alias: str
        """
        super().__init__(name=name, alias=alias)
        self.file_note_template = self.get_template_file()
        self.metadata_standard = self.load_metadata_standard()
        self.data_standard = self.load_data_standard()

    @classmethod
    def get_template_file(cls):
        """
        Retrieves the filesystem path of the template file associated with the class.

        :return: The path to the template file.
        :rtype: :class:`pathlib.Path`
        """
        return Path(cls.TEMPLATE_FILE)

    def load_new(self, file_note):
        """
        Initializes a new note instance using a template and assigns it a new file path.

        :param file_note: The destination path where the new note will be saved.
        :type file_note: str or :class:`pathlib.Path`
        """
        # load the template content first, then retarget the note to the new file
        self.file_note = self.file_note_template
        self.load()
        self.file_note = Path(file_note)
        self.update()
        self.update_timestamp()

    def load_data_standard(self):
        """
        Parses and returns the standard data content from the template file.

        :return: A dictionary containing the parsed data from the template.
        :rtype: dict
        """
        return Note.parse_note(file_path=self.file_note_template)

    def reset_data(self):
        """
        Resets all data segments including ``Head``, ``Body`` and ``Tail`` to their standard values.

        .. danger::

            This action will erase all current data in the instance.

        """
        self.reset_data_head()
        self.reset_data_body()
        self.reset_data_tail()

    def reset_data_segment(self, segment):
        """
        Resets a specific data segment to its standard default state.

        :param segment: The name of the data segment to reset (e.g., ``Head``, ``Body``, or ``Tail``).
        :type segment: str

        .. danger::

            This action will erase the current data for the specified segment.

        """
        # the template is parsed again so the segment gets a fresh copy
        standard = self.load_data_standard()
        self.data[segment] = standard[segment][:]
        self.update()

    def reset_data_head(self):
        """
        Resets the ``Head`` data segment to its standard default state.

        .. danger::

            This action will erase the current data segment.

        """
        self.reset_data_segment("Head")

    def reset_data_body(self):
        """
        Resets the ``Body`` data segment to its standard default state.

        .. danger::

            This action will erase the current data segment.

        """
        self.reset_data_segment("Body")

    def reset_data_tail(self):
        """
        Resets the ``Tail`` data segment to its standard default state.

        .. danger::

            This action will erase the current data segment.

        """
        self.reset_data_segment("Tail")

    def update(self):
        """
        Triggers a sequence of internal updates to synchronize the note's name, abstract, and thumbnail.

        .. note::

            This method acts as a base update sequence; additional update logic may be implemented in downstream classes.

        """
        self.update_name()
        self.update_abstract()
        self.update_thumbnail()
        # -- continues in downstream classes

    def update_name(self):
        """
        Updates the note metadata and the first line of the Head segment with the current file stem.

        .. warning::

            This method assumes the first item in the ``Head`` data segment is the title and will overwrite it.
            When ``Head`` is empty the title line is added instead.

        """
        current_name = Path(self.file_note).stem
        self.metadata["name"] = current_name
        title_line = f"# {current_name}"
        head = self.data["Head"]
        if head:
            # always the first item
            head[0] = title_line
        else:
            # a note without separators has an empty Head
            head.append(title_line)

    def update_abstract(self):
        """
        Synchronizes the abstract from metadata into the Head data segment block.

        .. note::

            The method searches for the first line holding the ``[!Abstract]``
            identifier within the ``Head`` segment and replaces the subsequent
            line with the formatted abstract string (the line is appended when
            the identifier is the last line). Nothing happens when the abstract
            is ``None`` or the identifier is not found.

        """
        abstract = self.metadata["abstract"]
        if abstract is None:
            return
        # drop the wrapping double quotes only when both are present
        if len(abstract) >= 2 and abstract[0] == '"' and abstract[-1] == '"':
            abstract = abstract[1:-1]
        current_abstract = abstract
        head = self.data["Head"]
        for idx, line in enumerate(head):
            if "[!Abstract]" in line:
                abstract_line = f"> {current_abstract}\n"
                if idx + 1 < len(head):
                    head[idx + 1] = abstract_line
                else:
                    head.append(abstract_line)
                break

    def update_thumbnail(self):
        """
        Updates the thumbnail image link in the Head data segment based on the current file name and predefined size.

        .. note::

            The method replaces the first ``Head`` line starting with the
            Wikilink image pattern (``![[``). Nothing happens when
            ``THUMBNAIL_SIZE`` is ``None`` or no such line exists.

        """
        size = self.THUMBNAIL_SIZE
        if size is None:
            return
        # file_note may be a plain string, so it is wrapped before reading the stem
        name = Path(self.file_note).stem
        head = self.data["Head"]
        for idx, line in enumerate(head):
            if line.startswith("![["):
                head[idx] = f"![[{name}.jpeg|{size}]]"
                break

    def update_timestamp(self):
        """
        Records the current local date and time into the note metadata.

        .. note::

            The timestamp is formatted as a string following the ``%Y-%m-%d %H:%M:%S`` pattern.

        """
        now = datetime.now()
        self.metadata["timestamp"] = now.strftime("%Y-%m-%d %H:%M:%S")
[docs]
class NoteProject(NoteBasic):
    """
    Basic note bound to the ``_project.md`` template.
    """

    # template file for this note type
    TEMPLATE_FILE = FOLDER_TEMPLATES_NOTES / "_project.md"
    # no thumbnail size is defined for this note type
    THUMBNAIL_SIZE = None
[docs]
class NoteJournal(NoteBasic):
    """
    Basic note bound to the ``_journal.md`` template, with a title kept in sync with the name.
    """

    # template file for this note type
    TEMPLATE_FILE = FOLDER_TEMPLATES_NOTES / "_journal.md"
    # size written in the thumbnail wikilink
    THUMBNAIL_SIZE = 300

    def update(self):
        """
        Run the parent update sequence and then refresh the title.
        """
        super().update()
        self.update_title()

    def update_title(self):
        """
        Set the ``title`` metadata to the ``name`` metadata wrapped in double quotes.
        """
        name = self.metadata["name"]
        self.metadata["title"] = f'"{name}"'
# COLLECTIONS
# -----------------------------------------------------------------------
[docs]
class NoteCollection(Collection):
    """
    Collection of note objects loaded from Markdown files.
    """

    # class instantiated for every loaded file; overridden by subclasses
    BASE_OBJECT = Note

    def __init__(self, name="MyNoteColl", alias="NtCol0"):
        """
        Initialize the collection with ``BASE_OBJECT`` as its base object.

        :param name: Name forwarded to the parent initializer. Default value = ``"MyNoteColl"``
        :type name: str
        :param alias: Alias forwarded to the parent initializer. Default value = ``"NtCol0"``
        :type alias: str
        """
        super().__init__(base_object=self.BASE_OBJECT, name=name, alias=alias)

    def load_list(self, files):
        """
        Iterates through a list of file paths to initialize, load, and append objects to the collection.

        :param files: A list of file paths to be processed.
        :type files: list
        :return: None
        :rtype: None

        .. note::

            The file stem is used both as name and alias of each object.

        """
        for f in files:
            p = Path(f)
            name = p.stem
            # NOTE(review): ``baseobject`` is presumably set by the parent
            # initializer from ``base_object`` -- confirm against Collection.
            n = self.baseobject(name=name, alias=name)
            n.file_note = p
            n.load()
            self.append(n)
        return None

    def load_folder(self, folder):
        """
        Identifies all Markdown files within a specific directory and adds them to the collection.

        :param folder: The directory path to scan for ``.md`` files.
        :type folder: str
        :return: None
        :rtype: None

        .. note::

            The folder path is escaped so that special characters in it
            (``[``, ``]``, ``*``, ``?``) are matched literally. Files are loaded
            in sorted path order.

        """
        folder_escaped = glob.escape(str(Path(folder)))
        pattern = str(Path(folder_escaped) / "*.md")
        # glob results come in arbitrary order; sorting keeps loads reproducible
        self.load_list(files=sorted(glob.glob(pattern)))
        return None

    def load_pattern(self, pattern):
        """
        Uses a glob pattern to locate files and load them into the collection.

        :param pattern: The search pattern (e.g., ``path/to/*/*.md``) used to match files.
        :type pattern: str
        :return: None
        :rtype: None

        .. note::

            Files are loaded in sorted path order.

        """
        # glob results come in arbitrary order; sorting keeps loads reproducible
        self.load_list(files=sorted(glob.glob(pattern)))
        return None
[docs]
class NoteCollBasic(NoteCollection):
    """
    Note collection whose items are ``NoteBasic`` objects.
    """

    BASE_OBJECT = NoteBasic
[docs]
class NoteCollProject(NoteCollection):
    """
    Note collection whose items are ``NoteProject`` objects.
    """

    BASE_OBJECT = NoteProject
[docs]
class NoteCollJournal(NoteCollection):
    """
    Note collection whose items are ``NoteJournal`` objects.
    """

    BASE_OBJECT = NoteJournal
# ... {develop}
# CLASSES -- Module-level
# =======================================================================
# ... {develop}
# SCRIPT
# ***********************************************************************
# standalone behaviour as a script
def _main():
    """
    Run the standalone behaviour of the module: print a greeting.
    """
    print("Hello world!")


if __name__ == "__main__":
    # Script section
    # ===================================================================
    _main()
    # ... {develop}

    # Script subsection
    # -------------------------------------------------------------------
    # ... {develop}