Module gamslib.objectdir

Module for object directory management and validation in GAMS library.

Functions

def find_object_folders(root_folder: pathlib._local.Path) ‑> Generator[pathlib._local.Path, None, None]
Expand source code
def find_object_folders(root_folder: Path) -> Generator[Path, None, None]:
    """
    Yield every object folder located at or below ``root_folder``.

    The tree is traversed with ``os.walk`` and every visited directory
    (including ``root_folder`` itself) is tested with ``is_object_folder``.

    Args:
        root_folder (Path): Directory at which the search starts.

    Yields:
        Path: Each visited directory that ``is_object_folder`` accepts.

    Notes:
        - Directories that are not object folders are not yielded; a debug-level
          message is logged for each of them.
    """
    # os.walk() is used instead of Path.walk() because Path.walk() only exists
    # since Python 3.12.
    for dirpath, _dirnames, _filenames in os.walk(root_folder):
        candidate = Path(dirpath)
        if not is_object_folder(candidate):
            logger.debug(
                "Skipping folder %s as it does not contain a DC.xml file.", dirpath
            )
            continue
        yield candidate

Find all object folders in the root folder or below.

Args

root_folder : Path
Root directory to search for object folders.

Yields

Path
Path to each object folder containing a DC.xml file.

Notes

  • Skips folders that do not contain a DC.xml file and logs a debug message.
def is_object_folder(folder_path: pathlib._local.Path) ‑> bool
Expand source code
def is_object_folder(folder_path: Path) -> bool:
    """
    Tell whether ``folder_path`` is an object folder.

    A folder counts as an object folder when it directly contains a file
    named ``DC.xml``.

    Args:
        folder_path (Path): Folder to inspect.

    Returns:
        bool: True if ``folder_path / "DC.xml"`` is an existing file, False otherwise.
    """
    dc_marker = folder_path.joinpath("DC.xml")
    return dc_marker.is_file()

Check if the given folder is an object folder.

An object folder is defined as a folder that contains a DC.xml file.

Args

folder_path : Path
Path to the folder to check.

Returns

bool
True if the folder is an object folder, False otherwise.
def validate_csv_files(object_path: pathlib._local.Path) ‑> None
Expand source code
def validate_csv_files(object_path: Path) -> None:
    """
    Validate the CSV metadata of an object directory.

    Three checks are performed, in this order:
      1. The CSV manager's own ``validate()`` is run.
      2. The manager's ``object_id`` (reported as the directory name in the error
         message) must equal the ``recid`` of the object row.
      3. For every datastream entry, ``object_path / dsid`` must be an existing file.

    Args:
        object_path (Path): Path to the object directory.

    Raises:
        ObjectDirectoryValidationError: If one of the checks above fails, or if
            a ``ValueError`` is raised while performing them (the original error
            is chained as the cause).
    """
    # The ObjectCSVFile class is used to validate the contents of object.csv.
    csv_mgr = _create_csvmgr_with_error_handling(object_path)
    try:
        csv_mgr.validate()

        # The recid stored in object.csv has to agree with the directory name.
        if csv_mgr.object_id != csv_mgr.get_object().recid:
            raise ObjectDirectoryValidationError(
                f"Object directory '{object_path.name}': Directory name '{csv_mgr.object_id}' "
                f"does not match recid '{csv_mgr.get_object().recid}' in object.csv."
            )

        # Every datastream entry needs a matching file inside the directory.
        for dsdata in csv_mgr.get_datastreamdata():
            if (object_path / dsdata.dsid).is_file():
                continue
            raise ObjectDirectoryValidationError(
                f"Object directory '{object_path.name}': Datastream file "
                f"'{dsdata.dspath.split('/')[-1]}' "
                f"referenced in datastreams.csv does not exist."
            )
    except ValueError as err:
        # ObjectDirectoryValidationError does not derive from ValueError, so the
        # errors raised above pass through this handler untouched.
        raise ObjectDirectoryValidationError(
            f"Object directory '{object_path.name}': {err}"
        ) from err

Validate the CSV files in the object directory.

Args

object_path : Path
Path to the object directory.

Raises

ObjectDirectoryValidationError
If any CSV file is invalid.
def validate_dc_file(object_path: pathlib._local.Path) ‑> None
Expand source code
def validate_dc_file(object_path: Path) -> None:
    """
    Validate the DC.xml file of an object directory.

    The file is loaded and validated through ``DublinCore``. Afterwards the
    directory name, with every ``%3A`` replaced by ``:``, must be among the
    values returned for the ``identifier`` element.

    Args:
        object_path (Path): Path to the object directory.

    Raises:
        ObjectDirectoryValidationError: If the directory name is not among the
            identifiers, or if a ``ValueError`` is raised while loading or
            validating the file (the original error is chained as the cause).
    """
    try:
        dublin_core = DublinCore(object_path / "DC.xml")
        dublin_core.validate()
        known_ids = dublin_core.get_element_all_langs("identifier")
        # Directory names carry the colon of the identifier as "%3A".
        expected_id = object_path.name.replace("%3A", ":")
        if expected_id in known_ids:
            return
        raise ObjectDirectoryValidationError(
            f"Object directory '{object_path.name}': DC.xml identifier value does not match "
            f"the object directory name."
        )
    except ValueError as err:
        raise ObjectDirectoryValidationError(
            f"Object directory '{object_path.name}': DC.xml file is invalid: {err}"
        ) from err

Validate the DC.xml file in the object directory.

Args

object_path : Path
Path to the object directory.

Raises

ObjectDirectoryValidationError
If the DC.xml file is invalid.
def validate_directory_structure(object_path: pathlib._local.Path) ‑> None
Expand source code
def validate_directory_structure(object_path: Path) -> None:
    """
    Validate the structure of the object directory.

    The structure is valid if ``object_path`` is a directory that contains
    these files:
      * DC.xml
      * object.csv
      * datastreams.csv

    Each entry must be a file; a sub-directory with one of these names does not
    count. This function does not check the files referenced in
    datastreams.csv.

    Args:
        object_path (Path): Path to the object directory.

    Raises:
        ObjectDirectoryValidationError: If ``object_path`` is not a directory or
            one of the required files is missing.
    """
    if not object_path.is_dir():
        raise ObjectDirectoryValidationError(
            f"Object directory '{object_path.name}' does not exist or is not a directory."
        )

    # (article, filename) pairs; the article keeps the error messages grammatical.
    required_files = (
        ("a", "DC.xml"),
        ("an", "object.csv"),
        ("a", "datastreams.csv"),
    )
    for article, filename in required_files:
        # is_file() rather than exists(): a directory named like a required
        # file must not satisfy the check.
        if not (object_path / filename).is_file():
            raise ObjectDirectoryValidationError(
                f"Object directory '{object_path.name}' does not contain "
                f"{article} {filename} file."
            )

Validate the structure of the object directory.

Structure is valid if it contains at least these files:

  • DC.xml
  • object.csv
  • datastreams.csv
  • all files referenced in datastreams.csv (their existence is checked by validate_csv_files, not by this function)

Args

object_path : Path
Path to the object directory.

Raises

ObjectDirectoryValidationError
If the directory structure is invalid.
def validate_main_resource_id(object_dir: pathlib._local.Path)
Expand source code
def validate_main_resource_id(object_dir: Path) -> None:
    """Validate that the main resource file carries the same ID as the object directory.

    Only main resources detected as TEI or LIDO are checked. The ID extracted
    from the file is compared with the directory name, in which every ``%3A``
    is replaced by ``:``.

    Nothing is checked, and no error is raised, if there is no main resource,
    if it is neither TEI nor LIDO, or if no ID could be extracted from it.

    Args:
        object_dir (Path): Path to the object directory.

    Raises:
        ObjectDirectoryValidationError: If the ID extracted from the main
            resource file differs from the ID of the object directory. The
            raised exception is also a ``ValueError``, so existing handlers
            that catch ``ValueError`` keep working.
    """
    csv_mgr = ObjectCSVManager(object_dir)
    main_resource = csv_mgr.get_mainresource()
    if main_resource is None:
        return

    # The main resource is looked up by the file name part of its dspath.
    main_resource_path = object_dir / Path(main_resource.dspath).name
    main_format = formatdetect.detect_format(main_resource_path)
    if main_format.subtype == formatinfo.SubType.TEI:
        object_id = _extract_id_from_tei(main_resource_path)
    elif main_format.subtype == formatinfo.SubType.LIDO:
        object_id = _extract_id_from_lido(main_resource_path)
    else:
        return
    if object_id is None:
        return

    # Directory names carry the colon of the object ID as "%3A".
    dir_id = object_dir.name.replace("%3A", ":")
    if dir_id != object_id:
        # The documented error type is ObjectDirectoryValidationError, but this
        # check used to raise a plain ValueError. Deriving from both honours the
        # documentation without breaking ``except ValueError`` handlers. The
        # class is created at call time so that it does not matter where
        # ObjectDirectoryValidationError is defined in the module.
        class MainResourceIdMismatchError(ObjectDirectoryValidationError, ValueError):
            """Main resource ID and object directory name disagree."""

        raise MainResourceIdMismatchError(
            f"Object directory name '{object_dir.name}' does not match "
            f"the object ID '{object_id}' extracted from the main resource "
            f"file '{main_resource_path.name}'."
        )

Validate if the main resource file has the same ID as the object directory

Raise an ObjectDirectoryValidationError if the main resource is a TEI or LIDO file and the ID in this file does not match the ID of the object directory.

In all other cases, this function does not raise an error.

Args

object_dir : Path
Path to the object directory.

Raises

ObjectDirectoryValidationError
If the ID in the main resource file does not match the ID of the object directory.

def validate_object_dir(object_path: pathlib._local.Path) ‑> None
Expand source code
def validate_object_dir(object_path: Path) -> None:
    """
    Run all object directory validations on ``object_path``.

    The validators run in a fixed order: directory structure, DC.xml, CSV files
    and finally the ID of the main resource. The first failing validator stops
    the sequence.

    Args:
        object_path (Path): Path to the object directory.

    Raises:
        ObjectDirectoryValidationError: If the directory or required files are
            missing, or if DC.xml or the CSV files are invalid. Any exception
            raised by one of the validators propagates unchanged.
    """
    validators = (
        validate_directory_structure,
        validate_dc_file,
        validate_csv_files,
        validate_main_resource_id,
    )
    for run_check in validators:
        run_check(object_path)

Check if everything needed is present in the object directory.

Args

object_path : Path
Path to the object directory.

Raises

ObjectDirectoryValidationError
If the directory or required files are missing, or if object.csv is invalid.

Classes

class ObjectDirectoryValidationError (*args, **kwargs)
Expand source code
class ObjectDirectoryValidationError(Exception):
    """Signal that an object directory did not pass validation."""

Exception raised when an object directory is invalid.

Ancestors

  • builtins.Exception
  • builtins.BaseException