docs for zanj v0.6.0
View Source on GitHub

zanj.externals

for storing/retrieving an item externally in a ZANJ archive


 1"""for storing/retrieving an item externally in a ZANJ archive"""
 2
 3from __future__ import annotations
 4
 5import json
 6from typing import IO, Any, Callable, Literal, NamedTuple, get_args
 7
 8import numpy as np
 9from muutils.json_serialize.json_serialize import ObjectPath
10
11from zanj.consts import JSONitem
12
# this is to make type checking work -- it will later be overridden
# (stand-in for the ZANJ type in the loader signatures used by
# `EXTERNAL_LOAD_FUNCS` and `GET_EXTERNAL_LOAD_FUNC`)
_ZANJ_pre = Any

# NOTE(review): presumably the names of the main and metadata JSON files
# inside a ZANJ archive -- confirm against the code that reads/writes archives
ZANJ_MAIN: str = "__zanj__.json"
ZANJ_META: str = "__zanj_meta__.json"

# closed set of formats an externally stored item can use;
# these are exactly the keys of `EXTERNAL_LOAD_FUNCS`
ExternalItemType = Literal["jsonl", "npy"]

# the same format names as a runtime tuple, extracted from the `Literal` above
ExternalItemType_vals = get_args(ExternalItemType)

# record describing one externally stored item
ExternalItem = NamedTuple(
    "ExternalItem",
    [
        # which external format the item uses
        ("item_type", ExternalItemType),
        # the item's payload; its type is not constrained here
        ("data", Any),
        # NOTE(review): presumably where the item sits within the serialized
        # object tree -- confirm against `ObjectPath` in muutils
        ("path", ObjectPath),
    ],
)
31
32
def load_jsonl(zanj: "LoadedZANJ", fp: IO[bytes]) -> list[JSONitem]:  # type: ignore[name-defined] # noqa: F821
    """Parse a JSON Lines stream into a list, one decoded value per line.

    Blank and whitespace-only lines are skipped rather than passed to
    `json.loads`, which would raise `json.JSONDecodeError` on them.

    Args:
        zanj: not used by this loader; accepted so that every loader in
            `EXTERNAL_LOAD_FUNCS` shares the same `(zanj, fp)` signature.
        fp: iterable of lines (typically a binary file object), each
            holding one JSON document.

    Returns:
        The decoded JSON values, in the order they appear in `fp`.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # `line.strip()` is empty (falsy) for blank lines, for both bytes and str
    return [json.loads(line) for line in fp if line.strip()]
35
36
def load_npy(zanj: "LoadedZANJ", fp: IO[bytes]) -> np.ndarray:  # type: ignore[name-defined] # noqa: F821
    """Deserialize the binary stream `fp` with `numpy.load` and return the result.

    Args:
        zanj: not used by this loader; accepted so that every loader in
            `EXTERNAL_LOAD_FUNCS` shares the same `(zanj, fp)` signature.
        fp: binary file object positioned at the start of the saved data.

    Returns:
        Whatever `numpy.load` produces for `fp`.
    """
    loaded = np.load(fp)
    return loaded
39
40
# dispatch table: external item type -> loader for that format.
# every loader is called as `loader(zanj, fp)` with a binary file object
# and returns the decoded item.
EXTERNAL_LOAD_FUNCS: dict[ExternalItemType, Callable[[_ZANJ_pre, IO[bytes]], Any]] = {
    "jsonl": load_jsonl,
    "npy": load_npy,
}
45
46
def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
    """Look up the loader registered for `item_type` in `EXTERNAL_LOAD_FUNCS`.

    Args:
        item_type: name of the external format, e.g. a key of
            `EXTERNAL_LOAD_FUNCS`.

    Returns:
        The loader callable, taking `(zanj, fp)`.

    Raises:
        ValueError: if `item_type` is not a key of `EXTERNAL_LOAD_FUNCS`.
    """
    if item_type in EXTERNAL_LOAD_FUNCS:
        # membership was just verified, so the Literal-key type error is moot
        return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]

    raise ValueError(
        f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
    )

ZANJ_MAIN: str = '__zanj__.json'
ZANJ_META: str = '__zanj_meta__.json'
ExternalItemType = typing.Literal['jsonl', 'npy']
ExternalItemType_vals = ('jsonl', 'npy')
class ExternalItem(typing.NamedTuple):

ExternalItem(item_type, data, path)

ExternalItem( item_type: Literal['jsonl', 'npy'], data: Any, path: tuple[typing.Union[str, int], ...])

Create new instance of ExternalItem(item_type, data, path)

item_type: Literal['jsonl', 'npy']

Alias for field number 0

data: Any

Alias for field number 1

path: tuple[typing.Union[str, int], ...]

Alias for field number 2

Inherited Members
builtins.tuple
index
count
def load_jsonl(zanj: 'LoadedZANJ', fp: IO[bytes]) -> list[typing.Union[bool, int, float, str, NoneType, typing.Sequence[typing.Union[bool, int, float, str, NoneType, typing.Sequence[ForwardRef('JSONitem')], typing.Dict[str, ForwardRef('JSONitem')]]], typing.Dict[str, typing.Union[bool, int, float, str, NoneType, typing.Sequence[ForwardRef('JSONitem')], typing.Dict[str, ForwardRef('JSONitem')]]]]]:
34def load_jsonl(zanj: "LoadedZANJ", fp: IO[bytes]) -> list[JSONitem]:  # type: ignore[name-defined] # noqa: F821
35    return [json.loads(line) for line in fp]
def load_npy(zanj: 'LoadedZANJ', fp: IO[bytes]) -> numpy.ndarray:
38def load_npy(zanj: "LoadedZANJ", fp: IO[bytes]) -> np.ndarray:  # type: ignore[name-defined] # noqa: F821
39    return np.load(fp)
EXTERNAL_LOAD_FUNCS: dict[typing.Literal['jsonl', 'npy'], typing.Callable[[zanj.ZANJ, typing.IO[bytes]], typing.Any]] = {'jsonl': <function load_jsonl>, 'npy': <function load_npy>}
def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[zanj.ZANJ, IO[bytes]], Any]:
48def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
49    if item_type not in EXTERNAL_LOAD_FUNCS:
50        raise ValueError(
51            f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
52        )
53    # safe to ignore since we just checked
54    return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]