Coverage for tests/unit/no_torch/test_zanj_basic.py: 100%
73 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-21 22:18 -0700
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-21 22:18 -0700
1from __future__ import annotations
3import json
4import typing
5from pathlib import Path
7import numpy as np
8import pandas as pd # type: ignore
10from zanj import ZANJ
# Seed NumPy's global RNG once at import time so that the random arrays
# built by the tests below are the same on every run.
np.random.seed(seed=0)

# Directory (relative to the working directory) that the tests write their
# ``.zanj`` files into.
TEST_DATA_PATH: Path = Path("tests/junk_data")
def array_meta(x: typing.Any) -> dict:
    """Summarize a value as a small dict of printable metadata.

    For a ``numpy.ndarray`` the result has the keys ``shape`` (the shape as
    a list), ``dtype`` (the dtype as a string) and ``contents`` (``str(x)``).
    For anything else the result has the keys ``type`` (the class name) and
    ``contents`` (``str(x)``).
    """
    # Non-array values only get their type name and string form.
    if not isinstance(x, np.ndarray):
        return {"type": type(x).__name__, "contents": str(x)}

    return {
        "shape": list(x.shape),
        "dtype": str(x.dtype),
        "contents": str(x),
    }
def test_numpy():
    """Save a dict with a string and three random arrays, read it back, compare.

    Keys must match exactly; the string must be equal and each array must be
    ``np.allclose`` to its original.
    """
    # NOTE: the arrays are drawn in this order from the global RNG; keep the
    # order so the generated values stay the same.
    data = {
        "name": "testing zanj",
        "some_array": np.random.rand(128, 128),
        "some_other_array": np.random.rand(16, 64),
        "small_array": np.random.rand(4, 4),
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_numpy.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    # Diagnostic output: keys and per-value metadata before and after the
    # round trip (the `{expr = }` f-strings echo the expression text, so the
    # expressions are left exactly as they were).
    print(f"{list(data.keys()) = }")
    print(f"{list(recovered_data.keys()) = }")
    meta_before: dict = {key: array_meta(value) for key, value in data.items()}
    print(json.dumps(meta_before, indent=2))
    meta_after: dict = {
        key: array_meta(value) for key, value in recovered_data.items()
    }
    print(json.dumps(meta_after, indent=2))

    assert sorted(data) == sorted(recovered_data.keys())
    # Value types are not compared; a type-equality check used to sit here
    # but was commented out.

    # Evaluate every comparison up front, then assert on the whole batch.
    checks = [
        data["name"] == recovered_data["name"],
        np.allclose(data["some_array"], recovered_data["some_array"]),
        np.allclose(data["some_other_array"], recovered_data["some_other_array"]),
        np.allclose(data["small_array"], recovered_data["small_array"]),
    ]
    assert all(checks), f"assert failed:\n{data = }\n{recovered_data = }"
def test_jsonl():
    """Round-trip a dict holding two pandas DataFrames and a random array.

    The DataFrames are loaded from CSV files under ``tests/input_data``.
    After saving and reading back, keys must match, the string must be
    equal, the array must be ``np.allclose`` and each DataFrame must satisfy
    ``DataFrame.equals``.
    """
    # CSVs are read first and the random array drawn last, as before.
    data = {
        "name": "testing zanj jsonl",
        "iris_data": pd.read_csv("tests/input_data/iris.csv"),
        "brain_data": pd.read_csv("tests/input_data/brain_networks.csv"),
        "some_array": np.random.rand(128, 128),
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_jsonl.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    assert sorted(data) == sorted(recovered_data.keys())
    # Value types are not compared; a type-equality check used to sit here
    # but was commented out.

    # Evaluate every comparison up front, then assert on the whole batch.
    checks = [
        data["name"] == recovered_data["name"],
        np.allclose(data["some_array"], recovered_data["some_array"]),
        data["iris_data"].equals(recovered_data["iris_data"]),
        data["brain_data"].equals(recovered_data["brain_data"]),
    ]
    assert all(checks)
def test_polars_dataframe():
    """Round-trip a small polars DataFrame (int, str and float columns).

    The DataFrame is stored alongside a string and a random NumPy array; all
    three must come back equal (``np.allclose`` for the array,
    ``DataFrame.equals`` for the frame).
    """
    import polars as pl

    # One column each of integers, strings and floats.
    mixed_frame = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": ["x", "y", "z"],
            "c": [1.1, 2.2, 3.3],
        }
    )
    data = {
        "name": "testing zanj polars",
        "df": mixed_frame,
        "some_array": np.random.rand(128, 128),
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_polars.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    assert sorted(data) == sorted(recovered_data.keys())

    # Evaluate every comparison up front, then assert on the whole batch.
    checks = [
        data["name"] == recovered_data["name"],
        np.allclose(data["some_array"], recovered_data["some_array"]),
        data["df"].equals(recovered_data["df"]),
    ]
    assert all(checks)
def test_polars_dataframe_empty():
    """Round-trip a polars DataFrame that has three columns and no rows.

    Checks that the recovered frame still has shape ``(0, 3)`` and the
    column names ``a``, ``b``, ``c``.
    """
    import polars as pl

    no_rows = pl.DataFrame({"a": [], "b": [], "c": []})
    data = {
        "name": "testing empty polars df",
        "empty_df": no_rows,
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_polars_empty.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    assert data["name"] == recovered_data["name"]
    # Only shape and column names are checked here, not dtypes.
    assert recovered_data["empty_df"].shape == (0, 3)
    assert recovered_data["empty_df"].columns == ["a", "b", "c"]
def test_polars_dataframe_large():
    """Round-trip a 1000-row polars DataFrame with four column types.

    NOTE(review): the original docstring says this is meant to exercise
    ZANJ's external storage path; the size threshold that triggers it is
    not visible in this file - confirm against the ZANJ configuration.
    """
    import polars as pl

    n_rows = 1000
    # Integer, float, string and boolean columns derived from the row index.
    columns = {
        "int_col": list(range(n_rows)),
        "float_col": [float(idx) * 0.1 for idx in range(n_rows)],
        "str_col": [f"row_{idx}" for idx in range(n_rows)],
        "bool_col": [idx % 2 == 0 for idx in range(n_rows)],
    }
    data = {
        "name": "testing large polars df",
        "large_df": pl.DataFrame(columns),
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_polars_large.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    assert data["name"] == recovered_data["name"]
    assert data["large_df"].equals(recovered_data["large_df"])
def test_polars_with_nulls():
    """Round-trip a polars DataFrame whose columns each contain one ``None``.

    The recovered frame must satisfy ``DataFrame.equals`` against the
    original.
    """
    import polars as pl

    # Int, str and float columns, each with a missing value in a different row
    # position (or the same one, for "a" and "c").
    frame_with_gaps = pl.DataFrame(
        {
            "a": [1, None, 3],
            "b": ["x", "y", None],
            "c": [1.1, None, 3.3],
        }
    )
    data = {
        "name": "testing polars with nulls",
        "df_with_nulls": frame_with_gaps,
    }
    zanj_path: Path = TEST_DATA_PATH.joinpath("test_polars_nulls.zanj")
    archiver: ZANJ = ZANJ()
    archiver.save(data, zanj_path)
    recovered_data = archiver.read(zanj_path)

    assert data["name"] == recovered_data["name"]
    assert data["df_with_nulls"].equals(recovered_data["df_with_nulls"])