Coverage for C:\Python311\Lib\site-packages\persist_cache\serialization.py: 100%
75 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-05-06 20:49 +1000
« prev ^ index » next coverage.py v7.3.2, created at 2024-05-06 20:49 +1000
1import pickle
2from typing import Any, Union
4import msgspec
Msgpackables = Union[str, int, list, dict, bool, float, None]
"""Types that are directly msgpackable."""

# Initialise the msgpack encoder and decoder once, at import time, to speed up subsequent serialization and deserialization.
# The decoder is typed with `Msgpackables` so that decoded values are limited to the types listed above.
msgpack_encoder = msgspec.msgpack.Encoder()
msgpack_decoder = msgspec.msgpack.Decoder(type=Msgpackables)
SIGNATURE_SEPARATOR = '\x1c\x1e'
"""A separator used to distinguish between a signature indicating how data has been serialized and the data itself."""

TUPLE_SIGNATURE = f'🌷{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a tuple."""

SET_SIGNATURE = f'🧺{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a set."""

FROZENSET_SIGNATURE = f'🧊🧺{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a frozenset."""

LISTED_SIGNATURES = {TUPLE_SIGNATURE, SET_SIGNATURE, FROZENSET_SIGNATURE}
"""The signatures of data types that are serialized as lists (the signature is stored as the first element of the list)."""

PICKLE_SIGNATURE = f'🥒{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a pickled object."""

BYTES_SIGNATURE = f'🔟{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a bytes object."""

BYTEARRAY_SIGNATURE = f'🔢{SIGNATURE_SEPARATOR}'
"""A signature indicating that serialized data is a bytearray."""

# Preserve the lengths of the signatures to avoid having to constantly recompute them when stripping them off serialized strings.
PICKLE_SIGNATURE_LEN = len(PICKLE_SIGNATURE)
BYTES_SIGNATURE_LEN = len(BYTES_SIGNATURE)
BYTEARRAY_SIGNATURE_LEN = len(BYTEARRAY_SIGNATURE)

STR_SIGNATURES = (PICKLE_SIGNATURE, BYTES_SIGNATURE, BYTEARRAY_SIGNATURE)
"""The signatures of data types that are serialized as strings (the signature is stored as a prefix of the string)."""

ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES = (bool, float, type(None),)
"""Types that are absolutely directly msgpackable."""
def directly_msgpackable(data: Any) -> bool:
    """Determine whether the provided data is directly msgpackable.

    Data is directly msgpackable when it can be handed to the msgpack encoder unchanged and will not be mistaken
    for one of this module's signature-tagged encodings when it is read back.
    """

    # A string is directly msgpackable unless it starts with any of the signatures of data types that are serialized as strings.
    # `str.startswith` accepts a tuple of prefixes, so all of the signatures are tested in a single call.
    if isinstance(data, str):
        return not data.startswith(STR_SIGNATURES)

    # An integer (which includes booleans) is directly msgpackable if it is between -2**63 and 2**64-1, inclusive.
    if isinstance(data, int):
        return -2**63 <= data <= 2**64 - 1

    # A list is directly msgpackable if its first element is not a signature of a data type that is serialized as a list
    # and all of its elements are directly msgpackable. The signature test is cheap, so it runs before the recursive scan.
    if isinstance(data, list):
        if data and isinstance(data[0], str) and data[0] in LISTED_SIGNATURES:
            return False

        return all(directly_msgpackable(d) for d in data)

    # A dictionary is directly msgpackable if all of its keys and values are directly msgpackable.
    if isinstance(data, dict):
        return all(directly_msgpackable(k) and directly_msgpackable(v) for k, v in data.items())

    # Anything else is directly msgpackable only if it is of a type specified by `ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES`.
    return isinstance(data, ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES)
def make_directly_msgpackable(data: Any) -> Msgpackables:
    """Make the given data capable of being directly serialized to msgpack.

    Directly msgpackable data is returned unchanged. Tuples, sets and frozensets become lists whose first element
    is the matching signature, with every member converted recursively. Bytes and bytearrays become strings
    prefixed with the matching signature. Everything else is pickled and returned as a string prefixed with the
    pickle signature.
    """

    # If the data is directly msgpackable, return it as is.
    if directly_msgpackable(data):
        return data

    # If the data is a tuple, set or frozenset, make all of its elements directly msgpackable and return them as a list
    # led by a signature indicating which of those types the data originally was.
    if isinstance(data, tuple):
        return [TUPLE_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    if isinstance(data, set):
        return [SET_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    if isinstance(data, frozenset):
        return [FROZENSET_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    # If the data is a bytes object or bytearray, return it as a string with a signature indicating its original type.
    # Latin-1 maps every byte value to the code point of the same number, so the decoding cannot fail.
    if isinstance(data, bytes):
        return f'{BYTES_SIGNATURE}{data.decode("latin1")}'

    if isinstance(data, bytearray):
        return f'{BYTEARRAY_SIGNATURE}{data.decode("latin1")}'

    # If the data is incapable of otherwise being forced into a directly msgpackable form, pickle it and return it as a
    # string with a signature indicating that it is a pickled object. This is also the path taken by strings and lists
    # that collide with a signature, out-of-range integers, and lists and dictionaries with non-msgpackable contents.
    return f'{PICKLE_SIGNATURE}{pickle.dumps(data).decode("latin1")}'
def serialize(data: Any) -> bytes:
    """Serialize the provided data as msgpack.

    Returns the msgpack-encoded bytes produced by the module-level `msgpack_encoder`.
    """

    # Force the data into a directly msgpackable form.
    msgpackable = make_directly_msgpackable(data)

    # Encode the data as msgpack.
    return msgpack_encoder.encode(msgpackable)
def make_pythonic(data: Msgpackables) -> Any:
    """Transform the provided msgpackable data back into Python objects.

    Strings carrying a pickle, bytes or bytearray signature are decoded to the corresponding object. Non-empty
    lists led by a tuple, set or frozenset signature are rebuilt as that type, with each remaining element
    transformed recursively. All other data, including unsigned lists and dictionaries, is returned unchanged.
    """

    # If the data is a string, check if it has a signature indicating that it is a pickled object, bytes object or
    # bytearray and then decode it to the corresponding object, otherwise return it as is.
    if isinstance(data, str):
        if data.startswith(PICKLE_SIGNATURE):
            # SECURITY: `pickle.loads` can execute arbitrary code; only data from a trusted source should reach this branch.
            return pickle.loads(data[PICKLE_SIGNATURE_LEN:].encode("latin1"))

        if data.startswith(BYTES_SIGNATURE):
            return data[BYTES_SIGNATURE_LEN:].encode("latin1")

        if data.startswith(BYTEARRAY_SIGNATURE):
            return bytearray(data[BYTEARRAY_SIGNATURE_LEN:].encode("latin1"))

        return data

    # If the data is a non-empty list, check if its first element is a signature indicating that it is a tuple, set or
    # frozenset and then rebuild the corresponding object from the remaining elements.
    if isinstance(data, list) and data:
        signature = data[0]

        if signature == TUPLE_SIGNATURE:
            return tuple(make_pythonic(d) for d in data[1:])

        if signature == SET_SIGNATURE:
            return {make_pythonic(d) for d in data[1:]}

        if signature == FROZENSET_SIGNATURE:
            return frozenset(make_pythonic(d) for d in data[1:])

    # Unsigned lists, empty lists and data that is neither a string nor a list are returned as is.
    return data
def deserialize(data: bytes) -> Any:
    """Deserialize the provided msgpack-encoded data.

    SECURITY: decoded strings that carry the pickle signature are passed to `pickle.loads` by `make_pythonic`,
    so this function must only be given data from a trusted source.
    """

    # Decode the data.
    decoded = msgpack_decoder.decode(data)

    # Transform the data back into Python objects.
    return make_pythonic(decoded)