Coverage for C:\Python311\Lib\site-packages\persist_cache\serialization.py: 100%

75 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-05-06 20:49 +1000

1import pickle 

2from typing import Any, Union 

3 

4import msgspec 

5 

Msgpackables = Union[str, int, list, dict, bool, float, None]
"""The types that the msgpack decoder is allowed to produce and that can be handed to the encoder without any signature-based conversion."""

# Build the msgpack encoder and decoder a single time at import so that every later call to serialize or deserialize reuses them instead of constructing new ones.
msgpack_encoder = msgspec.msgpack.Encoder()
msgpack_decoder = msgspec.msgpack.Decoder(type=Msgpackables)

12 

SIGNATURE_SEPARATOR = '\x1c\x1e'
"""The separator placed between a signature, which records how data was serialized, and the data itself."""

TUPLE_SIGNATURE = '🌷' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a tuple."""

SET_SIGNATURE = '🧺' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a set."""

FROZENSET_SIGNATURE = '🧊🧺' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a frozenset."""

LISTED_SIGNATURES = {TUPLE_SIGNATURE, SET_SIGNATURE, FROZENSET_SIGNATURE}
"""Every signature belonging to a data type that is serialized as a list."""

PICKLE_SIGNATURE = '🥒' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a pickled object."""

BYTES_SIGNATURE = '🔟' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a bytes object."""

BYTEARRAY_SIGNATURE = '🔢' + SIGNATURE_SEPARATOR
"""The signature marking serialized data as a bytearray."""

STR_SIGNATURES = (PICKLE_SIGNATURE, BYTES_SIGNATURE, BYTEARRAY_SIGNATURE)
"""Every signature belonging to a data type that is serialized as a string."""

# Store the length of each string signature once so that stripping a signature never has to recompute it.
PICKLE_SIGNATURE_LEN, BYTES_SIGNATURE_LEN, BYTEARRAY_SIGNATURE_LEN = map(len, STR_SIGNATURES)

ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES = (bool, float, type(None))
"""Types whose instances are always directly msgpackable, with no further checks required."""

47 

def directly_msgpackable(data: Any) -> bool:
    """Determine whether the provided data is directly msgpackable.

    Data is directly msgpackable if:
    - It is a string that does not start with any of the signatures of data types that are serialized as strings;
    - It is an integer between -2**63 and 2**64-1, inclusive;
    - It is a list that is either empty or whose first element is not a signature of a data type that is serialized as a list, and all of its elements are directly msgpackable;
    - It is a dictionary and all of its keys and values are directly msgpackable; or
    - It is of a type specified by `ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES`.

    The checks use `isinstance`, so instances of subclasses of these types are accepted as well.

    Args:
        data: The object to inspect. Lists and dictionaries are inspected recursively.

    Returns:
        `True` if the data satisfies one of the conditions above, otherwise `False`.
    """

    # `str.startswith` accepts a tuple of prefixes, so all of the string signatures are tested in a single call.
    if isinstance(data, str) and not data.startswith(STR_SIGNATURES):
        return True

    # Booleans are integers too, so they are accepted here.
    if isinstance(data, int) and -2**63 <= data <= 2**64 - 1:
        return True

    # Check the first element before walking the list, as that check is cheap whereas the walk is recursive. The `isinstance` guard keeps unhashable first elements away from the set membership test.
    if (
        isinstance(data, list)
        and not (data and isinstance(data[0], str) and data[0] in LISTED_SIGNATURES)
        and all(directly_msgpackable(d) for d in data)
    ):
        return True

    if isinstance(data, dict) and all(
        directly_msgpackable(k) and directly_msgpackable(v) for k, v in data.items()
    ):
        return True

    return isinstance(data, ABSOLUTELY_DIRECTLY_MSGPACKABLE_TYPES)

65 

def make_directly_msgpackable(data: Any) -> Msgpackables:
    """Make the given data capable of being directly serialized to msgpack.

    Data that is already directly msgpackable is returned unchanged. Tuples, sets and frozensets become lists whose first element is the matching signature and whose remaining elements have themselves been made directly msgpackable. Bytes objects and bytearrays become strings made of the matching signature followed by the content decoded as latin1. Anything else is pickled and returned as a string made of the pickle signature followed by the pickle decoded as latin1.

    The container and bytes branches match on the exact type rather than with `isinstance`. Instances of subclasses (for example, named tuples) are therefore pickled, so that they keep their type when deserialized instead of being silently turned into the plain built-in type.

    Args:
        data: The object to convert.

    Returns:
        A directly msgpackable representation of the data.

    Raises:
        Whatever `pickle.dumps` raises if the data has to be pickled but cannot be.
    """

    # If the data is directly msgpackable, return it as is.
    if directly_msgpackable(data):
        return data

    # Look the type up once and compare by identity so that only genuine built-in instances take the signature-based branches below.
    data_type = type(data)

    # Tuples, sets and frozensets are returned as lists that begin with the signature of their type and continue with their elements made directly msgpackable.
    if data_type is tuple:
        return [TUPLE_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    if data_type is set:
        return [SET_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    if data_type is frozenset:
        return [FROZENSET_SIGNATURE, *(make_directly_msgpackable(d) for d in data)]

    # Bytes objects and bytearrays are returned as strings that begin with the signature of their type. Latin1 maps every byte to the code point of the same value, so the content can be recovered by encoding it as latin1 again.
    if data_type is bytes:
        return f'{BYTES_SIGNATURE}{data.decode("latin1")}'

    if data_type is bytearray:
        return f'{BYTEARRAY_SIGNATURE}{data.decode("latin1")}'

    # If the data cannot be forced into a directly msgpackable form any other way, pickle it and return it as a string with a signature indicating that it is a pickled object.
    return f'{PICKLE_SIGNATURE}{pickle.dumps(data).decode("latin1")}'

95 

def serialize(data: Any) -> bytes:
    """Serialize the provided data as msgpack.

    Args:
        data: The object to serialize. It is first forced into a directly msgpackable form by `make_directly_msgpackable`.

    Returns:
        The msgpack encoding of the data, as produced by the module-level msgpack encoder.
    """

    # Force the data into a directly msgpackable form.
    msgpackable = make_directly_msgpackable(data)

    # Encode the data as msgpack.
    return msgpack_encoder.encode(msgpackable)

106 

def make_pythonic(data: Msgpackables) -> Any:
    """Transform the provided msgpackable data back into Python objects.

    Strings that start with the pickle, bytes or bytearray signature are decoded into the corresponding object. Non-empty lists whose first element is the tuple, set or frozenset signature are rebuilt as that type, with every remaining element transformed recursively. All other data is returned unchanged.

    SECURITY: strings carrying the pickle signature are passed to `pickle.loads`, which can execute arbitrary code. Only data from a trusted source should be given to this function.

    Args:
        data: Decoded msgpack data.

    Returns:
        The Python object that the data represents.
    """

    # If the data is a string, check whether it has a signature indicating that it is a pickled object, bytes object or bytearray and decode it accordingly, otherwise return it as is.
    if isinstance(data, str):
        if data.startswith(PICKLE_SIGNATURE):
            # NOTE: unpickling runs code chosen by whoever produced the data, so the data must be trusted.
            return pickle.loads(data[PICKLE_SIGNATURE_LEN:].encode("latin1"))

        if data.startswith(BYTES_SIGNATURE):
            return data[BYTES_SIGNATURE_LEN:].encode("latin1")

        if data.startswith(BYTEARRAY_SIGNATURE):
            return bytearray(data[BYTEARRAY_SIGNATURE_LEN:].encode("latin1"))

        return data

    # If the data is a non-empty list, check whether its first element is a signature indicating that it is a tuple, set or frozenset and rebuild it accordingly.
    if isinstance(data, list) and data:
        signature = data[0]

        if signature == TUPLE_SIGNATURE:
            return tuple(make_pythonic(d) for d in data[1:])

        if signature == SET_SIGNATURE:
            return {make_pythonic(d) for d in data[1:]}

        if signature == FROZENSET_SIGNATURE:
            return frozenset(make_pythonic(d) for d in data[1:])

    # Unsigned lists, empty lists and data that is neither a string nor a list are returned as is.
    return data

139 

def deserialize(data: bytes) -> Any:
    """Deserialize the provided msgpack-encoded data.

    Args:
        data: Msgpack-encoded data, such as the output of `serialize`.

    Returns:
        The Python object obtained by decoding the data with the module-level msgpack decoder and passing the result through `make_pythonic`.
    """

    # Decode the data.
    decoded = msgpack_decoder.decode(data)

    # Transform the data back into Python objects.
    return make_pythonic(decoded)