Source code for lodstorage.jsonable

"""This module has a class JSONAble for serialization of tables/list of dicts to and from JSON encoding

Created on 2020-09-03

@author: wf
"""
import datetime
import json
import re
import sys

from lodstorage.lod import LOD


class JSONAbleSettings:
    """
    Settings for JSONAble.

    They are kept in a class of their own so that they are never part of
    the ``__dict__`` of an object that gets serialized.
    """

    # indentation passed to json.dumps when an object is dumped via toJSON
    indent = 4

    # regular expression to be used for the conversion from single quotes
    # to double quotes: it matches every single quote that is not
    # preceded by a backslash
    # see https://stackoverflow.com/a/50257217/1497139
    singleQuoteRegex = re.compile("(?<!\\\\)'")
class JSONAble(object):
    """
    mixin to allow classes to be JSON serializable

    see https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
    """

    def __init__(self):
        """
        Constructor
        """

    @classmethod
    def getPluralname(cls):
        """
        get the plural name of this class

        Returns:
            str: the class name with an "s" appended
        """
        return f"{cls.__name__}s"

    @staticmethod
    def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
        """
        convert a single quoted string to a double quoted one

        Args:
            singleQuoted(str): a single quoted string e.g.
                {'cities': [{'name': "Upper Hell's Gate"}]}
            useRegex(bool): True if the regular expression based conversion
                shall be used, otherwise the bracket loop is used

        Returns:
            str: the double quoted version of the string

        Note:
            see https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q
        """
        if useRegex:
            return JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
        return JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted)

    @staticmethod
    def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a
        regular expression

        Every single quote that is not preceded by a backslash is replaced.

        Args:
            singleQuoted(str): a single quoted string e.g.
                {'cities': [{'name': 'Upper Hell\\'s Gate'}]}

        Returns:
            str: the double quoted version of the string

        Note:
            see https://stackoverflow.com/a/50257217/1497139
        """
        return JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)

    @staticmethod
    def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
        """
        convert a single quoted string to a double quoted one by scanning
        the characters one by one

        Single quotes outside of a double quoted section are replaced by
        double quotes; single quotes inside a double quoted section are
        kept as they are.

        Args:
            singleQuoted(str): a single quoted string e.g.
                {'cities': [{'name': "Upper Hell's Gate"}]}

        Returns:
            str: the double quoted version of the string

        Note:
            see https://stackoverflow.com/a/63862387/1497139
        """
        chars = list(singleQuoted)
        inDouble = False
        for index, char in enumerate(chars):
            if char == "'":
                # only quotes outside of a "..." section are converted
                if not inDouble:
                    chars[index] = '"'
            elif char == '"':
                inDouble = not inDouble
        return "".join(chars)

    def getJsonTypeSamples(self):
        """
        get the type samples of my class

        For a JSONAbleList whose own class has no "getSamples" attribute
        the class of the list entries (clazz) is asked instead.

        Returns:
            list: a list of dicts that specify the types by example or
            None if no callable "getSamples" is available
        """
        cls = self.__class__
        if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
            cls = self.clazz
        return JSONAble.getJsonTypeSamplesForClass(cls)

    @staticmethod
    def getJsonTypeSamplesForClass(cls):
        """
        return the type samples for the given class

        Args:
            cls(class): the class to ask for samples (may be None)

        Returns:
            list: a list of dicts that specify the types by example or
            None if the class has no callable "getSamples"
        """
        getSamples = getattr(cls, "getSamples", None)
        if callable(getSamples):
            return getSamples()
        return None

    @staticmethod
    def readJsonFromFile(jsonFilePath):
        """
        read the json string from the given jsonFilePath

        Args:
            jsonFilePath(str): the path of the file to read from

        Returns:
            str: the JSON string read from the file
        """
        # JSON interchange is UTF-8 - do not depend on the locale encoding
        with open(jsonFilePath, "r", encoding="utf-8") as jsonFile:
            return jsonFile.read()

    @staticmethod
    def storeJsonToFile(jsonStr, jsonFilePath):
        """
        store the given json string to the given jsonFilePath

        Args:
            jsonStr(str): the string to store
            jsonFilePath(str): the path of the file to store the string to
        """
        # JSON interchange is UTF-8 - do not depend on the locale encoding
        with open(jsonFilePath, "w", encoding="utf-8") as jsonFile:
            jsonFile.write(jsonStr)

    def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without suffix
            extension(str): the extension to guarantee

        Returns:
            str: the jsonFile name guaranteed to end with the extension
        """
        if jsonFile.endswith(extension):
            return jsonFile
        return f"{jsonFile}{extension}"

    def storeToJsonFile(
        self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
    ):
        """
        store me to the given jsonFile

        Args:
            jsonFile(str): the JSON file name (optionally without extension)
            extension(str): the extension to use if not part of the jsonFile name
            limitToSampleFields(bool): If True the stored JSON is limited to
                the attributes/fields that are present in the samples.
                Otherwise all attributes of the object will be included.
                Default is False.
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

    def restoreFromJsonFile(self, jsonFile: str):
        """
        restore me from the given jsonFile

        Args:
            jsonFile(str): the jsonFile to restore me from
                (optionally without the ".json" extension)
        """
        jsonFile = self.checkExtension(jsonFile)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        self.fromJson(jsonStr)

    def fromJson(self, jsonStr):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
        """
        self.fromDict(json.loads(jsonStr))

    def fromDict(self, data: dict):
        """
        initialize me from the given data

        Each key of the dict becomes an attribute of this object.

        Args:
            data(dict): the dictionary to initialize me from
        """
        for key, value in data.items():
            setattr(self, key, value)

    def toJsonAbleValue(self, v):
        """
        return the JSON able value of the given value v

        This is the "default" hook for json.dumps and is therefore only
        consulted for values the json module can not encode itself.

        Args:
            v(object): the value to convert

        Returns:
            the __dict__ of objects that have one, the ISO format string
            for dates and datetimes and an empty string for anything else
        """
        # objects have __dict__ hash tables which can be JSON-converted
        if hasattr(v, "__dict__"):
            return v.__dict__
        # datetime.datetime is a subclass of datetime.date
        if isinstance(v, datetime.date):
            return v.isoformat()
        return ""

    def toJSON(self, limitToSampleFields: bool = False):
        """
        dump me as a JSON string

        Args:
            limitToSampleFields(bool): If True the returned JSON is limited
                to the attributes/fields that are present in the samples.
                Otherwise all attributes of the object will be included.
                Default is False.

        Returns:
            str: a recursive JSON dump of the dicts of my objects
        """
        if not limitToSampleFields:
            data = self
        else:
            samples = self.getJsonTypeSamples()
            sampleFields = LOD.getFields(samples)
            if isinstance(self, JSONAbleList):
                limitedRecords = []
                for record in self.__dict__[self.listName]:
                    # in non ORM mode the list holds plain dicts
                    fields = record if isinstance(record, dict) else record.__dict__
                    limitedRecords.append(
                        {
                            key: value
                            for key, value in fields.items()
                            if key in sampleFields
                        }
                    )
                data = {self.listName: limitedRecords}
            else:
                data = {
                    key: value
                    for key, value in self.__dict__.items()
                    if key in sampleFields
                }
        return json.dumps(
            data,
            default=self.toJsonAbleValue,
            sort_keys=True,
            indent=JSONAbleSettings.indent,
        )

    def getJSONValue(self, v):
        """
        get the value of the given v as JSON

        Args:
            v(object): the value to get

        Returns:
            the value, making sure that objects providing asJSON are
            returned as dicts, dicts and lists are converted recursively,
            dates/datetimes are returned as ISO format strings and
            booleans as the strings "true"/"false"
        """
        if hasattr(v, "asJSON"):
            return v.asJSON(asString=False)
        # isinstance so that dict/list subclasses are converted too
        if isinstance(v, dict):
            return self.reprDict(v)
        if isinstance(v, list):
            return [self.getJSONValue(item) for item in v]
        # datetime.datetime is a subclass of datetime.date
        if isinstance(v, datetime.date):
            return v.isoformat()
        if isinstance(v, bool):
            # convert True,False to -> true,false
            return str(v).lower()
        return v

    def reprDict(self, srcDict):
        """
        get the given srcDict as a new dict with the values being
        converted with getJSONValue

        Args:
            srcDict(dict): the source dictionary

        Returns:
            dict: the converted dictionary
        """
        return {key: self.getJSONValue(value) for key, value in srcDict.items()}

    def asJSON(self, asString=True, data=None):
        """
        recursively return my dict elements

        Args:
            asString(bool): if True return my result as a string
            data(dict): the data to convert - my __dict__ if None

        Returns:
            str or dict: the converted dict or, if asString is set, its
            string representation with single quotes converted to double
            quotes
        """
        if data is None:
            data = self.__dict__
        jsonDict = self.reprDict(data)
        if not asString:
            return jsonDict
        return JSONAble.singleQuoteToDoubleQuote(str(jsonDict))
class JSONAbleList(JSONAble):
    """
    Container class

    Holds a list of records (dicts or objects of a given class) in an
    attribute whose name is configurable (listName).
    """

    def __init__(
        self,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        initList: bool = True,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
    ):
        """
        Constructor

        Args:
            listName(str): the name of the list attribute to be used for storing the List
            clazz(class): a class to be used for Object relational mapping (if any)
            tableName(str): the name of the "table" to be used - defaults to the listName
            initList(bool): True if the list should be initialized
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        """
        self.clazz = clazz
        self.handleInvalidListTypes = handleInvalidListTypes
        self.filterInvalidListTypes = filterInvalidListTypes
        if listName is None:
            if self.clazz is not None:
                listName = self.clazz.getPluralname()
            else:
                cls = self.__class__
                # honor an explicit string "name" attribute of a subclass,
                # otherwise fall back to the python class name
                defaultName = getattr(cls, "name", None)
                if not isinstance(defaultName, str):
                    defaultName = cls.__name__
                listName = defaultName.lower()
        self.listName = listName
        self.tableName = listName if tableName is None else tableName
        if initList:
            self.__dict__[self.listName] = []

    def getList(self):
        """
        get my list

        Returns:
            list: the list stored in the attribute named by my listName
        """
        return self.__dict__[self.listName]

    def setListFromLoD(self, lod: list) -> list:
        """
        set my list from the given list of dicts

        Args:
            lod(list): a raw record list of dicts

        Returns:
            list: a list of dicts if no clazz is set otherwise a list of objects
        """
        if self.clazz is None:
            # non OO mode
            self.__dict__[self.listName] = lod
        else:
            # ORM mode
            # TODO - handle the errors returned by fromLoD
            self.fromLoD(lod, append=False)
        return self.getList()

    def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
        """
        get a list of dicts from the given JSON string

        Args:
            jsonStr(str): the JSON string
            types(Types): the types to be fixed (if any)
            listName(str): if set a JSON document that is not a dict is
                accepted as the list itself

        Returns:
            list: a list of dicts

        Raises:
            Exception: if the JSON is neither an accepted list nor a dict
                containing my listName as a key
        """
        # read a data structure from the given JSON string
        lodOrDict = json.loads(jsonStr)
        isDict = isinstance(lodOrDict, dict)
        if not isDict and listName is not None:
            lod = lodOrDict
        elif isDict and self.listName in lodOrDict:
            # get the relevant list of dicts
            lod = lodOrDict[self.listName]
        else:
            # only a dict has keys to report
            if isDict:
                found = f"a dict with keys: {lodOrDict.keys()}"
            else:
                found = f"a {type(lodOrDict).__name__}"
            msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound {found} instead"
            raise Exception(msg)
        if types is not None:
            types.fixTypes(lod, self.listName)
        return lod

    def fromLoD(self, lod, append: bool = True, debug: bool = False):
        """
        load my entityList from the given list of dicts

        Args:
            lod(list): the list of dicts to load
            append(bool): if True append to my existing entries
            debug(bool): if True print the errors that occur

        Return:
            list: a list of errors (if any)
        """
        errors = []
        entityList = self.getList()
        if not append:
            # clear in place so that references to my list stay valid
            del entityList[:]
        if self.handleInvalidListTypes:
            LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

        for record in lod:
            # errors are collected per record so that a single bad record
            # does not abort the whole load
            try:
                # call the constructor to get a new instance
                entity = self.clazz()
                entity.fromDict(record)
                entityList.append(entity)
            except Exception as ex:
                error = {self.listName: record, "error": ex}
                errors.append(error)
                if debug:
                    print(error)
        return errors

    def getLookup(self, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name

        Args:
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
        """
        return LOD.getLookup(self.getList(), attrName, withDuplicates)

    def getJsonData(self):
        """
        get my Jsondata

        Returns:
            dict: a dict with my listName as the only key and my list as the value
        """
        return {self.listName: self.__dict__[self.listName]}

    def toJsonAbleValue(self, v):
        """
        make sure we don't store our meta information clazz, tableName and listName
        but just the list we are holding

        Args:
            v(object): the value to convert

        Returns:
            my JSON data if v is me, otherwise the JSON able value of v
        """
        # identity check: a record comparing equal to me must not be
        # replaced by my own data
        if v is self:
            return self.getJsonData()
        return super().toJsonAbleValue(v)

    def fromJson(self, jsonStr, types=None):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
            types(Types): the types to be fixed (if any)
        """
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        self.setListFromLoD(lod)

    def asJSON(self, asString=True):
        """
        recursively return my list as JSON

        Args:
            asString(bool): if True return my result as a string

        Returns:
            str or dict: the JSON representation of my list only
        """
        return super().asJSON(asString, data=self.getJsonData())

    def restoreFromJsonFile(self, jsonFile: str) -> list:
        """
        read my list of dicts from the given file and restore it

        Args:
            jsonFile(str): the jsonFile to restore me from

        Returns:
            list: my list after restoring
        """
        lod = self.readLodFromJsonFile(jsonFile)
        return self.setListFromLoD(lod)

    def restoreFromJsonStr(self, jsonStr: str) -> list:
        """
        restore me from the given jsonStr

        Args:
            jsonStr(str): the json string to restore me from

        Returns:
            list: my list after restoring
        """
        lod = self.readLodFromJsonStr(jsonStr)
        return self.setListFromLoD(lod)

    def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
        """
        read the list of dicts from the given jsonFile

        Args:
            jsonFile(str): the jsonFile to read from
            extension(str): the extension to use if not part of the jsonFile name

        Returns:
            list: a list of dicts
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        return self.readLodFromJsonStr(jsonStr)

    def readLodFromJsonStr(self, jsonStr) -> list:
        """
        read the list of dicts from the given jsonStr

        If type samples are available they are used to fix the types of
        the values read.

        Args:
            jsonStr(str): the JSON string to read from

        Returns:
            list: a list of dicts
        """
        if self.clazz is None:
            typeSamples = self.getJsonTypeSamples()
        else:
            typeSamples = self.clazz.getSamples()
        if typeSamples is None:
            types = None
        else:
            types = Types(
                self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
            )
            # inspect all samples instead of the default limit
            types.getTypes(self.listName, typeSamples, len(typeSamples))
        return self.getLoDfromJson(jsonStr, types, listName=self.listName)
class Types(JSONAble):
    """
    Types holds entity meta Info

    :ivar name(string): entity name = table name
    """

    # the supported types by their type name
    typeName2Type = {
        "bool": bool,
        "date": datetime.date,
        "datetime": datetime.datetime,
        "float": float,
        "int": int,
        "str": str,
    }

    def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
        """
        Constructor

        Args:
            name(str): the name of the type map
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): if True - debugging information should be shown
        """
        self.name = name
        self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
        self.debug = debug
        # maps list name -> field name -> type name
        self.typeMap = {}

    @staticmethod
    def forTable(
        instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
    ):
        """
        get the types for the list of Dicts (table) in the given instance with the given listName

        Args:
            instance(object): the instance to inspect
            listName(string): the list of dicts to inspect
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): True if debugging information should be shown

        Returns:
            Types: a types object named after the class of the instance
        """
        clazz = type(instance)
        types = Types(
            clazz.__name__, warnOnUnsupportedTypes=warnOnUnsupportedTypes, debug=debug
        )
        types.getTypes(listName, instance.__dict__[listName])
        return types

    def addType(self, listName, field, valueType):
        """
        add the python type for the given field to the typeMap

        A type that is already registered for the field is kept.

        Args:
            listName(string): the name of the list of the field
            field(string): the name of the field
            valueType(str): the name of the python type of the field
        """
        fieldTypes = self.typeMap.setdefault(listName, {})
        # the first type seen for a field wins
        fieldTypes.setdefault(field, valueType)

    def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
        """
        determine the types for the given sample records

        Args:
            listName(str): the name of the list
            sampleRecords(list): a list of items
            limit(int): the maximum number of items to check
        """
        # None values are only worth a warning if there is a single sample
        warnOnNone = len(sampleRecords) == 1
        for sampleRecord in sampleRecords[:limit]:
            self.getTypesForItems(
                listName, sampleRecord.items(), warnOnNone=warnOnNone
            )

    def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
        """
        get the types for the given items
        side effect is setting my types

        Args:
            listName(str): the name of the list
            items(list): a list of (key, value) items
            warnOnNone(bool): if TRUE warn if an item value is None
        """
        supportedTypes = Types.typeName2Type.values()
        for key, value in items:
            if value is None:
                if warnOnNone and self.debug:
                    print(
                        f"Warning sampleRecord field {key} is None - using string as type"
                    )
                valueType = str
            else:
                valueType = type(value)
            if valueType not in supportedTypes:
                msg = f"warning: unsupported type {str(valueType)} for field {key}"
                if self.debug and self.warnOnUnsupportedTypes:
                    print(msg)
            # unsupported types are registered by name too - getType
            # resolves them to None later
            self.addType(listName, key, valueType.__name__)

    def fixTypes(self, lod: list, listName: str):
        """
        fix the types in the given data structure

        Args:
            lod(list): a list of dicts
            listName(str): the types to lookup by list name
        """
        if listName in self.typeMap:
            typeMaps = [self.typeMap[listName]]
        else:
            # unknown list name: keep the former behavior of applying
            # every registered type map
            typeMaps = list(self.typeMap.values())
        for typeMap in typeMaps:
            self.fixListOfDicts(typeMap, lod)

    def getType(self, typeName):
        """
        get the type for the given type name

        Args:
            typeName(str): the name of the type

        Returns:
            type: the python type or None if the type name is not supported
        """
        valueType = Types.typeName2Type.get(typeName)
        if valueType is None and self.debug:
            print("Warning unsupported type %s" % typeName)
        return valueType

    def fixListOfDicts(self, typeMap, listOfDicts):
        """
        fix the type in the given list of Dicts

        The records are modified in place: bool, date and datetime fields
        are converted from their string representation. None values and
        fields without an entry in the typeMap are left untouched.

        Args:
            typeMap(dict): maps field names to type names
            listOfDicts(list): the records to fix
        """
        for record in listOfDicts:
            # iterate over a snapshot since the record is modified
            for key, value in list(record.items()):
                if value is None or key not in typeMap:
                    continue
                valueType = self.getType(typeMap[key])
                if valueType == bool:
                    if isinstance(value, str):
                        record[key] = value in ["True", "TRUE", "true"]
                elif valueType == datetime.date:
                    # values that are not strings are kept as they are
                    if isinstance(value, str):
                        dt = datetime.datetime.strptime(value, "%Y-%m-%d")
                        record[key] = dt.date()
                elif valueType == datetime.datetime:
                    # see https://stackoverflow.com/questions/127803/how-do-i-parse-an-iso-8601-formatted-date
                    if isinstance(value, str):
                        if sys.version_info >= (3, 7):
                            dtime = datetime.datetime.fromisoformat(value)
                        else:
                            dtime = datetime.datetime.strptime(
                                value, "%Y-%m-%dT%H:%M:%S.%f"
                            )
                    elif isinstance(value, datetime.datetime):
                        # already converted - do not throw the value away
                        dtime = value
                    else:
                        # TODO: error handling
                        dtime = None
                    record[key] = dtime