schema_sentinel.metadata_manager.model
"""Shared constants, logging configuration and declarative base classes for the metadata model."""

from __future__ import annotations

import logging as log
import os
from abc import abstractmethod

import pandas as pd
from sqlalchemy.ext.declarative import declarative_base

# Attribute names ignored by compare_obj() and omitted from CommonBase.__repr__().
ATTRIBUTES_TO_EXCLUDE = [
    "database_id",
    "table_id",
    "environment",
    "version",
    "created",
    "last_altered",
    "schema_id",
    "id",
    "column_id",
    "last_suspended",
    "table_constraint_id",
    "column_constraint_id",
    "referential_constraint_id",
    "view_id",
    "pipe_id",
    "task_id",
    "stream_id",
    "function_id",
    "procedure_id",
    "last_ddl",
    "stale_after",
    "bytes",
    "row_count",
]
PROJECT_NAME = "schema-sentinel"
TEMP_DIR = os.getenv("TEMP") if os.name == "nt" else "/tmp"
LOG_FILE = os.path.join(TEMP_DIR, "schema-sentinel.log")
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# dirname(join(cwd, PROJECT_NAME)) resolves to the current working directory.
PROJECT_HOME = os.path.dirname(os.path.join(os.path.abspath("./"), PROJECT_NAME))
RESOURCES_PATH = os.path.join(PROJECT_HOME, "resources")
META_DB_PATH = os.path.join(RESOURCES_PATH, "meta-db")

# Environment prefixes removed from values before they are compared.
_ENVIRONMENT_PREFIXES = ("US_NONPROD_", "US_NON_PROD_", "US_DEV_", "US_CERT_", "US_PROD_")

# Longest prefix of a "*_definition" value kept in a comparison report.
_DEFINITION_PREVIEW_LENGTH = 100

log.basicConfig(
    level=LOG_LEVEL,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s",
    handlers=[log.FileHandler(LOG_FILE), log.StreamHandler()],
)


def _truncate(value, limit: int = _DEFINITION_PREVIEW_LENGTH):
    """Return the first ``limit`` characters of a string; pass other values through."""
    return value[:limit] if isinstance(value, str) else value


def compare_obj(left, right) -> dict:
    """Compare the instance attributes of ``left`` with the same attributes on ``right``.

    Attributes listed in ``ATTRIBUTES_TO_EXCLUDE``, starting with ``_`` or
    ending with ``_id`` are skipped. Values that differ only by letter case
    or by an environment prefix are treated as equal.

    Args:
        left: object whose ``__dict__`` drives the comparison.
        right: object read with ``getattr``; an attribute that cannot be
            read is treated as ``None``.

    Returns:
        A dict with the keys ``"left"`` and ``"right"`` (class names) and
        ``"differences"``, mapping attribute name to ``[left_val, right_val]``.
        ``*_definition`` values are truncated to 100 characters.
    """
    comparison = {
        "left": left.__class__.__name__,
        "right": right.__class__.__name__,
        "differences": {},
    }
    for attribute, left_val in left.__dict__.items():
        if attribute in ATTRIBUTES_TO_EXCLUDE or attribute.startswith("_") or attribute.endswith("_id"):
            continue

        try:
            right_val = getattr(right, attribute)
        except Exception:
            # Deliberate best effort: an unreadable attribute counts as missing.
            right_val = None

        if left_val != right_val:
            if attribute.endswith("_definition"):
                # Guarded truncation: the right-hand value may be None.
                left_val = _truncate(left_val)
                right_val = _truncate(right_val)
            else:
                left_val_no_env = drop_environment(str(left_val).upper())
                right_val_no_env = drop_environment(str(right_val).upper())
                if left_val_no_env == right_val_no_env:
                    continue
            log.debug("Values are not equal: left %s, right %s", left_val, right_val)
            comparison["differences"][attribute] = [left_val, right_val]

    return comparison


def drop_environment(s: str) -> str:
    """Remove the first matching environment prefix from ``s``.

    Args:
        s: text to normalise; ``None`` yields an empty string.

    Returns:
        ``s`` with every occurrence of the first prefix found removed, or
        ``s`` unchanged when it contains none of the prefixes.
    """
    if s is None:
        return ""

    for prefix in _ENVIRONMENT_PREFIXES:
        if prefix in s:
            return s.replace(prefix, "")

    # No prefix present: return the text itself. Falling through would return
    # None and make every prefix-free pair of values look equal to compare_obj.
    return s


Base = declarative_base()


class CommonBase(Base):
    """Abstract declarative base with helpers that expose an instance as pandas data.

    The helpers read ``self.__dict__`` and ignore every attribute whose name
    starts with an underscore.
    """

    # SQLAlchemy declarative directive: this class itself is not mapped to a table.
    __abstract__ = True

    # NOTE(review): @abstractmethod is only enforced for classes whose metaclass
    # derives from ABCMeta - confirm whether that holds for this declarative base.
    @abstractmethod
    def __get_id__(self):
        """Return the identifier of this object (no implementation here)."""
        pass

    def __get_header__(self) -> list:
        """Return the names of the public instance attributes."""
        return [attribute for attribute in self.__dict__ if not attribute.startswith("_")]

    def __get_values__(self) -> list:
        """Return the values of the public instance attributes, in header order."""
        return [value for attribute, value in self.__dict__.items() if not attribute.startswith("_")]

    def __get_df__(self, columns=None) -> pd.DataFrame:
        """Return a two-column frame with one row per public attribute.

        Args:
            columns: the two column labels; defaults to ``["Attribute", "Value"]``.
        """
        if columns is None:
            columns = ["Attribute", "Value"]

        data = [
            [attribute, value]
            for attribute, value in self.__dict__.items()
            if not attribute.startswith("_")
        ]
        return pd.DataFrame(data=data, columns=columns)

    def __get_row__(self) -> pd.DataFrame:
        """Return a single-row frame whose columns are the public attributes."""
        return pd.DataFrame(data=[self.__get_values__()], columns=self.__get_header__())

    def __side_by_side__(self, other):
        """Return a frame with this object's and ``other``'s values joined on "Attribute".

        Raises:
            TypeError: if the class names of ``self`` and ``other`` differ.
        """
        if self.__class__.__name__ != other.__class__.__name__:
            raise TypeError(f"{other.__class__.__name__} is not {self.__class__.__name__}")
        left = self.__get_df__(["Attribute", "Left"])
        right = other.__get_df__(["Attribute", "Right"])
        return pd.merge(left, right, on="Attribute")

    def __repr__(self) -> str:
        """Return the class followed by ``name:[value], `` pairs; falsy values print as empty."""
        pairs = "".join(
            f"{attribute}:[{value if value else ''}], "
            for attribute, value in self.__dict__.items()
            if attribute not in ATTRIBUTES_TO_EXCLUDE and not attribute.startswith("_")
        )
        return f"{self.__class__}({pairs})"

    @staticmethod
    def __to_df__(data: list, columns: list) -> pd.DataFrame:
        """Stack the rows of the objects in ``data`` into one frame.

        Args:
            data: objects providing ``__get_row__()``.
            columns: columns to select; when falsy the whole frame is returned.

        Returns:
            An empty frame with ``columns`` when ``data`` is empty, otherwise
            the stacked rows.
        """
        rows = [item.__get_row__() for item in data]
        if not rows:
            return pd.DataFrame(columns=columns)

        # One concat instead of one per item avoids re-copying the growing frame.
        # reset_index() keeps the old per-row index as an "index" column.
        df = pd.concat(rows).reset_index()

        if columns:
            return df[columns]

        return df
ATTRIBUTES_TO_EXCLUDE =
['database_id', 'table_id', 'environment', 'version', 'created', 'last_altered', 'schema_id', 'id', 'column_id', 'last_suspended', 'table_constraint_id', 'column_constraint_id', 'referential_constraint_id', 'view_id', 'pipe_id', 'task_id', 'stream_id', 'function_id', 'procedure_id', 'last_ddl', 'stale_after', 'bytes', 'row_count']
PROJECT_NAME =
'schema-sentinel'
TEMP_DIR =
'/tmp'
LOG_FILE =
'/tmp/schema-sentinel.log'
LOG_LEVEL =
'INFO'
PROJECT_HOME =
$PWD
RESOURCES_PATH =
'/Users/igor.gladyshev/PycharmProjects/schema-sentinel/resources'
META_DB_PATH =
'/Users/igor.gladyshev/PycharmProjects/schema-sentinel/resources/meta-db'
def
compare_obj(left, right) -> {}:
def _truncate(value, limit: int = 100):
    """Return the first ``limit`` characters of a string; pass other values through."""
    return value[:limit] if isinstance(value, str) else value


def _without_environment(value) -> str:
    """Return ``str(value)`` upper-cased with its environment prefix removed."""
    text = str(value).upper()
    stripped = drop_environment(text)
    # drop_environment can return None when no prefix matches; fall back to the
    # text itself so that two prefix-free values are not reported as equal.
    return text if stripped is None else stripped


def compare_obj(left, right) -> dict:
    """Compare the instance attributes of ``left`` with the same attributes on ``right``.

    Attributes listed in ``ATTRIBUTES_TO_EXCLUDE``, starting with ``_`` or
    ending with ``_id`` are skipped. Values that differ only by letter case
    or by an environment prefix are treated as equal.

    Args:
        left: object whose ``__dict__`` drives the comparison.
        right: object read with ``getattr``; an attribute that cannot be
            read is treated as ``None``.

    Returns:
        A dict with the keys ``"left"`` and ``"right"`` (class names) and
        ``"differences"``, mapping attribute name to ``[left_val, right_val]``.
        ``*_definition`` values are truncated to 100 characters.
    """
    comparison = {
        "left": left.__class__.__name__,
        "right": right.__class__.__name__,
        "differences": {},
    }
    for attribute, left_val in left.__dict__.items():
        if attribute in ATTRIBUTES_TO_EXCLUDE or attribute.startswith("_") or attribute.endswith("_id"):
            continue

        try:
            right_val = getattr(right, attribute)
        except Exception:
            # Deliberate best effort: an unreadable attribute counts as missing.
            right_val = None

        if left_val != right_val:
            if attribute.endswith("_definition"):
                # Guarded truncation: the right-hand value may be None.
                left_val = _truncate(left_val)
                right_val = _truncate(right_val)
            elif _without_environment(left_val) == _without_environment(right_val):
                continue
            log.debug("Values are not equal: left %s, right %s", left_val, right_val)
            comparison["differences"][attribute] = [left_val, right_val]

    return comparison
def
drop_environment(s: str) -> str:
class
Base:
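The base class of the class hierarchy (this description is the docstring inherited from Python's built-in `object`).
When called, `object` accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any; `Base` itself is created by `declarative_base()` and is constructed from keyword arguments, as shown below.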
Base(**kwargs)
1185def _declarative_constructor(self, **kwargs): 1186 """A simple constructor that allows initialization from kwargs. 1187 1188 Sets attributes on the constructed instance using the names and 1189 values in ``kwargs``. 1190 1191 Only keys that are present as 1192 attributes of the instance's class are allowed. These could be, 1193 for example, any mapped columns or relationships. 1194 """ 1195 cls_ = type(self) 1196 for k in kwargs: 1197 if not hasattr(cls_, k): 1198 raise TypeError( 1199 "%r is an invalid keyword argument for %s" % (k, cls_.__name__) 1200 ) 1201 setattr(self, k, kwargs[k])
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and
values in kwargs.
Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.
class CommonBase(Base):
    """Abstract declarative base with helpers that expose an instance as pandas data.

    The helpers read ``self.__dict__`` and ignore every attribute whose name
    starts with an underscore.
    """

    # SQLAlchemy declarative directive: this class itself is not mapped to a table.
    __abstract__ = True

    # NOTE(review): @abstractmethod is only enforced for classes whose metaclass
    # derives from ABCMeta - confirm whether that holds for this declarative base.
    @abstractmethod
    def __get_id__(self):
        """Return the identifier of this object (no implementation here)."""
        pass

    def __get_header__(self) -> list:
        """Return the names of the public instance attributes."""
        return [attribute for attribute in self.__dict__ if not attribute.startswith("_")]

    def __get_values__(self) -> list:
        """Return the values of the public instance attributes, in header order."""
        return [value for attribute, value in self.__dict__.items() if not attribute.startswith("_")]

    def __get_df__(self, columns=None) -> pd.DataFrame:
        """Return a two-column frame with one row per public attribute.

        Args:
            columns: the two column labels; defaults to ``["Attribute", "Value"]``.
        """
        if columns is None:
            columns = ["Attribute", "Value"]

        data = [
            [attribute, value]
            for attribute, value in self.__dict__.items()
            if not attribute.startswith("_")
        ]
        return pd.DataFrame(data=data, columns=columns)

    def __get_row__(self) -> pd.DataFrame:
        """Return a single-row frame whose columns are the public attributes."""
        return pd.DataFrame(data=[self.__get_values__()], columns=self.__get_header__())

    def __side_by_side__(self, other):
        """Return a frame with this object's and ``other``'s values joined on "Attribute".

        Raises:
            TypeError: if the class names of ``self`` and ``other`` differ.
        """
        if self.__class__.__name__ != other.__class__.__name__:
            raise TypeError(f"{other.__class__.__name__} is not {self.__class__.__name__}")
        left = self.__get_df__(["Attribute", "Left"])
        right = other.__get_df__(["Attribute", "Right"])
        return pd.merge(left, right, on="Attribute")

    def __repr__(self) -> str:
        """Return the class followed by ``name:[value], `` pairs; falsy values print as empty."""
        pairs = "".join(
            f"{attribute}:[{value if value else ''}], "
            for attribute, value in self.__dict__.items()
            if attribute not in ATTRIBUTES_TO_EXCLUDE and not attribute.startswith("_")
        )
        return f"{self.__class__}({pairs})"

    @staticmethod
    def __to_df__(data: list, columns: list) -> pd.DataFrame:
        """Stack the rows of the objects in ``data`` into one frame.

        Args:
            data: objects providing ``__get_row__()``.
            columns: columns to select; when falsy the whole frame is returned.

        Returns:
            An empty frame with ``columns`` when ``data`` is empty, otherwise
            the stacked rows.
        """
        rows = [item.__get_row__() for item in data]
        if not rows:
            return pd.DataFrame(columns=columns)

        # One concat instead of one per item avoids re-copying the growing frame.
        # reset_index() keeps the old per-row index as an "index" column.
        df = pd.concat(rows).reset_index()

        if columns:
            return df[columns]

        return df
The base class of the class hierarchy.
When called, it accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any.