Coverage for src\airtable_to_sqlite\main.py: 100%

127 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-05 19:54 +0100

1import logging 

2from datetime import datetime 

3from typing import Dict, Generator, List, Optional 

4 

5import pyairtable.metadata 

6import sqlite_utils 

7from pyairtable.api.api import Api as AirtableApi 

8from pyairtable.api.base import Base as AirtableBase 

9from sqlite_utils import Database 

10from tqdm import tqdm 

11 

12from airtable_to_sqlite.constants import ( 

13 META_TABLES, 

14 AirtablePersonalAccessToken, 

15 ForeignKeySet, 

16 PreferedNamingMethod, 

17) 

18from airtable_to_sqlite.schema import BaseRecord, FieldSchema, TableSchema, ViewSchema 

19 

# Module-level logger, named after this module via ``__name__``; used by the functions and methods below.
20logger = logging.getLogger(__name__) 

21 

22 

def get_base_records(
    personal_access_token: AirtablePersonalAccessToken, base_ids: Optional[List[str]] = None
) -> Generator[BaseRecord, None, None]:
    """Yield a ``BaseRecord`` for each base returned by the Airtable metadata API.

    This is a generator function: nothing runs (no API call, no validation) until
    the caller starts iterating over the returned generator.

    Args:
        personal_access_token: Token used to construct the Airtable API client.
        base_ids: Optional list of base IDs to restrict the output to. ``None``
            (the default) yields every base; an empty list yields nothing.

    Yields:
        One ``BaseRecord`` per selected base, built from the raw record returned
        by the API, in the order the API listed them.

    Raises:
        KeyError: On first iteration, if any ID in ``base_ids`` is not among the
            bases returned by the API.
    """
    logger.info("Fetching base record from Airtable...")
    api = AirtableApi(personal_access_token)
    available_bases = pyairtable.metadata.get_api_bases(api)["bases"]

    # Build the set of requested IDs once and use it for both the validation
    # and the filter below, so each membership test is O(1) rather than a scan
    # of the caller's list for every base.
    requested_ids = None if base_ids is None else set(base_ids)

    if requested_ids is not None:
        available_ids = {base_record["id"] for base_record in available_bases}
        missing_base_ids = requested_ids - available_ids
        if missing_base_ids:
            msg = f"Base IDs {missing_base_ids} not found in Airtable account."
            raise KeyError(msg)

    for base_record in available_bases:
        if requested_ids is None or base_record["id"] in requested_ids:
            yield BaseRecord(**base_record)

41 

42 

class AirtableBaseToSqlite:
    """Copy one Airtable base (schema metadata and table records) into a SQLite database.

    Call :meth:`run` to perform the whole export. The individual steps are also
    available as separate methods; later steps rely on state filled in by
    earlier ones (for example ``meta_tables`` and ``table_meta``).
    """

    def __init__(
        self,
        personal_access_token: AirtablePersonalAccessToken,
        db: Database,
        base: BaseRecord,
        prefer_ids: PreferedNamingMethod = PreferedNamingMethod.NAME,
    ) -> None:
        """Store the export target and build the Airtable client for ``base``.

        Args:
            personal_access_token: Token used to construct the Airtable base client.
            db: Database in which all tables are created and filled.
            base: Record describing the base to export; its ``id`` selects the base.
            prefer_ids: Naming preference forwarded to the schema objects'
                ``db_name()`` when table and column names are chosen.
        """
        self._base: BaseRecord = base
        self._db: Database = db
        self._base_api = AirtableBase(personal_access_token, base.id)
        self.prefer_ids = prefer_ids
        # Pending (table name, foreign key spec) pairs, applied by create_foreign_keys().
        self.foreign_keys: ForeignKeySet = set()
        # Table schemas collected by get_schema().
        self.table_meta: List[TableSchema] = []
        # Airtable table id -> name of the corresponding database table.
        self.table_id_lookup: Dict[str, str] = {}
        # Metadata tables created by create_metadata_tables(), keyed by table name.
        self.meta_tables: Dict[str, sqlite_utils.db.Table] = {}

    def run(self) -> None:
        """Execute every export step, in order, from schema download to record insertion."""
        steps = (
            self.get_schema,
            self.create_metadata_tables,
            self.create_all_table_metadata,
            self.create_foreign_keys,
            self.insert_settings,
            self.insert_all_table_data,
        )
        for step in steps:
            step()

    def get_schema(self) -> None:
        """Download the base schema and record it in ``table_meta`` and ``table_id_lookup``."""
        logger.info("Fetching schema from Airtable...")
        base_schema = pyairtable.metadata.get_base_schema(self._base_api)
        for raw_table in base_schema["tables"]:
            field_schemas = [FieldSchema(**raw_field) for raw_field in raw_table["fields"]]
            view_schemas = [ViewSchema(**raw_view) for raw_view in raw_table["views"]]
            table_schema = TableSchema(
                id=raw_table["id"],
                name=raw_table["name"],
                primaryFieldId=raw_table["primaryFieldId"],
                fields=field_schemas,
                views=view_schemas,
            )
            self.table_meta.append(table_schema)
            self.table_id_lookup[table_schema.id] = table_schema.db_name(self.prefer_ids)

    def create_metadata_tables(self) -> None:
        """Create each table described in ``META_TABLES`` and queue its foreign keys.

        Foreign keys listed under the ``"foreign_keys"`` option are not passed to
        ``create()``; they are added to ``self.foreign_keys`` for later application.
        """
        for meta_name, (columns, options) in META_TABLES.items():
            create_options = options.copy()
            pending_keys = create_options.pop("foreign_keys", [])
            self.foreign_keys.update((meta_name, key_spec) for key_spec in pending_keys)

            meta_table = self._db.table(meta_name)
            if not isinstance(meta_table, sqlite_utils.db.Table):  # pragma: no cover
                continue
            meta_table.create(columns=columns, **create_options)
            self.meta_tables[meta_name] = meta_table

    def create_all_table_metadata(self) -> None:
        """Run :meth:`create_table_metadata` for every collected table schema, with a progress bar."""
        for table_schema in tqdm(self.table_meta):
            self.create_table_metadata(table_schema)

    def get_link_table(self, field: FieldSchema, table: TableSchema) -> sqlite_utils.db.Table:
        """Return the table object used to store the record links of ``field``.

        The table is named ``<database name of table>_<field id>``. Only the
        table object is constructed here; nothing is created in the database.
        """
        link_table_name = "_".join((table.db_name(self.prefer_ids), field.id))
        return sqlite_utils.db.Table(self._db, link_table_name)

    def create_table_metadata(
        self,
        table: TableSchema,
    ) -> None:
        """Write the metadata rows for ``table`` and create its (empty) database tables.

        Inserts rows into ``_meta_table``, ``_meta_field_choice``, ``_meta_field``
        and ``_meta_view``, creates one link table per ``multipleRecordLinks``
        field that has options, and creates the data table itself with ``_id``
        as primary key.
        """
        db_table_name = table.db_name(self.prefer_ids)

        self.meta_tables["_meta_table"].insert(
            {"id": table.id, "name": table.name, "primaryFieldId": table.primaryFieldId}
        )

        # Every data table starts with these two bookkeeping columns.
        columns = {"_id": str, "_createdTime": datetime}
        field_rows = []
        for field in table.fields:
            column_name = field.db_name(self.prefer_ids)

            field_choices = field.choices
            if field_choices is not None:
                self.meta_tables["_meta_field_choice"].insert_all(field_choices)

            field_rows.append(field.for_insertion(table))

            if field.type == "multipleRecordLinks" and field.options is not None:
                link_table = self.get_link_table(field, table)
                linked_table_id = field.options["linkedTableId"]
                # Fall back to the raw id when the linked table is not in the lookup.
                linked_table_name = self.table_id_lookup.get(linked_table_id, linked_table_id)
                self.foreign_keys.add((link_table.name, ("recordId", db_table_name, "_id")))
                self.foreign_keys.add((link_table.name, ("otherRecordId", linked_table_name, "_id")))
                if isinstance(link_table, sqlite_utils.db.Table):  # pragma: no branch
                    link_table.create(columns={"recordId": str, "otherRecordId": str})

            # Fields whose column type is None get no column in the data table.
            sql_type = field.column_type
            if sql_type is not None:
                columns[column_name] = sql_type

        self.meta_tables["_meta_field"].insert_all(field_rows)

        data_table = self._db.table(db_table_name)
        if isinstance(data_table, sqlite_utils.db.Table):  # pragma: no branch
            data_table.create(columns=columns, pk="_id")

        view_rows = (
            {"id": view.id, "name": view.name, "type": view.type, "tableId": table.id} for view in table.views
        )
        self.meta_tables["_meta_view"].insert_all(view_rows)

    def create_foreign_keys(self) -> None:
        """Apply every queued foreign key to its table, then empty the queue."""
        logger.info("Adding foreign keys")
        for owner_name, key_spec in self.foreign_keys:
            owner_table = self._db[owner_name]
            if not isinstance(owner_table, sqlite_utils.db.Table):  # pragma: no cover
                continue
            owner_table.add_foreign_key(*key_spec)
        self.foreign_keys = set()

    def insert_settings(self) -> None:
        """Store the base id, base name, permission level and naming preference in ``_meta_settings``."""
        settings = {
            "base_id": self._base.id,
            "base_name": self._base.name,
            "permissionLevel": self._base.permissionLevel,
            "prefer_ids": self.prefer_ids.name,
        }
        self.meta_tables["_meta_settings"].insert_all(
            [{"key": setting_key, "value": setting_value} for setting_key, setting_value in settings.items()]
        )

    def insert_all_table_data(self) -> None:
        """Run :meth:`insert_table_data` for every collected table schema."""
        logger.info("Fetching table data")
        for table_schema in self.table_meta:
            self.insert_table_data(table_schema)

    def insert_table_data(self, table: TableSchema) -> None:
        """Fetch the records of ``table`` and insert them into the database.

        Values of ``multipleRecordLinks`` fields are written to the field's link
        table (one row per linked record id) as each record is processed; all
        other field values become columns of the record's row, which is inserted
        into the data table once every record has been read.
        """
        airtable_records = table.get_table_data(self._base_api)
        data_table = self._db.table(table.db_name(self.prefer_ids))

        rows = []
        for record in tqdm(airtable_records):
            record_id = record["id"]
            row = {"_id": record_id, "_createdTime": record["createdTime"]}
            for field in table.fields:
                if field.type != "multipleRecordLinks":
                    # A field missing from the record is stored as None.
                    row[field.db_name(self.prefer_ids)] = record["fields"].get(field.name)
                    continue

                link_table = self.get_link_table(field, table)
                if not isinstance(link_table, sqlite_utils.db.Table):  # pragma: no cover
                    continue
                linked_ids = record["fields"].get(field.name, [])
                link_table.insert_all(
                    {"recordId": record_id, "otherRecordId": linked_id} for linked_id in linked_ids
                )
            rows.append(row)

        if isinstance(data_table, sqlite_utils.db.Table):  # pragma: no branch
            data_table.insert_all(rows)