Coverage for src\airtable_to_sqlite\main.py: 100%

128 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-05 17:47 +0100

1import logging 

2from collections.abc import Generator 

3from datetime import datetime 

4from typing import Optional 

5 

6import pyairtable.metadata 

7import sqlite_utils 

8from pyairtable.api.api import Api as AirtableApi 

9from pyairtable.api.base import Base as AirtableBase 

10from sqlite_utils import Database 

11from tqdm import tqdm 

12 

13from airtable_to_sqlite.constants import ( 

14 META_TABLES, 

15 AirtablePersonalAccessToken, 

16 ForeignKeySet, 

17 PreferedNamingMethod, 

18) 

19from airtable_to_sqlite.schema import BaseRecord, FieldSchema, TableSchema, ViewSchema 

20 

21logger = logging.getLogger(__name__) 

22 

23 

def get_base_records(
    personal_access_token: AirtablePersonalAccessToken, base_ids: Optional[list[str]] = None
) -> Generator[BaseRecord, None, None]:
    """Yield a ``BaseRecord`` for each Airtable base the token can list.

    Args:
        personal_access_token: Token handed to the Airtable API client.
        base_ids: Optional base IDs to restrict the output to. ``None``
            (the default) yields every base returned by the API.

    Yields:
        One ``BaseRecord`` per selected base, in the order the API listed them.

    Raises:
        KeyError: If any requested ID is absent from the list returned by
            the API. This is a generator function, so the error is raised
            when iteration starts rather than when the function is called.
    """
    logger.info("Fetching base record from Airtable...")
    api = AirtableApi(personal_access_token)
    bases = pyairtable.metadata.get_api_bases(api)["bases"]

    requested: Optional[set[str]] = None
    if base_ids is not None:
        # Build the set once and reuse it for both the validation and the
        # filter below: O(1) membership tests, and a one-shot iterable of
        # IDs is not consumed twice.
        requested = set(base_ids)
        missing_base_ids = requested - {base_record["id"] for base_record in bases}
        if missing_base_ids:
            msg = f"Base IDs {missing_base_ids} not found in Airtable account."
            raise KeyError(msg)

    for base_record in bases:
        if requested is None or base_record["id"] in requested:
            yield BaseRecord(**base_record)

42 

43 

class AirtableBaseToSqlite:
    """Copy one Airtable base (schema metadata and records) into SQLite.

    ``run()`` performs the whole export. The individual steps are public
    methods; later steps read state that earlier ones populate (for example
    ``meta_tables`` is filled by ``create_metadata_tables`` and read by
    ``create_table_metadata`` and ``insert_settings``).
    """

    def __init__(
        self,
        personal_access_token: AirtablePersonalAccessToken,
        db: Database,
        base: BaseRecord,
        prefer_ids: PreferedNamingMethod = PreferedNamingMethod.NAME,
    ) -> None:
        """Store the target database and build the Airtable client for ``base``.

        Args:
            personal_access_token: Token used to construct the Airtable base client.
            db: Database that all tables are created in and written to.
            base: The Airtable base to export; its ``id`` selects the API base.
            prefer_ids: Naming preference forwarded to the schema objects'
                ``db_name`` methods when table and column names are chosen.
        """
        self._base: BaseRecord = base
        self._db: Database = db
        self._base_api = AirtableBase(personal_access_token, base.id)
        self.prefer_ids = prefer_ids
        # (table name, foreign key spec) pairs collected while tables are
        # created; applied and cleared by create_foreign_keys().
        self.foreign_keys: ForeignKeySet = set()
        # Table schemas in the order get_schema() received them.
        self.table_meta: list[TableSchema] = []
        # Airtable table ID -> name of the corresponding SQLite table.
        self.table_id_lookup: dict[str, str] = {}
        # Metadata table name -> table handle, filled by create_metadata_tables().
        self.meta_tables: dict[str, sqlite_utils.db.Table] = {}

    def run(self) -> None:
        """Run every export step in sequence."""
        self.get_schema()
        self.create_metadata_tables()
        self.create_all_table_metadata()
        self.create_foreign_keys()
        self.insert_settings()
        self.insert_all_table_data()

    def get_schema(self) -> None:
        """Fetch the base schema and record it in ``table_meta`` / ``table_id_lookup``."""
        logger.info("Fetching schema from Airtable...")
        schema = pyairtable.metadata.get_base_schema(self._base_api)
        for table in schema["tables"]:
            fields = [FieldSchema(**field) for field in table["fields"]]
            views = [ViewSchema(**view) for view in table["views"]]
            this_table = TableSchema(
                id=table["id"],
                name=table["name"],
                primaryFieldId=table["primaryFieldId"],
                fields=fields,
                views=views,
            )
            self.table_meta.append(this_table)
            self.table_id_lookup[this_table.id] = this_table.db_name(self.prefer_ids)

    def create_metadata_tables(self) -> None:
        """Create each table described in ``META_TABLES`` and keep a handle to it.

        Any ``foreign_keys`` entry in a table's options is not passed to
        ``create``; it is queued in ``self.foreign_keys`` instead so that
        ``create_foreign_keys`` can add it later.
        """
        for table_name, (columns, options) in META_TABLES.items():
            # Copy so that popping "foreign_keys" does not mutate META_TABLES.
            options_to_use = options.copy()
            for foreign_key in options_to_use.pop("foreign_keys", []):
                self.foreign_keys.add((table_name, foreign_key))
            db_table = self._db.table(table_name)
            if isinstance(db_table, sqlite_utils.db.Table):
                db_table.create(columns=columns, **options_to_use)
                self.meta_tables[table_name] = db_table
            else:  # pragma: no cover
                pass

    def create_all_table_metadata(self) -> None:
        """Call ``create_table_metadata`` for every fetched table, with a progress bar."""
        for table in tqdm(self.table_meta):
            self.create_table_metadata(table)

    def get_link_table(self, field: FieldSchema, table: TableSchema) -> sqlite_utils.db.Table:
        """Return the handle of the link table for ``field`` of ``table``.

        The table is named ``<table db name>_<field id>``. Only a handle is
        constructed here; this method does not call ``create`` on it.
        """
        new_table_name = table.db_name(self.prefer_ids) + "_" + field.id
        return sqlite_utils.db.Table(self._db, new_table_name)

    def create_table_metadata(
        self,
        table: TableSchema,
    ) -> None:
        """Write metadata rows for ``table`` and create its SQLite tables.

        Inserts into ``_meta_table``, ``_meta_field_choice``, ``_meta_field``
        and ``_meta_view``, creates one link table per ``multipleRecordLinks``
        field (queuing its two foreign keys), and finally creates the data
        table itself with ``_id`` as primary key.
        """
        table_name = table.db_name(self.prefer_ids)

        self.meta_tables["_meta_table"].insert(
            {
                "id": table.id,
                "name": table.name,
                "primaryFieldId": table.primaryFieldId,
            }
        )
        # Every data table gets these two columns in addition to its fields.
        column_types = {
            "_id": str,
            "_createdTime": datetime,
        }
        fields_to_insert = []
        for field in table.fields:
            field_name = field.db_name(self.prefer_ids)

            choices = field.choices
            if choices is not None:
                self.meta_tables["_meta_field_choice"].insert_all(choices)

            fields_to_insert.append(field.for_insertion(table))

            if (field.type == "multipleRecordLinks") and field.options is not None:
                link_db_table = self.get_link_table(field, table)
                other_table = field.options["linkedTableId"]
                # Fall back to the raw ID when the linked table is not among
                # the tables seen by get_schema().
                other_table_name = self.table_id_lookup.get(other_table, other_table)
                self.foreign_keys.add((link_db_table.name, ("recordId", table_name, "_id")))
                self.foreign_keys.add((link_db_table.name, ("otherRecordId", other_table_name, "_id")))
                link_db_table.create(
                    columns={
                        "recordId": str,
                        "otherRecordId": str,
                    },
                )

            # A field whose column_type is None gets no column declared here.
            column_type = field.column_type
            if column_type is not None:
                column_types[field_name] = column_type

        self.meta_tables["_meta_field"].insert_all(fields_to_insert)
        db_table = self._db.table(table_name)
        if isinstance(db_table, sqlite_utils.db.Table):
            db_table.create(columns=column_types, pk="_id")
        else:  # pragma: no cover
            pass

        self.meta_tables["_meta_view"].insert_all(
            {
                "id": view.id,
                "name": view.name,
                "type": view.type,
                "tableId": table.id,
            }
            for view in table.views
        )

    def create_foreign_keys(self) -> None:
        """Add every queued foreign key to its table, then empty the queue."""
        logger.info("Adding foreign keys")
        for table_name, foreign_key in self.foreign_keys:
            db_table = self._db[table_name]
            if isinstance(db_table, sqlite_utils.db.Table):
                db_table.add_foreign_key(*foreign_key)
            else:  # pragma: no cover
                pass
        self.foreign_keys = set()

    def insert_settings(self) -> None:
        """Record the base's identity and the naming preference in ``_meta_settings``."""
        self.meta_tables["_meta_settings"].insert_all(
            [
                {
                    "key": "base_id",
                    "value": self._base.id,
                },
                {
                    "key": "base_name",
                    "value": self._base.name,
                },
                {
                    "key": "permissionLevel",
                    "value": self._base.permissionLevel,
                },
                {
                    "key": "prefer_ids",
                    "value": self.prefer_ids.name,
                },
            ]
        )

    def insert_all_table_data(self) -> None:
        """Call ``insert_table_data`` for every fetched table."""
        logger.info("Fetching table data")
        for table in self.table_meta:
            self.insert_table_data(table)

    def insert_table_data(self, table: TableSchema) -> None:
        """Fetch the records of ``table`` and store them.

        Values of ``multipleRecordLinks`` fields are written as
        ``(recordId, otherRecordId)`` rows into the field's link table while
        the records are iterated; all other field values become columns of
        the record's row, and those rows are inserted in one call at the end.
        """
        table_data = table.get_table_data(self._base_api)
        table_name = table.db_name(self.prefer_ids)
        db_table = self._db.table(table_name)

        # Resolve each field's destination once per table instead of once per
        # record: the second element is the link-table handle for link fields
        # and None otherwise; the third is the column name for plain fields.
        field_plan = []
        for field in table.fields:
            if field.type == "multipleRecordLinks":
                field_plan.append((field.name, self.get_link_table(field, table), None))
            else:
                field_plan.append((field.name, None, field.db_name(self.prefer_ids)))

        records_to_save = []
        for record in tqdm(table_data):
            record_to_save = {
                "_id": record["id"],
                "_createdTime": record["createdTime"],
            }
            for airtable_name, link_db_table, column_name in field_plan:
                if link_db_table is not None:
                    link_db_table.insert_all(
                        {
                            "recordId": record["id"],
                            "otherRecordId": value,
                        }
                        for value in record["fields"].get(airtable_name, [])
                    )
                else:
                    # A field absent from the record is stored as None.
                    record_to_save[column_name] = record["fields"].get(airtable_name)
            records_to_save.append(record_to_save)

        if isinstance(db_table, sqlite_utils.db.Table):
            db_table.insert_all(records_to_save)
        else:  # pragma: no cover
            pass