Coverage for src\airtable_to_sqlite\main.py: 100%
127 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-05 19:54 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-05 19:54 +0100
1import logging
2from datetime import datetime
3from typing import Dict, Generator, List, Optional
5import pyairtable.metadata
6import sqlite_utils
7from pyairtable.api.api import Api as AirtableApi
8from pyairtable.api.base import Base as AirtableBase
9from sqlite_utils import Database
10from tqdm import tqdm
12from airtable_to_sqlite.constants import (
13 META_TABLES,
14 AirtablePersonalAccessToken,
15 ForeignKeySet,
16 PreferedNamingMethod,
17)
18from airtable_to_sqlite.schema import BaseRecord, FieldSchema, TableSchema, ViewSchema
20logger = logging.getLogger(__name__)
def get_base_records(
    personal_access_token: AirtablePersonalAccessToken, base_ids: Optional[List[str]] = None
) -> Generator[BaseRecord, None, None]:
    """Yield a ``BaseRecord`` for each base returned by the Airtable metadata API.

    Args:
        personal_access_token: Token used to construct the Airtable API client.
        base_ids: Optional collection of base IDs to restrict the output to.
            ``None`` (the default) yields every base returned by the API.

    Yields:
        One ``BaseRecord`` per matching base, in the order the API returned them.

    Raises:
        KeyError: If any requested base ID is absent from the API response.

    Note:
        This is a generator function, so nothing runs (no API call, no
        validation, no ``KeyError``) until the caller starts iterating.
    """
    logger.info("Fetching base record from Airtable...")
    api = AirtableApi(personal_access_token)
    all_bases = pyairtable.metadata.get_api_bases(api)
    available_bases = all_bases["bases"]

    # Build the set of requested IDs once and reuse it for both validation and
    # filtering: membership tests are O(1), and ``base_ids`` is only consumed a
    # single time (so a one-shot iterable still filters correctly).
    requested_ids = None if base_ids is None else set(base_ids)

    if requested_ids is not None:
        available_ids = {base_record["id"] for base_record in available_bases}
        missing_base_ids = requested_ids - available_ids
        if missing_base_ids:
            msg = f"Base IDs {missing_base_ids} not found in Airtable account."
            raise KeyError(msg)

    for base_record in available_bases:
        if requested_ids is None or base_record["id"] in requested_ids:
            yield BaseRecord(**base_record)
class AirtableBaseToSqlite:
    """Copy the schema and records of a single Airtable base into a SQLite database.

    Call :meth:`run` to perform the whole export. The individual steps are
    exposed as separate methods and accumulate state on the instance:

    * ``table_meta`` - the ``TableSchema`` objects built by :meth:`get_schema`.
    * ``table_id_lookup`` - Airtable table ID -> database table name.
    * ``meta_tables`` - metadata table name -> ``sqlite_utils`` table object.
    * ``foreign_keys`` - pending ``(table_name, foreign_key)`` pairs, applied
      and then cleared by :meth:`create_foreign_keys`.

    Several methods guard table operations with
    ``isinstance(..., sqlite_utils.db.Table)``. The ``else`` branches are
    deliberately empty and marked ``# pragma: no cover``; they exist so the
    table-only methods are only called on ``Table`` objects
    (NOTE(review): presumably because ``Database.table()`` / ``Database[...]``
    can also return a view object - confirm against sqlite-utils).
    """

    def __init__(
        self,
        personal_access_token: AirtablePersonalAccessToken,
        db: Database,
        base: BaseRecord,
        prefer_ids: PreferedNamingMethod = PreferedNamingMethod.NAME,
    ) -> None:
        """Store the target database and base, and build the Airtable base client.

        Args:
            personal_access_token: Token passed to the Airtable base client.
            db: Database that tables and rows are written to.
            base: Record describing the Airtable base to export.
            prefer_ids: Naming preference forwarded to the schema objects'
                ``db_name`` methods when choosing table and column names.
        """
        self._base: BaseRecord = base
        self._db: Database = db
        self._base_api = AirtableBase(personal_access_token, base.id)
        self.prefer_ids = prefer_ids
        self.foreign_keys: ForeignKeySet = set()
        self.table_meta: List[TableSchema] = []
        self.table_id_lookup: Dict[str, str] = {}
        self.meta_tables: Dict[str, sqlite_utils.db.Table] = {}

    def run(self) -> None:
        """Run the full export: schema, metadata tables, tables, keys, settings, data."""
        self.get_schema()
        self.create_metadata_tables()
        self.create_all_table_metadata()
        self.create_foreign_keys()
        self.insert_settings()
        self.insert_all_table_data()

    def get_schema(self) -> None:
        """Fetch the base schema and record it in ``table_meta`` / ``table_id_lookup``.

        Each call appends to ``table_meta``; nothing is cleared first.
        """
        logger.info("Fetching schema from Airtable...")
        tables = pyairtable.metadata.get_base_schema(self._base_api)
        for table in tables["tables"]:
            this_table = TableSchema(
                id=table["id"],
                name=table["name"],
                primaryFieldId=table["primaryFieldId"],
                fields=[FieldSchema(**field) for field in table["fields"]],
                views=[ViewSchema(**view) for view in table["views"]],
            )
            self.table_meta.append(this_table)
            self.table_id_lookup[this_table.id] = this_table.db_name(self.prefer_ids)

    def create_metadata_tables(self) -> None:
        """Create every table described in ``META_TABLES`` and queue its foreign keys."""
        for table_name, (columns, options) in META_TABLES.items():
            # Work on a copy so popping "foreign_keys" does not mutate META_TABLES.
            options_to_use = options.copy()
            # Foreign keys are not passed to ``create``; they are queued and
            # added later by ``create_foreign_keys``.
            for foreign_key in options_to_use.pop("foreign_keys", []):
                self.foreign_keys.add((table_name, foreign_key))
            db_table = self._db.table(table_name)
            if isinstance(db_table, sqlite_utils.db.Table):
                db_table.create(columns=columns, **options_to_use)
                self.meta_tables[table_name] = db_table
            else:  # pragma: no cover
                pass

    def create_all_table_metadata(self) -> None:
        """Call :meth:`create_table_metadata` for every known table, with a progress bar."""
        for table in tqdm(self.table_meta):
            self.create_table_metadata(table)

    def get_link_table(self, field: FieldSchema, table: TableSchema) -> sqlite_utils.db.Table:
        """Return the table object used to store record links for ``field``.

        The table is named ``<table db name>_<field id>``. This only constructs
        the ``sqlite_utils`` table object; it does not call ``create`` on it.
        """
        new_table_name = table.db_name(self.prefer_ids) + "_" + field.id
        return sqlite_utils.db.Table(self._db, new_table_name)

    def create_table_metadata(
        self,
        table: TableSchema,
    ) -> None:
        """Record metadata for ``table`` and create its data and link tables.

        Inserts rows into the ``_meta_table``, ``_meta_field_choice``,
        ``_meta_field`` and ``_meta_view`` metadata tables, creates one link
        table per ``multipleRecordLinks`` field that has options, queues the
        link tables' foreign keys, and finally creates the data table itself
        with ``_id`` as primary key.
        """
        table_name = table.db_name(self.prefer_ids)

        self.meta_tables["_meta_table"].insert(
            {
                "id": table.id,
                "name": table.name,
                "primaryFieldId": table.primaryFieldId,
            }
        )
        # Every data table starts with these two columns; field columns follow.
        column_types = {
            "_id": str,
            "_createdTime": datetime,
        }
        fields_to_insert = []
        for field in table.fields:
            field_name = field.db_name(self.prefer_ids)

            choices = field.choices
            if choices is not None:
                self.meta_tables["_meta_field_choice"].insert_all(choices)

            fields_to_insert.append(field.for_insertion(table))

            if (field.type == "multipleRecordLinks") and field.options is not None:
                link_db_table = self.get_link_table(field, table)
                other_table = field.options["linkedTableId"]
                # Fall back to the raw linked table ID when it is not a table
                # recorded by ``get_schema``.
                other_table_name = self.table_id_lookup.get(other_table, other_table)
                self.foreign_keys.add((link_db_table.name, ("recordId", table_name, "_id")))
                self.foreign_keys.add((link_db_table.name, ("otherRecordId", other_table_name, "_id")))
                if isinstance(link_db_table, sqlite_utils.db.Table):
                    link_db_table.create(
                        columns={
                            "recordId": str,
                            "otherRecordId": str,
                        },
                    )
                else:  # pragma: no cover
                    pass

            # Fields without a column type get no column in the data table.
            column_type = field.column_type
            if column_type is not None:
                column_types[field_name] = column_type

        self.meta_tables["_meta_field"].insert_all(fields_to_insert)
        db_table = self._db.table(table_name)
        if isinstance(db_table, sqlite_utils.db.Table):
            db_table.create(columns=column_types, pk="_id")
        else:  # pragma: no cover
            pass

        self.meta_tables["_meta_view"].insert_all(
            {
                "id": view.id,
                "name": view.name,
                "type": view.type,
                "tableId": table.id,
            }
            for view in table.views
        )

    def create_foreign_keys(self) -> None:
        """Add every queued foreign key to the database, then clear the queue."""
        logger.info("Adding foreign keys")
        for table_name, foreign_key in self.foreign_keys:
            db_table = self._db[table_name]
            if isinstance(db_table, sqlite_utils.db.Table):
                db_table.add_foreign_key(*foreign_key)
            else:  # pragma: no cover
                pass
        self.foreign_keys = set()

    def insert_settings(self) -> None:
        """Write the base's id, name and permission level, plus the naming preference, to ``_meta_settings``."""
        self.meta_tables["_meta_settings"].insert_all(
            [
                {
                    "key": "base_id",
                    "value": self._base.id,
                },
                {
                    "key": "base_name",
                    "value": self._base.name,
                },
                {
                    "key": "permissionLevel",
                    "value": self._base.permissionLevel,
                },
                {
                    "key": "prefer_ids",
                    "value": self.prefer_ids.name,
                },
            ]
        )

    def insert_all_table_data(self) -> None:
        """Call :meth:`insert_table_data` for every table recorded by :meth:`get_schema`."""
        logger.info("Fetching table data")
        for table in self.table_meta:
            self.insert_table_data(table)

    def insert_table_data(self, table: TableSchema) -> None:
        """Fetch all records of ``table`` and insert them into the database.

        Values of ``multipleRecordLinks`` fields are written to the field's
        link table as ``(recordId, otherRecordId)`` rows, one batch per record.
        Every other field becomes a column value on the record's row, ``None``
        when the record has no value for it. The data rows themselves are
        inserted in a single batch after all records have been read.
        """
        table_data = table.get_table_data(self._base_api)
        table_name = table.db_name(self.prefer_ids)
        db_table = self._db.table(table_name)

        # Link tables and column names depend only on the field, so resolve
        # them once here instead of once per record.
        link_fields = []
        plain_fields = []
        for field in table.fields:
            if field.type == "multipleRecordLinks":
                link_db_table = self.get_link_table(field, table)
                if isinstance(link_db_table, sqlite_utils.db.Table):
                    link_fields.append((field.name, link_db_table))
                else:  # pragma: no cover
                    pass
            else:
                plain_fields.append((field.name, field.db_name(self.prefer_ids)))

        records_to_save = []
        for record in tqdm(table_data):
            record_id = record["id"]
            record_to_save = {
                "_id": record_id,
                "_createdTime": record["createdTime"],
            }
            field_values = record["fields"]
            for airtable_name, link_db_table in link_fields:
                link_db_table.insert_all(
                    {
                        "recordId": record_id,
                        "otherRecordId": value,
                    }
                    for value in field_values.get(airtable_name, [])
                )
            for airtable_name, column_name in plain_fields:
                record_to_save[column_name] = field_values.get(airtable_name)
            records_to_save.append(record_to_save)

        if isinstance(db_table, sqlite_utils.db.Table):
            db_table.insert_all(records_to_save)
        else:  # pragma: no cover
            pass