Coverage for src\airtable_to_sqlite\main.py: 100%
128 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-05 17:47 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-05 17:47 +0100
1import logging
2from collections.abc import Generator
3from datetime import datetime
4from typing import Optional
6import pyairtable.metadata
7import sqlite_utils
8from pyairtable.api.api import Api as AirtableApi
9from pyairtable.api.base import Base as AirtableBase
10from sqlite_utils import Database
11from tqdm import tqdm
13from airtable_to_sqlite.constants import (
14 META_TABLES,
15 AirtablePersonalAccessToken,
16 ForeignKeySet,
17 PreferedNamingMethod,
18)
19from airtable_to_sqlite.schema import BaseRecord, FieldSchema, TableSchema, ViewSchema
21logger = logging.getLogger(__name__)
def get_base_records(
    personal_access_token: AirtablePersonalAccessToken, base_ids: Optional[list[str]] = None
) -> Generator[BaseRecord, None, None]:
    """Yield a ``BaseRecord`` for each base returned by the Airtable metadata API.

    Args:
        personal_access_token: Token used to construct the pyairtable API client.
        base_ids: Optional collection of base IDs to restrict the output to.
            ``None`` (the default) yields every base that the API lists.

    Yields:
        One ``BaseRecord`` per selected base, in the order the API lists them.

    Raises:
        KeyError: If any requested ID is missing from the API response. Because
            this is a generator function, the error surfaces when iteration
            starts, not when the function is called.
    """
    logger.info("Fetching base record from Airtable...")
    api = AirtableApi(personal_access_token)
    all_bases = pyairtable.metadata.get_api_bases(api)
    bases = all_bases["bases"]

    # Build the set once and use it for both validation and filtering, so the
    # filter is O(1) per base and still works if ``base_ids`` is an iterable
    # that can only be consumed a single time.
    base_ids_requested = None if base_ids is None else set(base_ids)

    if base_ids_requested is not None:
        all_base_ids = {base_record["id"] for base_record in bases}
        missing_base_ids = base_ids_requested - all_base_ids
        if missing_base_ids:
            msg = f"Base IDs {missing_base_ids} not found in Airtable account."
            raise KeyError(msg)

    for base_record in bases:
        if base_ids_requested is None or base_record["id"] in base_ids_requested:
            yield BaseRecord(**base_record)
class AirtableBaseToSqlite:
    """Copy the schema, metadata and records of one Airtable base into SQLite.

    The export is split into steps that :meth:`run` executes in a fixed order;
    the state shared between those steps is kept on the instance.
    """

    def __init__(
        self,
        personal_access_token: AirtablePersonalAccessToken,
        db: Database,
        base: BaseRecord,
        prefer_ids: PreferedNamingMethod = PreferedNamingMethod.NAME,
    ) -> None:
        """Bind the exporter to a target database and a source base.

        Args:
            personal_access_token: Token handed to the pyairtable ``Base`` client.
            db: ``sqlite_utils`` database that receives every table.
            base: Record describing the base; its ``id`` selects what is read.
            prefer_ids: Naming method forwarded to the schema objects' ``db_name``.
        """
        self.prefer_ids = prefer_ids
        self._db: Database = db
        self._base: BaseRecord = base
        self._base_api = AirtableBase(personal_access_token, base.id)
        # Foreign keys are collected while tables are being created and are
        # applied afterwards by create_foreign_keys().
        self.foreign_keys: ForeignKeySet = set()
        # Filled by get_schema(): parsed table schemas, and a lookup from
        # Airtable table id to the table name used in SQLite.
        self.table_meta: list[TableSchema] = []
        self.table_id_lookup: dict[str, str] = {}
        # Filled by create_metadata_tables(): metadata table name -> table object.
        self.meta_tables: dict[str, sqlite_utils.db.Table] = {}

    def run(self) -> None:
        """Execute every export step, in order."""
        steps = (
            self.get_schema,
            self.create_metadata_tables,
            self.create_all_table_metadata,
            self.create_foreign_keys,
            self.insert_settings,
            self.insert_all_table_data,
        )
        for step in steps:
            step()

    def get_schema(self) -> None:
        """Fetch the base schema and record it in ``table_meta`` / ``table_id_lookup``."""
        logger.info("Fetching schema from Airtable...")
        base_schema = pyairtable.metadata.get_base_schema(self._base_api)
        for raw_table in base_schema["tables"]:
            field_schemas = [FieldSchema(**raw_field) for raw_field in raw_table["fields"]]
            view_schemas = [ViewSchema(**raw_view) for raw_view in raw_table["views"]]
            table_schema = TableSchema(
                id=raw_table["id"],
                name=raw_table["name"],
                primaryFieldId=raw_table["primaryFieldId"],
                fields=field_schemas,
                views=view_schemas,
            )
            self.table_meta.append(table_schema)
            self.table_id_lookup[table_schema.id] = table_schema.db_name(self.prefer_ids)

    def create_metadata_tables(self) -> None:
        """Create each table described by ``META_TABLES`` and queue its foreign keys."""
        for meta_name, (columns, options) in META_TABLES.items():
            # Work on a copy so popping "foreign_keys" leaves META_TABLES intact.
            create_options = options.copy()
            self.foreign_keys.update(
                (meta_name, foreign_key) for foreign_key in create_options.pop("foreign_keys", [])
            )
            meta_table = self._db.table(meta_name)
            if not isinstance(meta_table, sqlite_utils.db.Table):  # pragma: no cover
                continue
            meta_table.create(columns=columns, **create_options)
            self.meta_tables[meta_name] = meta_table

    def create_all_table_metadata(self) -> None:
        """Run :meth:`create_table_metadata` for every known table, with a progress bar."""
        for table_schema in tqdm(self.table_meta):
            self.create_table_metadata(table_schema)

    def get_link_table(self, field: FieldSchema, table: TableSchema) -> sqlite_utils.db.Table:
        """Return the table object holding the links of ``field``.

        The table is named ``<table db name>_<field id>``.
        """
        link_table_name = "_".join((table.db_name(self.prefer_ids), field.id))
        return sqlite_utils.db.Table(self._db, link_table_name)

    def create_table_metadata(
        self,
        table: TableSchema,
    ) -> None:
        """Create the SQLite table for ``table`` and write its metadata rows.

        Rows are written to ``_meta_table``, ``_meta_field_choice``,
        ``_meta_field`` and ``_meta_view``. Each ``multipleRecordLinks`` field
        that has options also gets its own two-column link table, and the
        matching foreign keys are queued in ``self.foreign_keys``.
        """
        sqlite_name = table.db_name(self.prefer_ids)

        table_row = {
            "id": table.id,
            "name": table.name,
            "primaryFieldId": table.primaryFieldId,
        }
        self.meta_tables["_meta_table"].insert(table_row)

        # Every data table starts with the record id and creation time columns.
        columns = {
            "_id": str,
            "_createdTime": datetime,
        }
        field_rows = []
        for field in table.fields:
            column_name = field.db_name(self.prefer_ids)

            field_choices = field.choices
            if field_choices is not None:
                self.meta_tables["_meta_field_choice"].insert_all(field_choices)

            field_rows.append(field.for_insertion(table))

            if field.type == "multipleRecordLinks" and field.options is not None:
                link_table = self.get_link_table(field, table)
                linked_table_id = field.options["linkedTableId"]
                # Fall back to the raw id when the linked table is not in the lookup.
                linked_table_name = self.table_id_lookup.get(linked_table_id, linked_table_id)
                link_keys = (
                    ("recordId", sqlite_name, "_id"),
                    ("otherRecordId", linked_table_name, "_id"),
                )
                for link_key in link_keys:
                    self.foreign_keys.add((link_table.name, link_key))
                if isinstance(link_table, sqlite_utils.db.Table):  # pragma: no branch
                    link_table.create(
                        columns={
                            "recordId": str,
                            "otherRecordId": str,
                        },
                    )

            # Fields whose column_type is None get no column in the data table.
            sqlite_type = field.column_type
            if sqlite_type is not None:
                columns[column_name] = sqlite_type

        self.meta_tables["_meta_field"].insert_all(field_rows)

        data_table = self._db.table(sqlite_name)
        if isinstance(data_table, sqlite_utils.db.Table):  # pragma: no branch
            data_table.create(columns=columns, pk="_id")

        view_rows = (
            {
                "id": view.id,
                "name": view.name,
                "type": view.type,
                "tableId": table.id,
            }
            for view in table.views
        )
        self.meta_tables["_meta_view"].insert_all(view_rows)

    def create_foreign_keys(self) -> None:
        """Apply every queued foreign key, then reset the queue."""
        logger.info("Adding foreign keys")
        for owner_name, key_args in self.foreign_keys:
            owner_table = self._db[owner_name]
            if not isinstance(owner_table, sqlite_utils.db.Table):  # pragma: no cover
                continue
            owner_table.add_foreign_key(*key_args)
        self.foreign_keys = set()

    def insert_settings(self) -> None:
        """Write base details and the naming preference to ``_meta_settings``."""
        settings = {
            "base_id": self._base.id,
            "base_name": self._base.name,
            "permissionLevel": self._base.permissionLevel,
            "prefer_ids": self.prefer_ids.name,
        }
        setting_rows = [{"key": key, "value": value} for key, value in settings.items()]
        self.meta_tables["_meta_settings"].insert_all(setting_rows)

    def insert_all_table_data(self) -> None:
        """Run :meth:`insert_table_data` for every known table."""
        logger.info("Fetching table data")
        for table_schema in self.table_meta:
            self.insert_table_data(table_schema)

    def insert_table_data(self, table: TableSchema) -> None:
        """Fetch the records of ``table`` and store them in SQLite.

        Values of ``multipleRecordLinks`` fields are written to the field's
        link table as ``(recordId, otherRecordId)`` rows while the records are
        being read. All other field values are gathered into one row per
        record, and those rows are inserted into the data table in a single
        call at the end.
        """
        airtable_records = table.get_table_data(self._base_api)
        sqlite_name = table.db_name(self.prefer_ids)
        data_table = self._db.table(sqlite_name)

        rows = []
        for record in tqdm(airtable_records):
            row = {
                "_id": record["id"],
                "_createdTime": record["createdTime"],
            }
            for field in table.fields:
                if field.type != "multipleRecordLinks":
                    # A field missing from the record is stored as None.
                    row[field.db_name(self.prefer_ids)] = record["fields"].get(field.name)
                    continue

                link_table = self.get_link_table(field, table)
                if isinstance(link_table, sqlite_utils.db.Table):  # pragma: no branch
                    link_rows = (
                        {
                            "recordId": record["id"],
                            "otherRecordId": linked_record_id,
                        }
                        for linked_record_id in record["fields"].get(field.name, [])
                    )
                    link_table.insert_all(link_rows)
            rows.append(row)

        if isinstance(data_table, sqlite_utils.db.Table):  # pragma: no branch
            data_table.insert_all(rows)