Coverage for src\datasette_reconcile\reconcile.py: 95%
108 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-12-20 00:45 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-12-20 00:45 +0000
1import json
3from datasette.utils import escape_fts, escape_sqlite
4from datasette.utils.asgi import Response
5from fuzzywuzzy import fuzz
7from datasette_reconcile.settings import (
8 DEFAULT_IDENTIFER_SPACE,
9 DEFAULT_LIMIT,
10 DEFAULT_SCHEMA_SPACE,
11 DEFAULT_TYPE,
12)
13from datasette_reconcile.utils import get_select_fields, get_view_url
class ReconcileAPI:
    """Reconciliation service endpoints for a single Datasette table.

    One instance is bound to a ``(database, table)`` pair and a plugin
    configuration mapping. The public coroutines each take a request object
    and return a JSON response with a permissive CORS header.
    """

    # Version string of the reconciliation API this class targets.
    api_version = "0.2"

    def __init__(self, config, database, table, datasette):
        """Bind the API to one table.

        Args:
            config: Mapping of plugin settings for the table. Keys read by
                this class: ``name_field`` (required), ``id_field``,
                ``fts_table``, ``max_limit``, ``type_field``,
                ``type_default``, ``view_url``, ``service_name``,
                ``identifierSpace`` and ``schemaSpace``.
            database: Name of the database containing ``table``.
            table: Name of the table being reconciled against.
            datasette: Object providing ``get_database``, ``table_metadata``
                and ``setting``.
        """
        self.config = config
        self.database = database
        self.db = datasette.get_database(database)
        self.table = table
        self.datasette = datasette

    async def reconcile(self, request):
        """Dispatch a reconciliation request.

        ``queries`` and ``extend`` are looked up in the POST variables first
        and fall back to the query string. ``queries`` takes precedence over
        ``extend``; when neither is present the service manifest is returned.

        Args:
            request: Request exposing ``post_vars()`` (awaitable) and ``args``.

        Returns:
            A JSON response.

        Raises:
            json.JSONDecodeError: If ``queries`` / ``extend`` is not valid JSON.
        """
        post_vars = await request.post_vars()
        queries = post_vars.get("queries", request.args.get("queries"))
        extend = post_vars.get("extend", request.args.get("extend"))

        if queries:
            results = {
                query_id: {"result": candidates}
                async for query_id, candidates in self._reconcile_queries(json.loads(queries))
            }
            return self._response(results)
        if extend:
            return self._response(await self._extend(json.loads(extend)))
        # Neither queries nor an extend request: return the service specification.
        return self._response(await self._service_manifest(request))

    async def properties(self, request):
        """Return the table's columns as proposed properties.

        The ``limit`` and ``type`` query arguments are echoed back unchanged
        (falling back to ``DEFAULT_LIMIT`` / ``DEFAULT_TYPE``).
        """
        limit = request.args.get("limit", DEFAULT_LIMIT)
        type_ = request.args.get("type", DEFAULT_TYPE)

        return self._response(
            {
                "limit": limit,
                "type": type_,
                "properties": [{"id": p["id"], "name": p["name"]} async for p in self._get_properties()],
            }
        )

    async def suggest_entity(self, request):
        """Suggest rows whose name column starts with the ``prefix`` argument.

        Results are paged with the ``cursor`` argument (row offset) in pages
        of ``DEFAULT_LIMIT`` rows.

        Raises:
            ValueError: If ``cursor`` is not an integer.
        """
        # A missing prefix used to be formatted as the literal string "None".
        prefix = request.args.get("prefix") or ""
        cursor = self._parse_cursor(request.args.get("cursor", 0))

        name_field = self.config["name_field"]
        id_field = self.config.get("id_field", "id")
        # Identifiers are escaped; the search term is bound as a parameter and
        # cursor / DEFAULT_LIMIT are integers, so nothing untrusted is inlined.
        query_sql = f"""
            select {escape_sqlite(id_field)} as id, {escape_sqlite(name_field)} as name
            from {escape_sqlite(self.table)}
            where {escape_sqlite(name_field)} like :search_query
            limit {DEFAULT_LIMIT} offset {cursor}
        """  # noqa: S608
        params = {"search_query": f"{prefix}%"}

        rows = await self.db.execute(query_sql, params)
        return self._response({"result": [{"id": r["id"], "name": r["name"]} for r in rows]})

    async def suggest_property(self, request):
        """Suggest columns whose id or display name starts with ``prefix``.

        Results are paged with the ``cursor`` argument (list offset) in pages
        of ``DEFAULT_LIMIT`` entries.

        Raises:
            ValueError: If ``cursor`` is not an integer.
        """
        # str.startswith(None) raises, so treat a missing prefix as "match all".
        prefix = request.args.get("prefix") or ""
        # Query-string values arrive as strings; convert before slicing.
        cursor = self._parse_cursor(request.args.get("cursor", 0))

        matches = [
            {"id": p["id"], "name": p["name"]}
            async for p in self._get_properties()
            if p["name"].startswith(prefix) or p["id"].startswith(prefix)
        ]

        return self._response({"result": matches[cursor : cursor + DEFAULT_LIMIT]})

    async def suggest_type(self, request):
        """Return an empty suggestion list; type suggestion is not implemented."""
        return self._response({"result": []})

    async def _get_properties(self):
        """Yield one ``{"id", "name", "type"}`` dict per column of the table.

        ``name`` is the entry for the column in the table metadata's
        ``columns`` mapping when there is one, otherwise the column name.
        """
        column_descriptions = self.datasette.table_metadata(self.database, self.table).get("columns") or {}
        for column in await self.db.table_column_details(self.table):
            yield {
                "id": column.name,
                "name": column_descriptions.get(column.name, column.name),
                "type": column.type,
            }

    def _response(self, response):
        """Wrap ``response`` as JSON with ``Access-Control-Allow-Origin: *``."""
        return Response.json(
            response,
            headers={
                "Access-Control-Allow-Origin": "*",
            },
        )

    @staticmethod
    def _parse_cursor(raw_cursor):
        """Convert a ``cursor`` request value to a non-negative integer offset.

        Args:
            raw_cursor: String or integer offset; ``None`` / ``""`` mean 0.

        Raises:
            ValueError: If the value cannot be parsed as an integer.
        """
        return max(int(raw_cursor or 0), 0)

    @staticmethod
    def _clamp_limit(requested, max_limit):
        """Return ``requested`` as an integer clamped to ``[0, max_limit]``.

        A negative value must not reach the SQL ``LIMIT`` clause, because
        SQLite treats a negative LIMIT as "no upper bound".

        Raises:
            TypeError, ValueError: If either value is not integer-like.
        """
        return max(0, min(int(requested), int(max_limit)))

    @staticmethod
    def _extend_value(column_type, value):
        """Wrap a cell value in the data-extension cell format for its column type."""
        kind = {"INTEGER": "int", "FLOAT": "float"}.get(column_type, "str")
        return [{kind: value}]

    async def _extend(self, data):
        """Answer a data-extension request.

        Args:
            data: Decoded request with ``ids`` (list of row ids) and
                ``properties`` (list of dicts, each with an ``id`` naming a
                column).

        Returns:
            ``{"meta": [...], "rows": {row_id: {property_id: [cell]}}}``.

        Raises:
            KeyError: If a requested property id is not a column of the table
                (assuming the database layer has not already rejected it).
        """
        ids = data["ids"]
        data_properties = data["properties"]
        # Keyed by column id: every lookup below uses the property id. Keying
        # by display name broke as soon as metadata renamed a column.
        properties = {p["id"]: p async for p in self._get_properties()}
        id_field = self.config.get("id_field", "id")

        select_fields = [id_field] + [p["id"] for p in data_properties]

        query_sql = """
            select {fields}
            from {table}
            where {where_clause}
        """.format(  # noqa: S608
            table=escape_sqlite(self.table),
            where_clause=f"{escape_sqlite(id_field)} in ({','.join(['?'] * len(ids))})",
            fields=",".join([escape_sqlite(f) for f in select_fields]),
        )
        query_results = await self.db.execute(query_sql, ids)

        rows = {
            row[id_field]: {
                p["id"]: self._extend_value(properties[p["id"]]["type"], row[p["id"]]) for p in data_properties
            }
            for row in query_results
        }

        return {
            "meta": [{"id": p["id"], "name": properties[p["id"]]["name"]} for p in data_properties],
            "rows": rows,
        }

    async def _reconcile_queries(self, queries):
        """Run each reconciliation query and yield ``(query_id, candidates)``.

        Candidates come from a full-text search when ``fts_table`` is
        configured, otherwise from a ``LIKE '%term%'`` match on the name
        column, and are ordered by descending fuzzy-match score.

        Args:
            queries: Mapping of query id to a dict with ``query`` and an
                optional ``limit``.
        """
        select_clause = ",".join([escape_sqlite(f) for f in get_select_fields(self.config)])
        max_limit = self.config.get("max_limit", DEFAULT_LIMIT)
        fts_table = self.config.get("fts_table")

        for query_id, query in queries.items():
            # The limit is inlined into the SQL text, so force it to a bounded integer.
            limit = self._clamp_limit(query.get("limit", max_limit), max_limit)

            where_clauses = ["1"]
            from_clause = escape_sqlite(self.table)
            order_by = ""
            params = {}
            if fts_table:
                # NB this will fail if the table name has non-alphanumeric
                # characters in and sqlite3 version < 3.30.0
                # see: https://www.sqlite.org/src/info/00e9a8f2730eb723
                from_clause = """
                    {table}
                    inner join (
                        SELECT "rowid", "rank"
                        FROM {fts_table}
                        WHERE {fts_table} MATCH :search_query
                    ) as "a" on {table}."rowid" = a."rowid"
                """.format(  # noqa: S608
                    table=escape_sqlite(self.table),
                    fts_table=escape_sqlite(fts_table),
                )
                order_by = "order by a.rank"
                params["search_query"] = escape_fts(query["query"])
            else:
                where_clauses.append(
                    "{search_col} like :search_query".format(
                        search_col=escape_sqlite(self.config["name_field"]),
                    )
                )
                params["search_query"] = f"%{query['query']}%"

            query_sql = """
                SELECT {select_fields}
                FROM {from_clause}
                WHERE {where_clause} {order_by}
                LIMIT {limit}""".format(  # noqa: S608
                select_fields=select_clause,
                from_clause=from_clause,
                where_clause=" and ".join(where_clauses),
                order_by=order_by,
                limit=limit,
            )
            query_results = [self._get_query_result(r, query) for r in await self.db.execute(query_sql, params)]
            query_results = sorted(query_results, key=lambda x: -x["score"])
            yield query_id, query_results

    def _result_types(self, row):
        """Return the list of types for a candidate row.

        Uses the row's ``type_field`` column when one is configured and
        present in the row, otherwise the configured ``type_default``.
        """
        type_field = self.config.get("type_field")
        # Membership is tested against the column names: on sqlite3.Row-style
        # rows a bare ``in row`` compares against the row's *values*.
        if type_field and type_field in row.keys():
            return [row[type_field]]
        return self.config.get("type_default", [DEFAULT_TYPE])

    def _get_query_result(self, row, query):
        """Build one reconciliation candidate from a result row.

        Args:
            row: Mapping-style row supporting ``row[column]`` and ``keys()``.
            query: The query dict; only ``query["query"]`` is read.

        Returns:
            Dict with ``id``, ``name``, ``type``, ``score`` (``fuzz.ratio`` of
            the lower-cased, stripped name and query) and ``match`` (whether
            those two normalised strings are equal).
        """
        name = str(row[self.config["name_field"]])
        name_match = name.lower().strip()
        query_match = str(query["query"]).lower().strip()

        return {
            "id": str(row[self.config.get("id_field", "id")]),
            "name": name,
            "type": self._result_types(row),
            "score": fuzz.ratio(name_match, query_match),
            "match": name_match == query_match,
        }

    async def _service_manifest(self, request):
        """Build the service manifest describing this reconciliation endpoint.

        Args:
            request: Request exposing ``scheme``, ``host`` and ``headers``.
        """
        # @todo: if type_field is set then get a list of types to use in the "defaultTypes" item below.
        # handle X-FORWARDED-PROTO in Datasette: https://github.com/simonw/datasette/issues/2215
        scheme = request.scheme
        if "x-forwarded-proto" in request.headers:
            scheme = request.headers.get("x-forwarded-proto")

        service_url = (
            f'{scheme}://{request.host}{self.datasette.setting("base_url")}/{self.database}/{self.table}/-/reconcile'
        )

        view_url = self.config.get("view_url")
        if not view_url:
            view_url = f"{scheme}://{request.host}{get_view_url(self.datasette, self.database, self.table)}"

        properties = self._get_properties()

        manifest = {
            "versions": ["0.1", "0.2"],
            "name": self.config.get(
                "service_name",
                f"{self.database} {self.table} reconciliation",
            ),
            "identifierSpace": self.config.get("identifierSpace", DEFAULT_IDENTIFER_SPACE),
            "schemaSpace": self.config.get("schemaSpace", DEFAULT_SCHEMA_SPACE),
            "defaultTypes": self.config.get("type_default", [DEFAULT_TYPE]),
            "view": {"url": view_url},
            "extend": {
                "propose_properties": {
                    "service_url": service_url,
                    "service_path": "/extend/propose",
                },
                "property_settings": [
                    {
                        "name": p["id"],
                        "label": p["name"],
                        "type": "number" if p["type"] in ["INTEGER", "FLOAT"] else "text",
                    }
                    async for p in properties
                ],
            },
            "suggest": {
                "entity": {
                    "service_url": service_url,
                    "service_path": "/suggest/entity",
                },
                "type": {
                    "service_url": service_url,
                    "service_path": "/suggest/type",
                },
                "property": {
                    "service_url": service_url,
                    "service_path": "/suggest/property",
                },
            },
        }

        return manifest