Coverage for src\datasette_reconcile\reconcile.py: 95%

108 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-12-20 00:45 +0000

1import json 

2 

3from datasette.utils import escape_fts, escape_sqlite 

4from datasette.utils.asgi import Response 

5from fuzzywuzzy import fuzz 

6 

7from datasette_reconcile.settings import ( 

8 DEFAULT_IDENTIFER_SPACE, 

9 DEFAULT_LIMIT, 

10 DEFAULT_SCHEMA_SPACE, 

11 DEFAULT_TYPE, 

12) 

13from datasette_reconcile.utils import get_select_fields, get_view_url 

14 

15 

class ReconcileAPI:
    """Reconciliation service endpoints backed by a single Datasette table.

    Each public coroutine takes a Datasette request and returns a JSON
    response (with a permissive CORS header) built by :meth:`_response`.
    """

    api_version = "0.2"

    def __init__(self, config, database, table, datasette):
        """Store the plugin configuration and resolve the database handle.

        ``config`` is the reconcile configuration mapping for ``table``;
        ``database`` and ``table`` are names; ``datasette`` is the running
        Datasette instance, used to look up the database and metadata.
        """
        self.config = config
        self.database = database
        self.db = datasette.get_database(database)
        self.table = table
        self.datasette = datasette

    async def reconcile(self, request):
        """
        Takes a request and returns a response based on the queries.

        ``queries`` and ``extend`` are read from the POST body first and fall
        back to the query string. With neither present, the service manifest
        is returned.
        """
        post_vars = await request.post_vars()
        queries = post_vars.get("queries", request.args.get("queries"))
        extend = post_vars.get("extend", request.args.get("extend"))

        if queries:
            results = {
                query_id: {"result": matches}
                async for query_id, matches in self._reconcile_queries(json.loads(queries))
            }
            return self._response(results)

        if extend:
            return self._response(await self._extend(json.loads(extend)))

        # nothing to look up, so just return the service specification
        return self._response(await self._service_manifest(request))

    async def properties(self, request):
        """Return every column of the table as a proposed property.

        ``limit`` and ``type`` are echoed back from the query string (or their
        defaults); they do not filter the property list.
        """
        limit = request.args.get("limit", DEFAULT_LIMIT)
        type_ = request.args.get("type", DEFAULT_TYPE)
        properties = [{"id": p["id"], "name": p["name"]} async for p in self._get_properties()]

        return self._response({"limit": limit, "type": type_, "properties": properties})

    async def suggest_entity(self, request):
        """Suggest rows whose name field starts with the ``prefix`` argument.

        ``cursor`` is the row offset of the page to return; at most
        ``DEFAULT_LIMIT`` rows are returned per call.
        """
        # A missing prefix matches everything instead of the literal text "None".
        prefix = request.args.get("prefix") or ""
        cursor = max(int(request.args.get("cursor", 0)), 0)

        name_field = self.config["name_field"]
        id_field = self.config.get("id_field", "id")
        # Identifiers are escaped and ``cursor`` is an int, so only the search
        # text needs to be bound as a parameter.
        query_sql = f"""
            select {escape_sqlite(id_field)} as id, {escape_sqlite(name_field)} as name
            from {escape_sqlite(self.table)}
            where {escape_sqlite(name_field)} like :search_query
            limit {DEFAULT_LIMIT} offset {cursor}
        """  # noqa: S608
        params = {"search_query": f"{prefix}%"}

        return self._response(
            {"result": [{"id": r["id"], "name": r["name"]} for r in await self.db.execute(query_sql, params)]}
        )

    async def suggest_property(self, request):
        """Suggest columns whose id or display name starts with ``prefix``.

        ``cursor`` is the index of the first match to return; at most
        ``DEFAULT_LIMIT`` matches are returned per call.
        """
        # Query-string values arrive as text: coerce the cursor before slicing
        # and treat a missing prefix as "match everything".
        prefix = request.args.get("prefix") or ""
        cursor = max(int(request.args.get("cursor", 0)), 0)

        matches = [
            {"id": p["id"], "name": p["name"]}
            async for p in self._get_properties()
            if p["name"].startswith(prefix) or p["id"].startswith(prefix)
        ]

        return self._response({"result": matches[cursor : cursor + DEFAULT_LIMIT]})

    async def suggest_type(self, request):
        """Return an empty suggestion list; type suggestion is not implemented."""
        return self._response({"result": []})

    async def _get_properties(self):
        """Yield ``{"id", "name", "type"}`` for each column of the table.

        ``name`` is the column's entry under ``columns`` in the table metadata
        when one exists, otherwise the column name itself.
        """
        column_descriptions = self.datasette.table_metadata(self.database, self.table).get("columns") or {}
        for column in await self.db.table_column_details(self.table):
            yield {
                "id": column.name,
                "name": column_descriptions.get(column.name, column.name),
                "type": column.type,
            }

    def _response(self, response):
        """Wrap ``response`` as JSON that any origin is allowed to read."""
        return Response.json(
            response,
            headers={
                "Access-Control-Allow-Origin": "*",
            },
        )

    @staticmethod
    def _typed_value(column_type, value):
        """Tag ``value`` with the data-extension key matching its column type."""
        if column_type == "INTEGER":
            return {"int": value}
        if column_type == "FLOAT":
            return {"float": value}
        return {"str": value}

    @staticmethod
    def _row_has_column(row, column):
        """Return whether ``row`` has a column called ``column``.

        ``column in row`` is not equivalent for ``sqlite3.Row``: that object
        iterates over its *values*, so membership must be tested on ``keys()``.
        """
        return column in row.keys()

    @staticmethod
    def _bounded_limit(requested, max_limit):
        """Clamp a client-supplied limit to the range ``0..max_limit``.

        ``None`` falls back to ``max_limit``. Negative values are raised to 0
        because SQLite treats a negative ``LIMIT`` as "no limit".
        """
        if requested is None:
            return max_limit
        return max(0, min(int(requested), max_limit))

    async def _extend(self, data):
        """Fetch the requested property values for the requested row ids.

        ``data`` must contain ``ids`` (a list of row identifiers) and
        ``properties`` (a list of ``{"id": column_name}`` mappings). Returns a
        dict with ``meta`` (id and display name per property) and ``rows``
        (row id -> property id -> list holding one typed value).
        """
        ids = data["ids"]
        requested = data["properties"]
        # Key by column id: requests refer to columns by id, while "name" may
        # be a human-readable label taken from the metadata.
        columns = {p["id"]: p async for p in self._get_properties()}
        id_field = self.config.get("id_field", "id")

        select_fields = [id_field] + [p["id"] for p in requested]
        placeholders = ",".join(["?"] * len(ids))

        query_sql = """
            select {fields}
            from {table}
            where {where_clause}
        """.format(  # noqa: S608
            table=escape_sqlite(self.table),
            where_clause=f"{escape_sqlite(id_field)} in ({placeholders})",
            fields=",".join([escape_sqlite(f) for f in select_fields]),
        )
        query_results = await self.db.execute(query_sql, ids)

        rows = {
            row[id_field]: {
                p["id"]: [self._typed_value(columns[p["id"]]["type"], row[p["id"]])] for p in requested
            }
            for row in query_results
        }

        return {
            "meta": [{"id": p["id"], "name": columns[p["id"]]["name"]} for p in requested],
            "rows": rows,
        }

    async def _reconcile_queries(self, queries):
        """Run each reconciliation query and yield ``(query_id, candidates)``.

        ``queries`` maps a query id to a mapping with a ``query`` string and an
        optional ``limit``. Candidates come from the full-text index when the
        configuration names one, otherwise from a ``LIKE`` substring match, and
        are ordered by descending score.
        """
        select_fields = ",".join([escape_sqlite(f) for f in get_select_fields(self.config)])
        max_limit = self.config.get("max_limit", DEFAULT_LIMIT)
        table = escape_sqlite(self.table)
        fts_table = self.config.get("fts_table")

        for query_id, query in queries.items():
            # Always an int within 0..max_limit, so it is safe to interpolate.
            limit = self._bounded_limit(query.get("limit", max_limit), max_limit)

            where_clauses = ["1"]
            from_clause = table
            order_by = ""
            params = {}
            if fts_table:
                # NB this will fail if the table name has non-alphanumeric
                # characters in and sqlite3 version < 3.30.0
                # see: https://www.sqlite.org/src/info/00e9a8f2730eb723
                from_clause = """
                {table}
                inner join (
                    SELECT "rowid", "rank"
                    FROM {fts_table}
                    WHERE {fts_table} MATCH :search_query
                ) as "a" on {table}."rowid" = a."rowid"
                """.format(  # noqa: S608
                    table=table,
                    fts_table=escape_sqlite(fts_table),
                )
                order_by = "order by a.rank"
                params["search_query"] = escape_fts(query["query"])
            else:
                where_clauses.append(
                    "{search_col} like :search_query".format(
                        search_col=escape_sqlite(self.config["name_field"]),
                    )
                )
                params["search_query"] = f"%{query['query']}%"

            query_sql = """
            SELECT {select_fields}
            FROM {from_clause}
            WHERE {where_clause} {order_by}
            LIMIT {limit}""".format(  # noqa: S608
                select_fields=select_fields,
                from_clause=from_clause,
                where_clause=" and ".join(where_clauses),
                order_by=order_by,
                limit=limit,
            )
            candidates = [self._get_query_result(r, query) for r in await self.db.execute(query_sql, params)]
            # Stable sort: equally scored candidates keep their database order.
            candidates.sort(key=lambda candidate: candidate["score"], reverse=True)
            yield query_id, candidates

    def _get_query_result(self, row, query):
        """Convert a database row into a reconciliation candidate.

        The score is the fuzzy-match ratio between the lower-cased, stripped
        name and query text; ``match`` is true only when the two are equal.
        """
        name = str(row[self.config["name_field"]])
        name_match = name.lower().strip()
        query_match = str(query["query"]).lower().strip()

        type_ = self.config.get("type_default", [DEFAULT_TYPE])
        type_field = self.config.get("type_field")
        if type_field and self._row_has_column(row, type_field):
            type_ = [row[type_field]]

        return {
            "id": str(row[self.config.get("id_field", "id")]),
            "name": name,
            "type": type_,
            "score": fuzz.ratio(name_match, query_match),
            "match": name_match == query_match,
        }

    async def _service_manifest(self, request):
        """Build the service manifest describing this reconciliation endpoint."""
        # @todo: if type_field is set then get a list of types to use in the "defaultTypes" item below.
        # handle X-FORWARDED-PROTO in Datasette: https://github.com/simonw/datasette/issues/2215
        scheme = request.scheme
        if "x-forwarded-proto" in request.headers:
            scheme = request.headers.get("x-forwarded-proto")

        # NOTE(review): if the base_url setting ends with "/", this yields a
        # double slash before the database name - confirm against the tests.
        service_url = (
            f'{scheme}://{request.host}{self.datasette.setting("base_url")}/{self.database}/{self.table}/-/reconcile'
        )

        view_url = self.config.get("view_url")
        if not view_url:
            view_url = f"{scheme}://{request.host}{get_view_url(self.datasette, self.database, self.table)}"

        property_settings = [
            {
                "name": p["id"],
                "label": p["name"],
                "type": "number" if p["type"] in ["INTEGER", "FLOAT"] else "text",
            }
            async for p in self._get_properties()
        ]

        def suggest_service(path):
            # Every suggest endpoint lives under the same service URL.
            return {"service_url": service_url, "service_path": path}

        return {
            "versions": ["0.1", "0.2"],
            "name": self.config.get(
                "service_name",
                f"{self.database} {self.table} reconciliation",
            ),
            "identifierSpace": self.config.get("identifierSpace", DEFAULT_IDENTIFER_SPACE),
            "schemaSpace": self.config.get("schemaSpace", DEFAULT_SCHEMA_SPACE),
            "defaultTypes": self.config.get("type_default", [DEFAULT_TYPE]),
            "view": {"url": view_url},
            "extend": {
                "propose_properties": {
                    "service_url": service_url,
                    "service_path": "/extend/propose",
                },
                "property_settings": property_settings,
            },
            "suggest": {
                "entity": suggest_service("/suggest/entity"),
                "type": suggest_service("/suggest/type"),
                "property": suggest_service("/suggest/property"),
            },
        }

272 

273 return manifest