sqlglot.dialects.duckdb
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.expressions import DATA_TYPE 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 JSON_EXTRACT_TYPE, 10 NormalizationStrategy, 11 approx_count_distinct_sql, 12 arg_max_or_min_no_count, 13 arrow_json_extract_sql, 14 binary_from_function, 15 bool_xor_sql, 16 build_default_decimal_type, 17 date_trunc_to_time, 18 datestrtodate_sql, 19 no_datetime_sql, 20 encode_decode_sql, 21 build_formatted_time, 22 inline_array_unless_query, 23 no_comment_column_constraint_sql, 24 no_safe_divide_sql, 25 no_time_sql, 26 no_timestamp_sql, 27 pivot_column_names, 28 regexp_extract_sql, 29 rename_func, 30 str_position_sql, 31 str_to_time_sql, 32 timestamptrunc_sql, 33 timestrtotime_sql, 34 unit_to_var, 35 unit_to_str, 36 sha256_sql, 37) 38from sqlglot.helper import seq_get 39from sqlglot.tokens import TokenType 40 41DATETIME_DELTA = t.Union[ 42 exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub 43] 44 45WINDOW_FUNCS_WITH_IGNORE_NULLS = ( 46 exp.FirstValue, 47 exp.LastValue, 48 exp.Lag, 49 exp.Lead, 50 exp.NthValue, 51) 52 53 54def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str: 55 this = expression.this 56 unit = unit_to_var(expression) 57 op = ( 58 "+" 59 if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)) 60 else "-" 61 ) 62 63 to_type: t.Optional[DATA_TYPE] = None 64 if isinstance(expression, exp.TsOrDsAdd): 65 to_type = expression.return_type 66 elif this.is_string: 67 # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work 68 to_type = ( 69 exp.DataType.Type.DATETIME 70 if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)) 71 else exp.DataType.Type.DATE 72 ) 73 74 this = exp.cast(this, to_type) if to_type else this 75 76 return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}" 77 78 79# BigQuery -> DuckDB conversion for the DATE function 80def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: 81 result = f"CAST({self.sql(expression, 'this')} AS DATE)" 82 zone = self.sql(expression, "zone") 83 84 if zone: 85 date_str = self.func("STRFTIME", result, "'%d/%m/%Y'") 86 date_str = f"{date_str} || ' ' || {zone}" 87 88 # This will create a TIMESTAMP with time zone information 89 result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'") 90 91 return result 92 93 94# BigQuery -> DuckDB conversion for the TIME_DIFF function 95def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: 96 this = exp.cast(expression.this, exp.DataType.Type.TIME) 97 expr = exp.cast(expression.expression, exp.DataType.Type.TIME) 98 99 # Although the 2 dialects share similar signatures, BQ seems to inverse 100 # the sign of the result so the start/end time operands are flipped 101 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 102 103 104def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str: 105 if expression.expression: 106 self.unsupported("DuckDB ARRAY_SORT does not support a comparator") 107 return self.func("ARRAY_SORT", expression.this) 108 109 110def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str: 111 name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT" 112 return self.func(name, expression.this) 113 114 115def _build_sort_array_desc(args: t.List) -> exp.Expression: 116 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 117 118 119def _build_date_diff(args: t.List) -> exp.Expression: 120 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 121 122 123def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 124 def _builder(args: t.List) -> exp.GenerateSeries: 125 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 126 if len(args) == 1: 127 # DuckDB uses 0 as a default for the series' start when it's omitted 128 args.insert(0, exp.Literal.number("0")) 129 130 gen_series = exp.GenerateSeries.from_arg_list(args) 131 gen_series.set("is_end_exclusive", end_exclusive) 132 133 return gen_series 134 135 return _builder 136 137 138def _build_make_timestamp(args: t.List) -> exp.Expression: 139 if len(args) == 1: 140 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 141 142 return exp.TimestampFromParts( 143 year=seq_get(args, 0), 144 month=seq_get(args, 1), 145 day=seq_get(args, 2), 146 hour=seq_get(args, 3), 147 min=seq_get(args, 4), 148 sec=seq_get(args, 5), 149 ) 150 151 152def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 153 args: t.List[str] = [] 154 155 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 156 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 157 # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found 158 ancestor_cast = expression.find_ancestor(exp.Cast) 159 is_struct_cast = ancestor_cast and any( 160 casted_type.is_type(exp.DataType.Type.STRUCT) 161 for casted_type in ancestor_cast.find_all(exp.DataType) 162 ) 163 164 for i, expr in enumerate(expression.expressions): 165 is_property_eq = isinstance(expr, exp.PropertyEQ) 166 value = expr.expression if is_property_eq else expr 167 168 if is_struct_cast: 169 args.append(self.sql(value)) 170 else: 171 key = expr.name if is_property_eq else f"_{i}" 172 args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}") 173 174 csv_args = ", ".join(args) 175 176 return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}" 177 178 179def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str: 180 if expression.is_type("array"): 181 return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]" 182 183 # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE 184 if expression.is_type( 185 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ 186 ): 187 return expression.this.value 188 189 return self.datatype_sql(expression) 190 191 192def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: 193 sql = self.func("TO_JSON", expression.this, expression.args.get("options")) 194 return f"CAST({sql} AS TEXT)" 195 196 197def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: 198 scale = expression.args.get("scale") 199 timestamp = expression.this 200 201 if scale in (None, exp.UnixToTime.SECONDS): 202 return self.func("TO_TIMESTAMP", timestamp) 203 if scale == exp.UnixToTime.MILLIS: 204 return self.func("EPOCH_MS", timestamp) 205 if scale == exp.UnixToTime.MICROS: 206 return self.func("MAKE_TIMESTAMP", timestamp) 207 208 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 209 210 211WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 212 213 214def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 215 arrow_sql = arrow_json_extract_sql(self, expression) 216 if not expression.same_parent and isinstance( 217 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 218 ): 219 arrow_sql = self.wrap(arrow_sql) 220 return arrow_sql 221 222 223def _implicit_datetime_cast( 224 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 225) -> t.Optional[exp.Expression]: 226 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 227 228 229def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 230 this = _implicit_datetime_cast(expression.this) 231 expr = _implicit_datetime_cast(expression.expression) 232 233 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 234 235 236def _generate_datetime_array_sql( 237 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 238) -> str: 239 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 240 241 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 242 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 243 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 244 245 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 246 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 247 start=start, end=end, step=expression.args.get("step") 248 ) 249 250 if is_generate_date_array: 251 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 252 # GENERATE_DATE_ARRAY we must cast it back to DATE array 253 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 254 255 return self.sql(gen_series) 256 257 258class DuckDB(Dialect): 259 NULL_ORDERING = "nulls_are_last" 260 SUPPORTS_USER_DEFINED_TYPES = False 261 SAFE_DIVISION = True 262 INDEX_OFFSET = 1 263 CONCAT_COALESCE = True 264 SUPPORTS_ORDER_BY_ALL = True 265 SUPPORTS_FIXED_SIZE_ARRAYS = True 266 STRICT_JSON_PATH_SYNTAX = False 267 268 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 269 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 270 271 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 272 if isinstance(path, exp.Literal): 273 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 274 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 275 # This check ensures we'll avoid trying to parse these as JSON paths, which can 276 # either result in a noisy warning or in an invalid representation of the path. 277 path_text = path.name 278 if path_text.startswith("/") or "[#" in path_text: 279 return path 280 281 return super().to_json_path(path) 282 283 class Tokenizer(tokens.Tokenizer): 284 HEREDOC_STRINGS = ["$"] 285 286 HEREDOC_TAG_IS_IDENTIFIER = True 287 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 288 289 KEYWORDS = { 290 **tokens.Tokenizer.KEYWORDS, 291 "//": TokenType.DIV, 292 "ATTACH": TokenType.COMMAND, 293 "BINARY": TokenType.VARBINARY, 294 "BITSTRING": TokenType.BIT, 295 "BPCHAR": TokenType.TEXT, 296 "CHAR": TokenType.TEXT, 297 "CHARACTER VARYING": TokenType.TEXT, 298 "EXCLUDE": TokenType.EXCEPT, 299 "LOGICAL": TokenType.BOOLEAN, 300 "ONLY": TokenType.ONLY, 301 "PIVOT_WIDER": TokenType.PIVOT, 302 "POSITIONAL": TokenType.POSITIONAL, 303 "SIGNED": TokenType.INT, 304 "STRING": TokenType.TEXT, 305 "SUMMARIZE": TokenType.SUMMARIZE, 306 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 307 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 308 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 309 "TIMESTAMP_US": TokenType.TIMESTAMP, 310 "UBIGINT": TokenType.UBIGINT, 311 "UINTEGER": TokenType.UINT, 312 "USMALLINT": TokenType.USMALLINT, 313 "UTINYINT": TokenType.UTINYINT, 314 "VARCHAR": TokenType.TEXT, 315 } 316 KEYWORDS.pop("/*+") 317 318 SINGLE_TOKENS = { 319 **tokens.Tokenizer.SINGLE_TOKENS, 320 "$": TokenType.PARAMETER, 321 } 322 323 class Parser(parser.Parser): 324 BITWISE = { 325 **parser.Parser.BITWISE, 326 TokenType.TILDA: exp.RegexpLike, 327 } 328 329 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 330 331 FUNCTIONS = { 332 **parser.Parser.FUNCTIONS, 333 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 334 "ARRAY_SORT": exp.SortArray.from_arg_list, 335 "DATEDIFF": _build_date_diff, 336 "DATE_DIFF": _build_date_diff, 337 "DATE_TRUNC": date_trunc_to_time, 338 "DATETRUNC": date_trunc_to_time, 339 "DECODE": lambda args: exp.Decode( 340 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 341 ), 342 "ENCODE": lambda args: exp.Encode( 343 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 344 ), 345 "EPOCH": exp.TimeToUnix.from_arg_list, 346 "EPOCH_MS": lambda args: exp.UnixToTime( 347 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 348 ), 349 "JSON": exp.ParseJSON.from_arg_list, 350 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 351 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 352 "LIST_HAS": exp.ArrayContains.from_arg_list, 353 "LIST_REVERSE_SORT": _build_sort_array_desc, 354 "LIST_SORT": exp.SortArray.from_arg_list, 355 "LIST_VALUE": lambda args: exp.Array(expressions=args), 356 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 357 "MAKE_TIMESTAMP": _build_make_timestamp, 358 "MEDIAN": lambda args: exp.PercentileCont( 359 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 360 ), 361 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 362 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 363 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 364 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 365 ), 366 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 367 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 368 this=seq_get(args, 0), 369 expression=seq_get(args, 1), 370 replacement=seq_get(args, 2), 371 modifiers=seq_get(args, 3), 372 ), 373 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 374 "STRING_SPLIT": exp.Split.from_arg_list, 375 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 376 "STRING_TO_ARRAY": exp.Split.from_arg_list, 377 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 378 "STRUCT_PACK": exp.Struct.from_arg_list, 379 "STR_SPLIT": exp.Split.from_arg_list, 380 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 381 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 382 "UNNEST": exp.Explode.from_arg_list, 383 "XOR": binary_from_function(exp.BitwiseXor), 384 "GENERATE_SERIES": _build_generate_series(), 385 "RANGE": _build_generate_series(end_exclusive=True), 386 } 387 388 FUNCTIONS.pop("DATE_SUB") 389 FUNCTIONS.pop("GLOB") 390 391 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 392 FUNCTION_PARSERS.pop("DECODE") 393 394 NO_PAREN_FUNCTION_PARSERS = { 395 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 396 "MAP": lambda self: self._parse_map(), 397 } 398 399 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 400 TokenType.SEMI, 401 TokenType.ANTI, 402 } 403 404 PLACEHOLDER_PARSERS = { 405 **parser.Parser.PLACEHOLDER_PARSERS, 406 TokenType.PARAMETER: lambda self: ( 407 self.expression(exp.Placeholder, this=self._prev.text) 408 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 409 else None 410 ), 411 } 412 413 TYPE_CONVERTERS = { 414 # https://duckdb.org/docs/sql/data_types/numeric 415 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 416 # https://duckdb.org/docs/sql/data_types/text 417 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 418 } 419 420 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 421 # https://duckdb.org/docs/sql/samples.html 422 sample = super()._parse_table_sample(as_modifier=as_modifier) 423 if sample and not sample.args.get("method"): 424 if sample.args.get("size"): 425 sample.set("method", exp.var("RESERVOIR")) 426 else: 427 sample.set("method", exp.var("SYSTEM")) 428 429 return sample 430 431 def _parse_bracket( 432 self, this: t.Optional[exp.Expression] = None 433 ) -> t.Optional[exp.Expression]: 434 bracket = super()._parse_bracket(this) 435 if isinstance(bracket, exp.Bracket): 436 bracket.set("returns_list_for_maps", True) 437 438 return bracket 439 440 def _parse_map(self) -> exp.ToMap | exp.Map: 441 if self._match(TokenType.L_BRACE, advance=False): 442 return self.expression(exp.ToMap, this=self._parse_bracket()) 443 444 args = self._parse_wrapped_csv(self._parse_assignment) 445 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 446 447 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 448 return self._parse_field_def() 449 450 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 451 if len(aggregations) == 1: 452 return super()._pivot_column_names(aggregations) 453 return pivot_column_names(aggregations, dialect="duckdb") 454 455 class Generator(generator.Generator): 456 PARAMETER_TOKEN = "$" 457 NAMED_PLACEHOLDER_TOKEN = "$" 458 JOIN_HINTS = False 459 TABLE_HINTS = False 460 QUERY_HINTS = False 461 LIMIT_FETCH = "LIMIT" 462 STRUCT_DELIMITER = ("(", ")") 463 RENAME_TABLE_WITH_DB = False 464 NVL2_SUPPORTED = False 465 SEMI_ANTI_JOIN_WITH_SIDE = False 466 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 467 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 468 LAST_DAY_SUPPORTS_DATE_PART = False 469 JSON_KEY_VALUE_PAIR_SEP = "," 470 IGNORE_NULLS_IN_FUNC = True 471 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 472 SUPPORTS_CREATE_TABLE_LIKE = False 473 MULTI_ARG_DISTINCT = False 474 CAN_IMPLEMENT_ARRAY_ANY = True 475 SUPPORTS_TO_NUMBER = False 476 COPY_HAS_INTO_KEYWORD = False 477 STAR_EXCEPT = "EXCLUDE" 478 PAD_FILL_PATTERN_IS_REQUIRED = True 479 ARRAY_CONCAT_IS_VAR_LEN = False 480 481 TRANSFORMS = { 482 **generator.Generator.TRANSFORMS, 483 exp.ApproxDistinct: approx_count_distinct_sql, 484 exp.Array: inline_array_unless_query, 485 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 486 exp.ArrayFilter: rename_func("LIST_FILTER"), 487 exp.ArraySize: rename_func("ARRAY_LENGTH"), 488 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 489 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 490 exp.ArraySort: _array_sort_sql, 491 exp.ArraySum: rename_func("LIST_SUM"), 492 exp.BitwiseXor: rename_func("XOR"), 493 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 494 exp.CurrentDate: lambda *_: "CURRENT_DATE", 495 exp.CurrentTime: lambda *_: "CURRENT_TIME", 496 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 497 exp.DayOfMonth: rename_func("DAYOFMONTH"), 498 exp.DayOfWeek: rename_func("DAYOFWEEK"), 499 exp.DayOfWeekIso: rename_func("ISODOW"), 500 exp.DayOfYear: rename_func("DAYOFYEAR"), 501 exp.DataType: _datatype_sql, 502 exp.Date: _date_sql, 503 exp.DateAdd: _date_delta_sql, 504 exp.DateFromParts: rename_func("MAKE_DATE"), 505 exp.DateSub: _date_delta_sql, 506 exp.DateDiff: _date_diff_sql, 507 exp.DateStrToDate: datestrtodate_sql, 508 exp.Datetime: no_datetime_sql, 509 exp.DatetimeSub: _date_delta_sql, 510 exp.DatetimeAdd: _date_delta_sql, 511 exp.DateToDi: lambda self, 512 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 513 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 514 exp.DiToDate: lambda self, 515 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 516 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 517 exp.GenerateDateArray: _generate_datetime_array_sql, 518 exp.GenerateTimestampArray: _generate_datetime_array_sql, 519 exp.Explode: rename_func("UNNEST"), 520 exp.IntDiv: lambda self, e: self.binary(e, "//"), 521 exp.IsInf: rename_func("ISINF"), 522 exp.IsNan: rename_func("ISNAN"), 523 exp.JSONExtract: _arrow_json_extract_sql, 524 exp.JSONExtractScalar: _arrow_json_extract_sql, 525 exp.JSONFormat: _json_format_sql, 526 exp.LogicalOr: rename_func("BOOL_OR"), 527 exp.LogicalAnd: rename_func("BOOL_AND"), 528 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 529 exp.MonthsBetween: lambda self, e: self.func( 530 "DATEDIFF", 531 "'month'", 532 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 533 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 534 ), 535 exp.PercentileCont: rename_func("QUANTILE_CONT"), 536 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 537 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 538 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 539 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 540 exp.RegexpExtract: regexp_extract_sql, 541 exp.RegexpReplace: lambda self, e: self.func( 542 "REGEXP_REPLACE", 543 e.this, 544 e.expression, 545 e.args.get("replacement"), 546 e.args.get("modifiers"), 547 ), 548 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 549 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 550 exp.Return: lambda self, e: self.sql(e, "this"), 551 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 552 exp.Rand: rename_func("RANDOM"), 553 exp.SafeDivide: no_safe_divide_sql, 554 exp.SHA2: sha256_sql, 555 exp.Split: rename_func("STR_SPLIT"), 556 exp.SortArray: _sort_array_sql, 557 exp.StrPosition: str_position_sql, 558 exp.StrToUnix: lambda self, e: self.func( 559 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 560 ), 561 exp.Struct: _struct_sql, 562 exp.Transform: rename_func("LIST_TRANSFORM"), 563 exp.TimeAdd: _date_delta_sql, 564 exp.Time: no_time_sql, 565 exp.TimeDiff: _timediff_sql, 566 exp.Timestamp: no_timestamp_sql, 567 exp.TimestampDiff: lambda self, e: self.func( 568 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 569 ), 570 exp.TimestampTrunc: timestamptrunc_sql(), 571 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 572 exp.TimeStrToTime: timestrtotime_sql, 573 exp.TimeStrToUnix: lambda self, e: self.func( 574 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 575 ), 576 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 577 exp.TimeToUnix: rename_func("EPOCH"), 578 exp.TsOrDiToDi: lambda self, 579 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 580 exp.TsOrDsAdd: _date_delta_sql, 581 exp.TsOrDsDiff: lambda self, e: self.func( 582 "DATE_DIFF", 583 f"'{e.args.get('unit') or 'DAY'}'", 584 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 585 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 586 ), 587 exp.UnixToStr: lambda self, e: self.func( 588 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 589 ), 590 exp.DatetimeTrunc: lambda self, e: self.func( 591 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 592 ), 593 exp.UnixToTime: _unix_to_time_sql, 594 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 595 exp.VariancePop: rename_func("VAR_POP"), 596 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 597 exp.Xor: bool_xor_sql, 598 } 599 600 SUPPORTED_JSON_PATH_PARTS = { 601 exp.JSONPathKey, 602 exp.JSONPathRoot, 603 exp.JSONPathSubscript, 604 exp.JSONPathWildcard, 605 } 606 607 TYPE_MAPPING = { 608 **generator.Generator.TYPE_MAPPING, 609 exp.DataType.Type.BINARY: "BLOB", 610 exp.DataType.Type.BPCHAR: "TEXT", 611 exp.DataType.Type.CHAR: "TEXT", 612 exp.DataType.Type.FLOAT: "REAL", 613 exp.DataType.Type.NCHAR: "TEXT", 614 exp.DataType.Type.NVARCHAR: "TEXT", 615 exp.DataType.Type.UINT: "UINTEGER", 616 exp.DataType.Type.VARBINARY: "BLOB", 617 exp.DataType.Type.ROWVERSION: "BLOB", 618 exp.DataType.Type.VARCHAR: "TEXT", 619 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 620 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 621 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 622 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 623 } 624 625 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 626 RESERVED_KEYWORDS = { 627 "array", 628 "analyse", 629 "union", 630 "all", 631 "when", 632 "in_p", 633 "default", 634 "create_p", 635 "window", 636 "asymmetric", 637 "to", 638 "else", 639 "localtime", 640 "from", 641 "end_p", 642 "select", 643 "current_date", 644 "foreign", 645 "with", 646 "grant", 647 "session_user", 648 "or", 649 "except", 650 "references", 651 "fetch", 652 "limit", 653 "group_p", 654 "leading", 655 "into", 656 "collate", 657 "offset", 658 "do", 659 "then", 660 "localtimestamp", 661 "check_p", 662 "lateral_p", 663 "current_role", 664 "where", 665 "asc_p", 666 "placing", 667 "desc_p", 668 "user", 669 "unique", 670 "initially", 671 "column", 672 "both", 673 "some", 674 "as", 675 "any", 676 "only", 677 "deferrable", 678 "null_p", 679 "current_time", 680 "true_p", 681 "table", 682 "case", 683 "trailing", 684 "variadic", 685 "for", 686 "on", 687 "distinct", 688 "false_p", 689 "not", 690 "constraint", 691 "current_timestamp", 692 "returning", 693 "primary", 694 "intersect", 695 "having", 696 "analyze", 697 "current_user", 698 "and", 699 "cast", 700 "symmetric", 701 "using", 702 "order", 703 "current_catalog", 704 } 705 706 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 707 708 # DuckDB doesn't generally support CREATE TABLE .. properties 709 # https://duckdb.org/docs/sql/statements/create_table.html 710 PROPERTIES_LOCATION = { 711 prop: exp.Properties.Location.UNSUPPORTED 712 for prop in generator.Generator.PROPERTIES_LOCATION 713 } 714 715 # There are a few exceptions (e.g. temporary tables) which are supported or 716 # can be transpiled to DuckDB, so we explicitly override them accordingly 717 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 718 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 719 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 720 721 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 722 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 723 724 def strtotime_sql(self, expression: exp.StrToTime) -> str: 725 if expression.args.get("safe"): 726 formatted_time = self.format_time(expression) 727 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 728 return str_to_time_sql(self, expression) 729 730 def strtodate_sql(self, expression: exp.StrToDate) -> str: 731 if expression.args.get("safe"): 732 formatted_time = self.format_time(expression) 733 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 734 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 735 736 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 737 arg = expression.this 738 if expression.args.get("safe"): 739 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 740 return self.func("JSON", arg) 741 742 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 743 nano = expression.args.get("nano") 744 if nano is not None: 745 expression.set( 746 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 747 ) 748 749 return rename_func("MAKE_TIME")(self, expression) 750 751 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 752 sec = expression.args["sec"] 753 754 milli = expression.args.get("milli") 755 if milli is not None: 756 sec += milli.pop() / exp.Literal.number(1000.0) 757 758 nano = expression.args.get("nano") 759 if nano is not None: 760 sec += nano.pop() / exp.Literal.number(1000000000.0) 761 762 if milli or nano: 763 expression.set("sec", sec) 764 765 return rename_func("MAKE_TIMESTAMP")(self, expression) 766 767 def tablesample_sql( 768 self, 769 expression: exp.TableSample, 770 tablesample_keyword: t.Optional[str] = None, 771 ) -> str: 772 if not isinstance(expression.parent, exp.Select): 773 # This sample clause only applies to a single source, not the entire resulting relation 774 tablesample_keyword = "TABLESAMPLE" 775 776 if expression.args.get("size"): 777 method = expression.args.get("method") 778 if method and method.name.upper() != "RESERVOIR": 779 self.unsupported( 780 f"Sampling method {method} is not supported with a discrete sample count, " 781 "defaulting to reservoir sampling" 782 ) 783 expression.set("method", exp.var("RESERVOIR")) 784 785 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 786 787 def interval_sql(self, expression: exp.Interval) -> str: 788 multiplier: t.Optional[int] = None 789 unit = expression.text("unit").lower() 790 791 if unit.startswith("week"): 792 multiplier = 7 793 if unit.startswith("quarter"): 794 multiplier = 90 795 796 if multiplier: 797 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 798 799 return super().interval_sql(expression) 800 801 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 802 if isinstance(expression.parent, exp.UserDefinedFunction): 803 return self.sql(expression, "this") 804 return super().columndef_sql(expression, sep) 805 806 def join_sql(self, expression: exp.Join) -> str: 807 if ( 808 expression.side == "LEFT" 809 and not expression.args.get("on") 810 and isinstance(expression.this, exp.Unnest) 811 ): 812 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 813 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 814 return super().join_sql(expression.on(exp.true())) 815 816 return super().join_sql(expression) 817 818 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 819 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 820 if expression.args.get("is_end_exclusive"): 821 return rename_func("RANGE")(self, expression) 822 823 return self.function_fallback_sql(expression) 824 825 def bracket_sql(self, expression: exp.Bracket) -> str: 826 this = expression.this 827 if isinstance(this, exp.Array): 828 this.replace(exp.paren(this)) 829 830 bracket = super().bracket_sql(expression) 831 832 if not expression.args.get("returns_list_for_maps"): 833 if not this.type: 834 from sqlglot.optimizer.annotate_types import annotate_types 835 836 this = annotate_types(this) 837 838 if this.is_type(exp.DataType.Type.MAP): 839 bracket = f"({bracket})[1]" 840 841 return bracket 842 843 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 844 expression_sql = self.sql(expression, "expression") 845 846 func = expression.this 847 if isinstance(func, exp.PERCENTILES): 848 # Make the order key the first arg and slide the fraction to the right 849 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 850 order_col = expression.find(exp.Ordered) 851 if order_col: 852 func.set("expression", func.this) 853 func.set("this", order_col.this) 854 855 this = self.sql(expression, "this").rstrip(")") 856 857 return f"{this}{expression_sql})" 858 859 def length_sql(self, expression: exp.Length) -> str: 860 arg = expression.this 861 862 # Dialects like BQ and Snowflake also accept binary values as args, so 863 # DDB will attempt to infer the type or resort to case/when resolution 864 if not expression.args.get("binary") or arg.is_string: 865 return self.func("LENGTH", arg) 866 867 if not arg.type: 868 from sqlglot.optimizer.annotate_types import annotate_types 869 870 arg = annotate_types(arg) 871 872 if arg.is_type(*exp.DataType.TEXT_TYPES): 873 return self.func("LENGTH", arg) 874 875 # We need these casts to make duckdb's static type checker happy 876 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 877 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 878 879 case = ( 880 exp.case(self.func("TYPEOF", arg)) 881 .when( 882 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 883 ) # anonymous to break length_sql recursion 884 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 885 ) 886 887 return self.sql(case) 888 889 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 890 this = expression.this 891 key = expression.args.get("key") 892 key_sql = key.name if isinstance(key, exp.Expression) else "" 893 value_sql = self.sql(expression, "value") 894 895 kv_sql = f"{key_sql} := {value_sql}" 896 897 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 898 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 899 if isinstance(this, exp.Struct) and not this.expressions: 900 return self.func("STRUCT_PACK", kv_sql) 901 902 return self.func("STRUCT_INSERT", this, kv_sql) 903 904 def unnest_sql(self, expression: exp.Unnest) -> str: 905 explode_array = expression.args.get("explode_array") 906 if explode_array: 907 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 908 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 909 expression.expressions.append( 910 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 911 ) 912 913 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 914 alias = expression.args.get("alias") 915 if alias: 916 expression.set("alias", None) 917 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 918 919 unnest_sql = super().unnest_sql(expression) 920 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 921 return self.sql(select) 922 923 return super().unnest_sql(expression) 924 925 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 926 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 927 # DuckDB should render IGNORE NULLS only for the general-purpose 928 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 929 return super().ignorenulls_sql(expression) 930 931 return self.sql(expression, "this") 932 933 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 934 this = self.sql(expression, "this") 935 null_text = self.sql(expression, "null") 936 937 if null_text: 938 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 939 940 return self.func("ARRAY_TO_STRING", this, expression.expression)
259class DuckDB(Dialect): 260 NULL_ORDERING = "nulls_are_last" 261 SUPPORTS_USER_DEFINED_TYPES = False 262 SAFE_DIVISION = True 263 INDEX_OFFSET = 1 264 CONCAT_COALESCE = True 265 SUPPORTS_ORDER_BY_ALL = True 266 SUPPORTS_FIXED_SIZE_ARRAYS = True 267 STRICT_JSON_PATH_SYNTAX = False 268 269 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 270 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 271 272 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 273 if isinstance(path, exp.Literal): 274 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 275 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 276 # This check ensures we'll avoid trying to parse these as JSON paths, which can 277 # either result in a noisy warning or in an invalid representation of the path. 278 path_text = path.name 279 if path_text.startswith("/") or "[#" in path_text: 280 return path 281 282 return super().to_json_path(path) 283 284 class Tokenizer(tokens.Tokenizer): 285 HEREDOC_STRINGS = ["$"] 286 287 HEREDOC_TAG_IS_IDENTIFIER = True 288 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 289 290 KEYWORDS = { 291 **tokens.Tokenizer.KEYWORDS, 292 "//": TokenType.DIV, 293 "ATTACH": TokenType.COMMAND, 294 "BINARY": TokenType.VARBINARY, 295 "BITSTRING": TokenType.BIT, 296 "BPCHAR": TokenType.TEXT, 297 "CHAR": TokenType.TEXT, 298 "CHARACTER VARYING": TokenType.TEXT, 299 "EXCLUDE": TokenType.EXCEPT, 300 "LOGICAL": TokenType.BOOLEAN, 301 "ONLY": TokenType.ONLY, 302 "PIVOT_WIDER": TokenType.PIVOT, 303 "POSITIONAL": TokenType.POSITIONAL, 304 "SIGNED": TokenType.INT, 305 "STRING": TokenType.TEXT, 306 "SUMMARIZE": TokenType.SUMMARIZE, 307 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 308 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 309 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 310 "TIMESTAMP_US": TokenType.TIMESTAMP, 311 "UBIGINT": TokenType.UBIGINT, 312 "UINTEGER": TokenType.UINT, 313 "USMALLINT": TokenType.USMALLINT, 314 "UTINYINT": TokenType.UTINYINT, 315 "VARCHAR": TokenType.TEXT, 316 } 317 KEYWORDS.pop("/*+") 318 319 SINGLE_TOKENS = { 320 **tokens.Tokenizer.SINGLE_TOKENS, 321 "$": TokenType.PARAMETER, 322 } 323 324 class Parser(parser.Parser): 325 BITWISE = { 326 **parser.Parser.BITWISE, 327 TokenType.TILDA: exp.RegexpLike, 328 } 329 330 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 331 332 FUNCTIONS = { 333 **parser.Parser.FUNCTIONS, 334 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 335 "ARRAY_SORT": exp.SortArray.from_arg_list, 336 "DATEDIFF": _build_date_diff, 337 "DATE_DIFF": _build_date_diff, 338 "DATE_TRUNC": date_trunc_to_time, 339 "DATETRUNC": date_trunc_to_time, 340 "DECODE": lambda args: exp.Decode( 341 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 342 ), 343 "ENCODE": lambda args: exp.Encode( 344 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 345 ), 346 "EPOCH": exp.TimeToUnix.from_arg_list, 347 "EPOCH_MS": lambda args: exp.UnixToTime( 348 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 349 ), 350 "JSON": exp.ParseJSON.from_arg_list, 351 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 352 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 353 "LIST_HAS": exp.ArrayContains.from_arg_list, 354 "LIST_REVERSE_SORT": _build_sort_array_desc, 355 "LIST_SORT": exp.SortArray.from_arg_list, 356 "LIST_VALUE": lambda args: exp.Array(expressions=args), 357 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 358 "MAKE_TIMESTAMP": _build_make_timestamp, 359 "MEDIAN": lambda args: exp.PercentileCont( 360 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 361 ), 362 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 363 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 364 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 365 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 366 ), 367 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 368 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 369 this=seq_get(args, 0), 370 expression=seq_get(args, 1), 371 replacement=seq_get(args, 2), 372 modifiers=seq_get(args, 3), 373 ), 374 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 375 "STRING_SPLIT": exp.Split.from_arg_list, 376 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 377 "STRING_TO_ARRAY": exp.Split.from_arg_list, 378 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 379 "STRUCT_PACK": exp.Struct.from_arg_list, 380 "STR_SPLIT": exp.Split.from_arg_list, 381 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 382 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 383 "UNNEST": exp.Explode.from_arg_list, 384 "XOR": binary_from_function(exp.BitwiseXor), 385 "GENERATE_SERIES": _build_generate_series(), 386 "RANGE": _build_generate_series(end_exclusive=True), 387 } 388 389 FUNCTIONS.pop("DATE_SUB") 390 FUNCTIONS.pop("GLOB") 391 392 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 393 FUNCTION_PARSERS.pop("DECODE") 394 395 NO_PAREN_FUNCTION_PARSERS = { 396 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 397 "MAP": lambda self: self._parse_map(), 398 } 399 400 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 401 TokenType.SEMI, 402 TokenType.ANTI, 403 } 404 405 PLACEHOLDER_PARSERS = { 406 **parser.Parser.PLACEHOLDER_PARSERS, 407 TokenType.PARAMETER: lambda self: ( 408 self.expression(exp.Placeholder, this=self._prev.text) 409 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 410 else None 411 ), 412 } 413 414 TYPE_CONVERTERS = { 415 # https://duckdb.org/docs/sql/data_types/numeric 416 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 417 # https://duckdb.org/docs/sql/data_types/text 418 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 419 } 420 421 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 422 # https://duckdb.org/docs/sql/samples.html 423 sample = super()._parse_table_sample(as_modifier=as_modifier) 424 if sample and not sample.args.get("method"): 425 if sample.args.get("size"): 426 sample.set("method", exp.var("RESERVOIR")) 427 else: 428 sample.set("method", exp.var("SYSTEM")) 429 430 return sample 431 432 def _parse_bracket( 433 self, this: t.Optional[exp.Expression] = None 434 ) -> t.Optional[exp.Expression]: 435 bracket = super()._parse_bracket(this) 436 if isinstance(bracket, exp.Bracket): 437 bracket.set("returns_list_for_maps", True) 438 439 return bracket 440 441 def _parse_map(self) -> exp.ToMap | exp.Map: 442 if self._match(TokenType.L_BRACE, advance=False): 443 return self.expression(exp.ToMap, this=self._parse_bracket()) 444 445 args = self._parse_wrapped_csv(self._parse_assignment) 446 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 447 448 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 449 return self._parse_field_def() 450 451 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 452 if len(aggregations) == 1: 453 return super()._pivot_column_names(aggregations) 454 return pivot_column_names(aggregations, dialect="duckdb") 455 456 class Generator(generator.Generator): 457 PARAMETER_TOKEN = "$" 458 NAMED_PLACEHOLDER_TOKEN = "$" 459 JOIN_HINTS = False 460 TABLE_HINTS = False 461 QUERY_HINTS = False 462 LIMIT_FETCH = "LIMIT" 463 STRUCT_DELIMITER = ("(", ")") 464 RENAME_TABLE_WITH_DB = False 465 NVL2_SUPPORTED = False 466 SEMI_ANTI_JOIN_WITH_SIDE = False 467 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 468 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 469 LAST_DAY_SUPPORTS_DATE_PART = False 470 JSON_KEY_VALUE_PAIR_SEP = "," 471 IGNORE_NULLS_IN_FUNC = True 472 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 473 SUPPORTS_CREATE_TABLE_LIKE = False 474 MULTI_ARG_DISTINCT = False 475 CAN_IMPLEMENT_ARRAY_ANY = True 476 SUPPORTS_TO_NUMBER = False 477 COPY_HAS_INTO_KEYWORD = False 478 STAR_EXCEPT = "EXCLUDE" 479 PAD_FILL_PATTERN_IS_REQUIRED = True 480 ARRAY_CONCAT_IS_VAR_LEN = False 481 482 TRANSFORMS = { 483 **generator.Generator.TRANSFORMS, 484 exp.ApproxDistinct: approx_count_distinct_sql, 485 exp.Array: inline_array_unless_query, 486 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 487 exp.ArrayFilter: rename_func("LIST_FILTER"), 488 exp.ArraySize: rename_func("ARRAY_LENGTH"), 489 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 490 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 491 exp.ArraySort: _array_sort_sql, 492 exp.ArraySum: rename_func("LIST_SUM"), 493 exp.BitwiseXor: rename_func("XOR"), 494 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 495 exp.CurrentDate: lambda *_: "CURRENT_DATE", 496 exp.CurrentTime: lambda *_: "CURRENT_TIME", 497 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 498 exp.DayOfMonth: rename_func("DAYOFMONTH"), 499 exp.DayOfWeek: rename_func("DAYOFWEEK"), 500 exp.DayOfWeekIso: rename_func("ISODOW"), 501 exp.DayOfYear: rename_func("DAYOFYEAR"), 502 exp.DataType: _datatype_sql, 503 exp.Date: _date_sql, 504 exp.DateAdd: _date_delta_sql, 505 exp.DateFromParts: rename_func("MAKE_DATE"), 506 exp.DateSub: _date_delta_sql, 507 exp.DateDiff: _date_diff_sql, 508 exp.DateStrToDate: datestrtodate_sql, 509 exp.Datetime: no_datetime_sql, 510 exp.DatetimeSub: _date_delta_sql, 511 exp.DatetimeAdd: _date_delta_sql, 512 exp.DateToDi: lambda self, 513 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 514 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 515 exp.DiToDate: lambda self, 516 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 517 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 518 exp.GenerateDateArray: _generate_datetime_array_sql, 519 exp.GenerateTimestampArray: _generate_datetime_array_sql, 520 exp.Explode: rename_func("UNNEST"), 521 exp.IntDiv: lambda self, e: self.binary(e, "//"), 522 exp.IsInf: rename_func("ISINF"), 523 exp.IsNan: rename_func("ISNAN"), 524 exp.JSONExtract: _arrow_json_extract_sql, 525 exp.JSONExtractScalar: _arrow_json_extract_sql, 526 exp.JSONFormat: _json_format_sql, 527 exp.LogicalOr: rename_func("BOOL_OR"), 528 exp.LogicalAnd: rename_func("BOOL_AND"), 529 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 530 exp.MonthsBetween: lambda self, e: self.func( 531 "DATEDIFF", 532 "'month'", 533 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 534 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 535 ), 536 exp.PercentileCont: rename_func("QUANTILE_CONT"), 537 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 538 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 539 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 540 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 541 exp.RegexpExtract: regexp_extract_sql, 542 exp.RegexpReplace: lambda self, e: self.func( 543 "REGEXP_REPLACE", 544 e.this, 545 e.expression, 546 e.args.get("replacement"), 547 e.args.get("modifiers"), 548 ), 549 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 550 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 551 exp.Return: lambda self, e: self.sql(e, "this"), 552 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 553 exp.Rand: rename_func("RANDOM"), 554 exp.SafeDivide: no_safe_divide_sql, 555 exp.SHA2: sha256_sql, 556 exp.Split: rename_func("STR_SPLIT"), 557 exp.SortArray: _sort_array_sql, 558 exp.StrPosition: str_position_sql, 559 exp.StrToUnix: lambda self, e: self.func( 560 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 561 ), 562 exp.Struct: _struct_sql, 563 exp.Transform: rename_func("LIST_TRANSFORM"), 564 exp.TimeAdd: _date_delta_sql, 565 exp.Time: no_time_sql, 566 exp.TimeDiff: _timediff_sql, 567 exp.Timestamp: no_timestamp_sql, 568 exp.TimestampDiff: lambda self, e: self.func( 569 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 570 ), 571 exp.TimestampTrunc: timestamptrunc_sql(), 572 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 573 exp.TimeStrToTime: timestrtotime_sql, 574 exp.TimeStrToUnix: lambda self, e: self.func( 575 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 576 ), 577 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 578 exp.TimeToUnix: rename_func("EPOCH"), 579 exp.TsOrDiToDi: lambda self, 580 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 581 exp.TsOrDsAdd: _date_delta_sql, 582 exp.TsOrDsDiff: lambda self, e: self.func( 583 "DATE_DIFF", 584 f"'{e.args.get('unit') or 'DAY'}'", 585 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 586 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 587 ), 588 exp.UnixToStr: lambda self, e: self.func( 589 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 590 ), 591 exp.DatetimeTrunc: lambda self, e: self.func( 592 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 593 ), 594 exp.UnixToTime: _unix_to_time_sql, 595 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 596 exp.VariancePop: rename_func("VAR_POP"), 597 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 598 exp.Xor: bool_xor_sql, 599 } 600 601 SUPPORTED_JSON_PATH_PARTS = { 602 exp.JSONPathKey, 603 exp.JSONPathRoot, 604 exp.JSONPathSubscript, 605 exp.JSONPathWildcard, 606 } 607 608 TYPE_MAPPING = { 609 **generator.Generator.TYPE_MAPPING, 610 exp.DataType.Type.BINARY: "BLOB", 611 exp.DataType.Type.BPCHAR: "TEXT", 612 exp.DataType.Type.CHAR: "TEXT", 613 exp.DataType.Type.FLOAT: "REAL", 614 exp.DataType.Type.NCHAR: "TEXT", 615 exp.DataType.Type.NVARCHAR: "TEXT", 616 exp.DataType.Type.UINT: "UINTEGER", 617 exp.DataType.Type.VARBINARY: "BLOB", 618 exp.DataType.Type.ROWVERSION: "BLOB", 619 exp.DataType.Type.VARCHAR: "TEXT", 620 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 621 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 622 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 623 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 624 } 625 626 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 627 RESERVED_KEYWORDS = { 628 "array", 629 "analyse", 630 "union", 631 "all", 632 "when", 633 "in_p", 634 "default", 635 "create_p", 636 "window", 637 "asymmetric", 638 "to", 639 "else", 640 "localtime", 641 "from", 642 "end_p", 643 "select", 644 "current_date", 645 "foreign", 646 "with", 647 "grant", 648 "session_user", 649 "or", 650 "except", 651 "references", 652 "fetch", 653 "limit", 654 "group_p", 655 "leading", 656 "into", 657 "collate", 658 "offset", 659 "do", 660 "then", 661 "localtimestamp", 662 "check_p", 663 "lateral_p", 664 "current_role", 665 "where", 666 "asc_p", 667 "placing", 668 "desc_p", 669 "user", 670 "unique", 671 "initially", 672 "column", 673 "both", 674 "some", 675 "as", 676 "any", 677 "only", 678 "deferrable", 679 "null_p", 680 "current_time", 681 "true_p", 682 "table", 683 "case", 684 "trailing", 685 "variadic", 686 "for", 687 "on", 688 "distinct", 689 "false_p", 690 "not", 691 "constraint", 692 "current_timestamp", 693 "returning", 694 "primary", 695 "intersect", 696 "having", 697 "analyze", 698 "current_user", 699 "and", 700 "cast", 701 "symmetric", 702 "using", 703 "order", 704 "current_catalog", 705 } 706 707 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 708 709 # DuckDB doesn't generally support CREATE TABLE .. properties 710 # https://duckdb.org/docs/sql/statements/create_table.html 711 PROPERTIES_LOCATION = { 712 prop: exp.Properties.Location.UNSUPPORTED 713 for prop in generator.Generator.PROPERTIES_LOCATION 714 } 715 716 # There are a few exceptions (e.g. temporary tables) which are supported or 717 # can be transpiled to DuckDB, so we explicitly override them accordingly 718 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 719 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 720 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 721 722 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 723 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 724 725 def strtotime_sql(self, expression: exp.StrToTime) -> str: 726 if expression.args.get("safe"): 727 formatted_time = self.format_time(expression) 728 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 729 return str_to_time_sql(self, expression) 730 731 def strtodate_sql(self, expression: exp.StrToDate) -> str: 732 if expression.args.get("safe"): 733 formatted_time = self.format_time(expression) 734 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 735 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 736 737 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 738 arg = expression.this 739 if expression.args.get("safe"): 740 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 741 return self.func("JSON", arg) 742 743 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 744 nano = expression.args.get("nano") 745 if nano is not None: 746 expression.set( 747 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 748 ) 749 750 return rename_func("MAKE_TIME")(self, expression) 751 752 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 753 sec = expression.args["sec"] 754 755 milli = expression.args.get("milli") 756 if milli is not None: 757 sec += milli.pop() / exp.Literal.number(1000.0) 758 759 nano = expression.args.get("nano") 760 if nano is not None: 761 sec += nano.pop() / exp.Literal.number(1000000000.0) 762 763 if milli or nano: 764 expression.set("sec", sec) 765 766 return rename_func("MAKE_TIMESTAMP")(self, expression) 767 768 def tablesample_sql( 769 self, 770 expression: exp.TableSample, 771 tablesample_keyword: t.Optional[str] = None, 772 ) -> str: 773 if not isinstance(expression.parent, exp.Select): 774 # This sample clause only applies to a single source, not the entire resulting relation 775 tablesample_keyword = "TABLESAMPLE" 776 777 if expression.args.get("size"): 778 method = expression.args.get("method") 779 if method and method.name.upper() != "RESERVOIR": 780 self.unsupported( 781 f"Sampling method {method} is not supported with a discrete sample count, " 782 "defaulting to reservoir sampling" 783 ) 784 expression.set("method", exp.var("RESERVOIR")) 785 786 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 787 788 def interval_sql(self, expression: exp.Interval) -> str: 789 multiplier: t.Optional[int] = None 790 unit = expression.text("unit").lower() 791 792 if unit.startswith("week"): 793 multiplier = 7 794 if unit.startswith("quarter"): 795 multiplier = 90 796 797 if multiplier: 798 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 799 800 return super().interval_sql(expression) 801 802 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 803 if isinstance(expression.parent, exp.UserDefinedFunction): 804 return self.sql(expression, "this") 805 return super().columndef_sql(expression, sep) 806 807 def join_sql(self, expression: exp.Join) -> str: 808 if ( 809 expression.side == "LEFT" 810 and not expression.args.get("on") 811 and isinstance(expression.this, exp.Unnest) 812 ): 813 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 814 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 815 return super().join_sql(expression.on(exp.true())) 816 817 return super().join_sql(expression) 818 819 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 820 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 821 if expression.args.get("is_end_exclusive"): 822 return rename_func("RANGE")(self, expression) 823 824 return self.function_fallback_sql(expression) 825 826 def bracket_sql(self, expression: exp.Bracket) -> str: 827 this = expression.this 828 if isinstance(this, exp.Array): 829 this.replace(exp.paren(this)) 830 831 bracket = super().bracket_sql(expression) 832 833 if not expression.args.get("returns_list_for_maps"): 834 if not this.type: 835 from sqlglot.optimizer.annotate_types import annotate_types 836 837 this = annotate_types(this) 838 839 if this.is_type(exp.DataType.Type.MAP): 840 bracket = f"({bracket})[1]" 841 842 return bracket 843 844 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 845 expression_sql = self.sql(expression, "expression") 846 847 func = expression.this 848 if isinstance(func, exp.PERCENTILES): 849 # Make the order key the first arg and slide the fraction to the right 850 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 851 order_col = expression.find(exp.Ordered) 852 if order_col: 853 func.set("expression", func.this) 854 func.set("this", order_col.this) 855 856 this = self.sql(expression, "this").rstrip(")") 857 858 return f"{this}{expression_sql})" 859 860 def length_sql(self, expression: exp.Length) -> str: 861 arg = expression.this 862 863 # Dialects like BQ and Snowflake also accept binary values as args, so 864 # DDB will attempt to infer the type or resort to case/when resolution 865 if not expression.args.get("binary") or arg.is_string: 866 return self.func("LENGTH", arg) 867 868 if not arg.type: 869 from sqlglot.optimizer.annotate_types import annotate_types 870 871 arg = annotate_types(arg) 872 873 if arg.is_type(*exp.DataType.TEXT_TYPES): 874 return self.func("LENGTH", arg) 875 876 # We need these casts to make duckdb's static type checker happy 877 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 878 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 879 880 case = ( 881 exp.case(self.func("TYPEOF", arg)) 882 .when( 883 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 884 ) # anonymous to break length_sql recursion 885 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 886 ) 887 888 return self.sql(case) 889 890 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 891 this = expression.this 892 key = expression.args.get("key") 893 key_sql = key.name if isinstance(key, exp.Expression) else "" 894 value_sql = self.sql(expression, "value") 895 896 kv_sql = f"{key_sql} := {value_sql}" 897 898 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 899 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 900 if isinstance(this, exp.Struct) and not this.expressions: 901 return self.func("STRUCT_PACK", kv_sql) 902 903 return self.func("STRUCT_INSERT", this, kv_sql) 904 905 def unnest_sql(self, expression: exp.Unnest) -> str: 906 explode_array = expression.args.get("explode_array") 907 if explode_array: 908 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 909 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 910 expression.expressions.append( 911 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 912 ) 913 914 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 915 alias = expression.args.get("alias") 916 if alias: 917 expression.set("alias", None) 918 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 919 920 unnest_sql = super().unnest_sql(expression) 921 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 922 return self.sql(select) 923 924 return super().unnest_sql(expression) 925 926 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 927 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 928 # DuckDB should render IGNORE NULLS only for the general-purpose 929 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 930 return super().ignorenulls_sql(expression) 931 932 return self.sql(expression, "this") 933 934 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 935 this = self.sql(expression, "this") 936 null_text = self.sql(expression, "null") 937 938 if null_text: 939 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 940 941 return self.func("ARRAY_TO_STRING", this, expression.expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
272 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 273 if isinstance(path, exp.Literal): 274 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 275 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 276 # This check ensures we'll avoid trying to parse these as JSON paths, which can 277 # either result in a noisy warning or in an invalid representation of the path. 278 path_text = path.name 279 if path_text.startswith("/") or "[#" in path_text: 280 return path 281 282 return super().to_json_path(path)
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
284 class Tokenizer(tokens.Tokenizer): 285 HEREDOC_STRINGS = ["$"] 286 287 HEREDOC_TAG_IS_IDENTIFIER = True 288 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 289 290 KEYWORDS = { 291 **tokens.Tokenizer.KEYWORDS, 292 "//": TokenType.DIV, 293 "ATTACH": TokenType.COMMAND, 294 "BINARY": TokenType.VARBINARY, 295 "BITSTRING": TokenType.BIT, 296 "BPCHAR": TokenType.TEXT, 297 "CHAR": TokenType.TEXT, 298 "CHARACTER VARYING": TokenType.TEXT, 299 "EXCLUDE": TokenType.EXCEPT, 300 "LOGICAL": TokenType.BOOLEAN, 301 "ONLY": TokenType.ONLY, 302 "PIVOT_WIDER": TokenType.PIVOT, 303 "POSITIONAL": TokenType.POSITIONAL, 304 "SIGNED": TokenType.INT, 305 "STRING": TokenType.TEXT, 306 "SUMMARIZE": TokenType.SUMMARIZE, 307 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 308 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 309 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 310 "TIMESTAMP_US": TokenType.TIMESTAMP, 311 "UBIGINT": TokenType.UBIGINT, 312 "UINTEGER": TokenType.UINT, 313 "USMALLINT": TokenType.USMALLINT, 314 "UTINYINT": TokenType.UTINYINT, 315 "VARCHAR": TokenType.TEXT, 316 } 317 KEYWORDS.pop("/*+") 318 319 SINGLE_TOKENS = { 320 **tokens.Tokenizer.SINGLE_TOKENS, 321 "$": TokenType.PARAMETER, 322 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
324 class Parser(parser.Parser): 325 BITWISE = { 326 **parser.Parser.BITWISE, 327 TokenType.TILDA: exp.RegexpLike, 328 } 329 330 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 331 332 FUNCTIONS = { 333 **parser.Parser.FUNCTIONS, 334 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 335 "ARRAY_SORT": exp.SortArray.from_arg_list, 336 "DATEDIFF": _build_date_diff, 337 "DATE_DIFF": _build_date_diff, 338 "DATE_TRUNC": date_trunc_to_time, 339 "DATETRUNC": date_trunc_to_time, 340 "DECODE": lambda args: exp.Decode( 341 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 342 ), 343 "ENCODE": lambda args: exp.Encode( 344 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 345 ), 346 "EPOCH": exp.TimeToUnix.from_arg_list, 347 "EPOCH_MS": lambda args: exp.UnixToTime( 348 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 349 ), 350 "JSON": exp.ParseJSON.from_arg_list, 351 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 352 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 353 "LIST_HAS": exp.ArrayContains.from_arg_list, 354 "LIST_REVERSE_SORT": _build_sort_array_desc, 355 "LIST_SORT": exp.SortArray.from_arg_list, 356 "LIST_VALUE": lambda args: exp.Array(expressions=args), 357 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 358 "MAKE_TIMESTAMP": _build_make_timestamp, 359 "MEDIAN": lambda args: exp.PercentileCont( 360 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 361 ), 362 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 363 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 364 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 365 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 366 ), 367 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 368 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 369 this=seq_get(args, 0), 370 expression=seq_get(args, 1), 371 replacement=seq_get(args, 2), 372 modifiers=seq_get(args, 3), 373 ), 374 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 375 "STRING_SPLIT": exp.Split.from_arg_list, 376 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 377 "STRING_TO_ARRAY": exp.Split.from_arg_list, 378 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 379 "STRUCT_PACK": exp.Struct.from_arg_list, 380 "STR_SPLIT": exp.Split.from_arg_list, 381 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 382 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 383 "UNNEST": exp.Explode.from_arg_list, 384 "XOR": binary_from_function(exp.BitwiseXor), 385 "GENERATE_SERIES": _build_generate_series(), 386 "RANGE": _build_generate_series(end_exclusive=True), 387 } 388 389 FUNCTIONS.pop("DATE_SUB") 390 FUNCTIONS.pop("GLOB") 391 392 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 393 FUNCTION_PARSERS.pop("DECODE") 394 395 NO_PAREN_FUNCTION_PARSERS = { 396 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 397 "MAP": lambda self: self._parse_map(), 398 } 399 400 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 401 TokenType.SEMI, 402 TokenType.ANTI, 403 } 404 405 PLACEHOLDER_PARSERS = { 406 **parser.Parser.PLACEHOLDER_PARSERS, 407 TokenType.PARAMETER: lambda self: ( 408 self.expression(exp.Placeholder, this=self._prev.text) 409 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 410 else None 411 ), 412 } 413 414 TYPE_CONVERTERS = { 415 # https://duckdb.org/docs/sql/data_types/numeric 416 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 417 # https://duckdb.org/docs/sql/data_types/text 418 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 419 } 420 421 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 422 # https://duckdb.org/docs/sql/samples.html 423 sample = super()._parse_table_sample(as_modifier=as_modifier) 424 if sample and not sample.args.get("method"): 425 if sample.args.get("size"): 426 sample.set("method", exp.var("RESERVOIR")) 427 else: 428 sample.set("method", exp.var("SYSTEM")) 429 430 return sample 431 432 def _parse_bracket( 433 self, this: t.Optional[exp.Expression] = None 434 ) -> t.Optional[exp.Expression]: 435 bracket = super()._parse_bracket(this) 436 if isinstance(bracket, exp.Bracket): 437 bracket.set("returns_list_for_maps", True) 438 439 return bracket 440 441 def _parse_map(self) -> exp.ToMap | exp.Map: 442 if self._match(TokenType.L_BRACE, advance=False): 443 return self.expression(exp.ToMap, this=self._parse_bracket()) 444 445 args = self._parse_wrapped_csv(self._parse_assignment) 446 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 447 448 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 449 return self._parse_field_def() 450 451 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 452 if len(aggregations) == 1: 453 return super()._pivot_column_names(aggregations) 454 return pivot_column_names(aggregations, dialect="duckdb")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
456 class Generator(generator.Generator): 457 PARAMETER_TOKEN = "$" 458 NAMED_PLACEHOLDER_TOKEN = "$" 459 JOIN_HINTS = False 460 TABLE_HINTS = False 461 QUERY_HINTS = False 462 LIMIT_FETCH = "LIMIT" 463 STRUCT_DELIMITER = ("(", ")") 464 RENAME_TABLE_WITH_DB = False 465 NVL2_SUPPORTED = False 466 SEMI_ANTI_JOIN_WITH_SIDE = False 467 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 468 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 469 LAST_DAY_SUPPORTS_DATE_PART = False 470 JSON_KEY_VALUE_PAIR_SEP = "," 471 IGNORE_NULLS_IN_FUNC = True 472 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 473 SUPPORTS_CREATE_TABLE_LIKE = False 474 MULTI_ARG_DISTINCT = False 475 CAN_IMPLEMENT_ARRAY_ANY = True 476 SUPPORTS_TO_NUMBER = False 477 COPY_HAS_INTO_KEYWORD = False 478 STAR_EXCEPT = "EXCLUDE" 479 PAD_FILL_PATTERN_IS_REQUIRED = True 480 ARRAY_CONCAT_IS_VAR_LEN = False 481 482 TRANSFORMS = { 483 **generator.Generator.TRANSFORMS, 484 exp.ApproxDistinct: approx_count_distinct_sql, 485 exp.Array: inline_array_unless_query, 486 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 487 exp.ArrayFilter: rename_func("LIST_FILTER"), 488 exp.ArraySize: rename_func("ARRAY_LENGTH"), 489 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 490 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 491 exp.ArraySort: _array_sort_sql, 492 exp.ArraySum: rename_func("LIST_SUM"), 493 exp.BitwiseXor: rename_func("XOR"), 494 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 495 exp.CurrentDate: lambda *_: "CURRENT_DATE", 496 exp.CurrentTime: lambda *_: "CURRENT_TIME", 497 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 498 exp.DayOfMonth: rename_func("DAYOFMONTH"), 499 exp.DayOfWeek: rename_func("DAYOFWEEK"), 500 exp.DayOfWeekIso: rename_func("ISODOW"), 501 exp.DayOfYear: rename_func("DAYOFYEAR"), 502 exp.DataType: _datatype_sql, 503 exp.Date: _date_sql, 504 exp.DateAdd: _date_delta_sql, 505 exp.DateFromParts: rename_func("MAKE_DATE"), 506 exp.DateSub: _date_delta_sql, 507 exp.DateDiff: _date_diff_sql, 508 exp.DateStrToDate: datestrtodate_sql, 509 exp.Datetime: no_datetime_sql, 510 exp.DatetimeSub: _date_delta_sql, 511 exp.DatetimeAdd: _date_delta_sql, 512 exp.DateToDi: lambda self, 513 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 514 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 515 exp.DiToDate: lambda self, 516 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 517 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 518 exp.GenerateDateArray: _generate_datetime_array_sql, 519 exp.GenerateTimestampArray: _generate_datetime_array_sql, 520 exp.Explode: rename_func("UNNEST"), 521 exp.IntDiv: lambda self, e: self.binary(e, "//"), 522 exp.IsInf: rename_func("ISINF"), 523 exp.IsNan: rename_func("ISNAN"), 524 exp.JSONExtract: _arrow_json_extract_sql, 525 exp.JSONExtractScalar: _arrow_json_extract_sql, 526 exp.JSONFormat: _json_format_sql, 527 exp.LogicalOr: rename_func("BOOL_OR"), 528 exp.LogicalAnd: rename_func("BOOL_AND"), 529 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 530 exp.MonthsBetween: lambda self, e: self.func( 531 "DATEDIFF", 532 "'month'", 533 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 534 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 535 ), 536 exp.PercentileCont: rename_func("QUANTILE_CONT"), 537 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 538 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 539 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 540 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 541 exp.RegexpExtract: regexp_extract_sql, 542 exp.RegexpReplace: lambda self, e: self.func( 543 "REGEXP_REPLACE", 544 e.this, 545 e.expression, 546 e.args.get("replacement"), 547 e.args.get("modifiers"), 548 ), 549 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 550 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 551 exp.Return: lambda self, e: self.sql(e, "this"), 552 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 553 exp.Rand: rename_func("RANDOM"), 554 exp.SafeDivide: no_safe_divide_sql, 555 exp.SHA2: sha256_sql, 556 exp.Split: rename_func("STR_SPLIT"), 557 exp.SortArray: _sort_array_sql, 558 exp.StrPosition: str_position_sql, 559 exp.StrToUnix: lambda self, e: self.func( 560 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 561 ), 562 exp.Struct: _struct_sql, 563 exp.Transform: rename_func("LIST_TRANSFORM"), 564 exp.TimeAdd: _date_delta_sql, 565 exp.Time: no_time_sql, 566 exp.TimeDiff: _timediff_sql, 567 exp.Timestamp: no_timestamp_sql, 568 exp.TimestampDiff: lambda self, e: self.func( 569 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 570 ), 571 exp.TimestampTrunc: timestamptrunc_sql(), 572 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 573 exp.TimeStrToTime: timestrtotime_sql, 574 exp.TimeStrToUnix: lambda self, e: self.func( 575 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 576 ), 577 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 578 exp.TimeToUnix: rename_func("EPOCH"), 579 exp.TsOrDiToDi: lambda self, 580 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 581 exp.TsOrDsAdd: _date_delta_sql, 582 exp.TsOrDsDiff: lambda self, e: self.func( 583 "DATE_DIFF", 584 f"'{e.args.get('unit') or 'DAY'}'", 585 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 586 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 587 ), 588 exp.UnixToStr: lambda self, e: self.func( 589 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 590 ), 591 exp.DatetimeTrunc: lambda self, e: self.func( 592 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 593 ), 594 exp.UnixToTime: _unix_to_time_sql, 595 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 596 exp.VariancePop: rename_func("VAR_POP"), 597 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 598 exp.Xor: bool_xor_sql, 599 } 600 601 SUPPORTED_JSON_PATH_PARTS = { 602 exp.JSONPathKey, 603 exp.JSONPathRoot, 604 exp.JSONPathSubscript, 605 exp.JSONPathWildcard, 606 } 607 608 TYPE_MAPPING = { 609 **generator.Generator.TYPE_MAPPING, 610 exp.DataType.Type.BINARY: "BLOB", 611 exp.DataType.Type.BPCHAR: "TEXT", 612 exp.DataType.Type.CHAR: "TEXT", 613 exp.DataType.Type.FLOAT: "REAL", 614 exp.DataType.Type.NCHAR: "TEXT", 615 exp.DataType.Type.NVARCHAR: "TEXT", 616 exp.DataType.Type.UINT: "UINTEGER", 617 exp.DataType.Type.VARBINARY: "BLOB", 618 exp.DataType.Type.ROWVERSION: "BLOB", 619 exp.DataType.Type.VARCHAR: "TEXT", 620 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 621 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 622 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 623 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 624 } 625 626 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 627 RESERVED_KEYWORDS = { 628 "array", 629 "analyse", 630 "union", 631 "all", 632 "when", 633 "in_p", 634 "default", 635 "create_p", 636 "window", 637 "asymmetric", 638 "to", 639 "else", 640 "localtime", 641 "from", 642 "end_p", 643 "select", 644 "current_date", 645 "foreign", 646 "with", 647 "grant", 648 "session_user", 649 "or", 650 "except", 651 "references", 652 "fetch", 653 "limit", 654 "group_p", 655 "leading", 656 "into", 657 "collate", 658 "offset", 659 "do", 660 "then", 661 "localtimestamp", 662 "check_p", 663 "lateral_p", 664 "current_role", 665 "where", 666 "asc_p", 667 "placing", 668 "desc_p", 669 "user", 670 "unique", 671 "initially", 672 "column", 673 "both", 674 "some", 675 "as", 676 "any", 677 "only", 678 "deferrable", 679 "null_p", 680 "current_time", 681 "true_p", 682 "table", 683 "case", 684 "trailing", 685 "variadic", 686 "for", 687 "on", 688 "distinct", 689 "false_p", 690 "not", 691 "constraint", 692 "current_timestamp", 693 "returning", 694 "primary", 695 "intersect", 696 "having", 697 "analyze", 698 "current_user", 699 "and", 700 "cast", 701 "symmetric", 702 "using", 703 "order", 704 "current_catalog", 705 } 706 707 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 708 709 # DuckDB doesn't generally support CREATE TABLE .. properties 710 # https://duckdb.org/docs/sql/statements/create_table.html 711 PROPERTIES_LOCATION = { 712 prop: exp.Properties.Location.UNSUPPORTED 713 for prop in generator.Generator.PROPERTIES_LOCATION 714 } 715 716 # There are a few exceptions (e.g. temporary tables) which are supported or 717 # can be transpiled to DuckDB, so we explicitly override them accordingly 718 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 719 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 720 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 721 722 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 723 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 724 725 def strtotime_sql(self, expression: exp.StrToTime) -> str: 726 if expression.args.get("safe"): 727 formatted_time = self.format_time(expression) 728 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 729 return str_to_time_sql(self, expression) 730 731 def strtodate_sql(self, expression: exp.StrToDate) -> str: 732 if expression.args.get("safe"): 733 formatted_time = self.format_time(expression) 734 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 735 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 736 737 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 738 arg = expression.this 739 if expression.args.get("safe"): 740 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 741 return self.func("JSON", arg) 742 743 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 744 nano = expression.args.get("nano") 745 if nano is not None: 746 expression.set( 747 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 748 ) 749 750 return rename_func("MAKE_TIME")(self, expression) 751 752 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 753 sec = expression.args["sec"] 754 755 milli = expression.args.get("milli") 756 if milli is not None: 757 sec += milli.pop() / exp.Literal.number(1000.0) 758 759 nano = expression.args.get("nano") 760 if nano is not None: 761 sec += nano.pop() / exp.Literal.number(1000000000.0) 762 763 if milli or nano: 764 expression.set("sec", sec) 765 766 return rename_func("MAKE_TIMESTAMP")(self, expression) 767 768 def tablesample_sql( 769 self, 770 expression: exp.TableSample, 771 tablesample_keyword: t.Optional[str] = None, 772 ) -> str: 773 if not isinstance(expression.parent, exp.Select): 774 # This sample clause only applies to a single source, not the entire resulting relation 775 tablesample_keyword = "TABLESAMPLE" 776 777 if expression.args.get("size"): 778 method = expression.args.get("method") 779 if method and method.name.upper() != "RESERVOIR": 780 self.unsupported( 781 f"Sampling method {method} is not supported with a discrete sample count, " 782 "defaulting to reservoir sampling" 783 ) 784 expression.set("method", exp.var("RESERVOIR")) 785 786 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 787 788 def interval_sql(self, expression: exp.Interval) -> str: 789 multiplier: t.Optional[int] = None 790 unit = expression.text("unit").lower() 791 792 if unit.startswith("week"): 793 multiplier = 7 794 if unit.startswith("quarter"): 795 multiplier = 90 796 797 if multiplier: 798 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 799 800 return super().interval_sql(expression) 801 802 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 803 if isinstance(expression.parent, exp.UserDefinedFunction): 804 return self.sql(expression, "this") 805 return super().columndef_sql(expression, sep) 806 807 def join_sql(self, expression: exp.Join) -> str: 808 if ( 809 expression.side == "LEFT" 810 and not expression.args.get("on") 811 and isinstance(expression.this, exp.Unnest) 812 ): 813 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 814 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 815 return super().join_sql(expression.on(exp.true())) 816 817 return super().join_sql(expression) 818 819 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 820 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 821 if expression.args.get("is_end_exclusive"): 822 return rename_func("RANGE")(self, expression) 823 824 return self.function_fallback_sql(expression) 825 826 def bracket_sql(self, expression: exp.Bracket) -> str: 827 this = expression.this 828 if isinstance(this, exp.Array): 829 this.replace(exp.paren(this)) 830 831 bracket = super().bracket_sql(expression) 832 833 if not expression.args.get("returns_list_for_maps"): 834 if not this.type: 835 from sqlglot.optimizer.annotate_types import annotate_types 836 837 this = annotate_types(this) 838 839 if this.is_type(exp.DataType.Type.MAP): 840 bracket = f"({bracket})[1]" 841 842 return bracket 843 844 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 845 expression_sql = self.sql(expression, "expression") 846 847 func = expression.this 848 if isinstance(func, exp.PERCENTILES): 849 # Make the order key the first arg and slide the fraction to the right 850 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 851 order_col = expression.find(exp.Ordered) 852 if order_col: 853 func.set("expression", func.this) 854 func.set("this", order_col.this) 855 856 this = self.sql(expression, "this").rstrip(")") 857 858 return f"{this}{expression_sql})" 859 860 def length_sql(self, expression: exp.Length) -> str: 861 arg = expression.this 862 863 # Dialects like BQ and Snowflake also accept binary values as args, so 864 # DDB will attempt to infer the type or resort to case/when resolution 865 if not expression.args.get("binary") or arg.is_string: 866 return self.func("LENGTH", arg) 867 868 if not arg.type: 869 from sqlglot.optimizer.annotate_types import annotate_types 870 871 arg = annotate_types(arg) 872 873 if arg.is_type(*exp.DataType.TEXT_TYPES): 874 return self.func("LENGTH", arg) 875 876 # We need these casts to make duckdb's static type checker happy 877 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 878 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 879 880 case = ( 881 exp.case(self.func("TYPEOF", arg)) 882 .when( 883 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 884 ) # anonymous to break length_sql recursion 885 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 886 ) 887 888 return self.sql(case) 889 890 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 891 this = expression.this 892 key = expression.args.get("key") 893 key_sql = key.name if isinstance(key, exp.Expression) else "" 894 value_sql = self.sql(expression, "value") 895 896 kv_sql = f"{key_sql} := {value_sql}" 897 898 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 899 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 900 if isinstance(this, exp.Struct) and not this.expressions: 901 return self.func("STRUCT_PACK", kv_sql) 902 903 return self.func("STRUCT_INSERT", this, kv_sql) 904 905 def unnest_sql(self, expression: exp.Unnest) -> str: 906 explode_array = expression.args.get("explode_array") 907 if explode_array: 908 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 909 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 910 expression.expressions.append( 911 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 912 ) 913 914 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 915 alias = expression.args.get("alias") 916 if alias: 917 expression.set("alias", None) 918 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 919 920 unnest_sql = super().unnest_sql(expression) 921 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 922 return self.sql(select) 923 924 return super().unnest_sql(expression) 925 926 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 927 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 928 # DuckDB should render IGNORE NULLS only for the general-purpose 929 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 930 return super().ignorenulls_sql(expression) 931 932 return self.sql(expression, "this") 933 934 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 935 this = self.sql(expression, "this") 936 null_text = self.sql(expression, "null") 937 938 if null_text: 939 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 940 941 return self.func("ARRAY_TO_STRING", this, expression.expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHEREclause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
731 def strtodate_sql(self, expression: exp.StrToDate) -> str: 732 if expression.args.get("safe"): 733 formatted_time = self.format_time(expression) 734 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 735 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
743 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 744 nano = expression.args.get("nano") 745 if nano is not None: 746 expression.set( 747 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 748 ) 749 750 return rename_func("MAKE_TIME")(self, expression)
752 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 753 sec = expression.args["sec"] 754 755 milli = expression.args.get("milli") 756 if milli is not None: 757 sec += milli.pop() / exp.Literal.number(1000.0) 758 759 nano = expression.args.get("nano") 760 if nano is not None: 761 sec += nano.pop() / exp.Literal.number(1000000000.0) 762 763 if milli or nano: 764 expression.set("sec", sec) 765 766 return rename_func("MAKE_TIMESTAMP")(self, expression)
768 def tablesample_sql( 769 self, 770 expression: exp.TableSample, 771 tablesample_keyword: t.Optional[str] = None, 772 ) -> str: 773 if not isinstance(expression.parent, exp.Select): 774 # This sample clause only applies to a single source, not the entire resulting relation 775 tablesample_keyword = "TABLESAMPLE" 776 777 if expression.args.get("size"): 778 method = expression.args.get("method") 779 if method and method.name.upper() != "RESERVOIR": 780 self.unsupported( 781 f"Sampling method {method} is not supported with a discrete sample count, " 782 "defaulting to reservoir sampling" 783 ) 784 expression.set("method", exp.var("RESERVOIR")) 785 786 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
788 def interval_sql(self, expression: exp.Interval) -> str: 789 multiplier: t.Optional[int] = None 790 unit = expression.text("unit").lower() 791 792 if unit.startswith("week"): 793 multiplier = 7 794 if unit.startswith("quarter"): 795 multiplier = 90 796 797 if multiplier: 798 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 799 800 return super().interval_sql(expression)
807 def join_sql(self, expression: exp.Join) -> str: 808 if ( 809 expression.side == "LEFT" 810 and not expression.args.get("on") 811 and isinstance(expression.this, exp.Unnest) 812 ): 813 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 814 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 815 return super().join_sql(expression.on(exp.true())) 816 817 return super().join_sql(expression)
826 def bracket_sql(self, expression: exp.Bracket) -> str: 827 this = expression.this 828 if isinstance(this, exp.Array): 829 this.replace(exp.paren(this)) 830 831 bracket = super().bracket_sql(expression) 832 833 if not expression.args.get("returns_list_for_maps"): 834 if not this.type: 835 from sqlglot.optimizer.annotate_types import annotate_types 836 837 this = annotate_types(this) 838 839 if this.is_type(exp.DataType.Type.MAP): 840 bracket = f"({bracket})[1]" 841 842 return bracket
844 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 845 expression_sql = self.sql(expression, "expression") 846 847 func = expression.this 848 if isinstance(func, exp.PERCENTILES): 849 # Make the order key the first arg and slide the fraction to the right 850 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 851 order_col = expression.find(exp.Ordered) 852 if order_col: 853 func.set("expression", func.this) 854 func.set("this", order_col.this) 855 856 this = self.sql(expression, "this").rstrip(")") 857 858 return f"{this}{expression_sql})"
860 def length_sql(self, expression: exp.Length) -> str: 861 arg = expression.this 862 863 # Dialects like BQ and Snowflake also accept binary values as args, so 864 # DDB will attempt to infer the type or resort to case/when resolution 865 if not expression.args.get("binary") or arg.is_string: 866 return self.func("LENGTH", arg) 867 868 if not arg.type: 869 from sqlglot.optimizer.annotate_types import annotate_types 870 871 arg = annotate_types(arg) 872 873 if arg.is_type(*exp.DataType.TEXT_TYPES): 874 return self.func("LENGTH", arg) 875 876 # We need these casts to make duckdb's static type checker happy 877 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 878 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 879 880 case = ( 881 exp.case(self.func("TYPEOF", arg)) 882 .when( 883 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 884 ) # anonymous to break length_sql recursion 885 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 886 ) 887 888 return self.sql(case)
890 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 891 this = expression.this 892 key = expression.args.get("key") 893 key_sql = key.name if isinstance(key, exp.Expression) else "" 894 value_sql = self.sql(expression, "value") 895 896 kv_sql = f"{key_sql} := {value_sql}" 897 898 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 899 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 900 if isinstance(this, exp.Struct) and not this.expressions: 901 return self.func("STRUCT_PACK", kv_sql) 902 903 return self.func("STRUCT_INSERT", this, kv_sql)
905 def unnest_sql(self, expression: exp.Unnest) -> str: 906 explode_array = expression.args.get("explode_array") 907 if explode_array: 908 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 909 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 910 expression.expressions.append( 911 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 912 ) 913 914 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 915 alias = expression.args.get("alias") 916 if alias: 917 expression.set("alias", None) 918 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 919 920 unnest_sql = super().unnest_sql(expression) 921 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 922 return self.sql(select) 923 924 return super().unnest_sql(expression)
926 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 927 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 928 # DuckDB should render IGNORE NULLS only for the general-purpose 929 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 930 return super().ignorenulls_sql(expression) 931 932 return self.sql(expression, "this")
934 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 935 this = self.sql(expression, "this") 936 null_text = self.sql(expression, "null") 937 938 if null_text: 939 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 940 941 return self.func("ARRAY_TO_STRING", this, expression.expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql