sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)
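As a quick orientation, this dialect is normally exercised through sqlglot's top-level API rather than instantiated directly. A minimal sketch (the input query is hypothetical, and the exact output string depends on the sqlglot version):

import sqlglot

# Read BigQuery SQL and write DuckDB SQL; GENERATE_DATE_ARRAY is rewritten by
# _generate_datetime_array_sql above into a GENERATE_SERIES call cast to DATE[].
sql = "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05') AS dates"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])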
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects which don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
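A minimal sketch of how a couple of these settings surface through sqlglot's public API (illustrative only; exact outputs may differ between sqlglot versions):

import sqlglot

# ORDER BY ALL is supported natively by DuckDB, so it round-trips unchanged.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])

# x::INT[5] parses as a fixed-size array cast rather than a subscript.
print(repr(sqlglot.parse_one("SELECT x::INT[5] FROM t", read="duckdb")))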
    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)
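For instance (a hedged sketch; the exact rendering is version-dependent), a JSON pointer path starting with "/" should pass through untouched, while a regular "$."-style path is parsed with the JSONPath dialect:

import sqlglot

# The leading "/" marks a DuckDB JSON pointer, so to_json_path returns the
# literal as-is instead of parsing it as a JSONPath expression.
print(sqlglot.transpile("SELECT col -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])
print(sqlglot.transpile("SELECT col -> '$.a[0]' FROM t", read="duckdb", write="duckdb")[0])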
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "ATTACH": TokenType.COMMAND,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "CHARACTER VARYING": TokenType.TEXT,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }
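A small sketch of the keyword remapping above (illustrative; token names follow sqlglot.tokens.TokenType):

import sqlglot

# DuckDB type aliases such as SIGNED map to canonical token types, and "//"
# tokenizes as integer division per the KEYWORDS override above.
for token in sqlglot.tokenize("SELECT CAST(x AS SIGNED) // 2", read="duckdb"):
    print(token.token_type, token.text)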
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": build_regexp_extract,
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
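A brief sketch of two of the parser behaviors defined above (illustrative; outputs are not asserted):

import sqlglot

# MAP {...} is handled by the NO_PAREN_FUNCTION_PARSERS entry and becomes
# an exp.ToMap node rather than a function call.
print(repr(sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb")))

# A discrete sample size gets a default RESERVOIR method in
# _parse_table_sample, per https://duckdb.org/docs/sql/samples.html
print(sqlglot.parse_one("SELECT * FROM t USING SAMPLE 10", read="duckdb").sql("duckdb"))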
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def interval_sql(self, expression: exp.Interval) -> str:
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)
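Two of the transforms above in action (a minimal sketch; the exact output text may vary by sqlglot version):

import sqlglot

# objectinsert_sql: inserting into an empty Snowflake object becomes
# STRUCT_PACK, since STRUCT_INSERT({}, k := v) is not valid DuckDB.
print(
    sqlglot.transpile(
        "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
        read="snowflake",
        write="duckdb",
    )[0]
)

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT (...) is rendered
# with DuckDB's EXCLUDE keyword.
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])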
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
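These options are forwarded through the top-level API, e.g. (a usage sketch):

import sqlglot

# pretty=True is passed through sqlglot.transpile to the Generator.
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="duckdb", pretty=True)[0])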
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql