sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Type TIMESTAMP / TIME WITH TIME ZONE does not support any modifiers
    if expression.is_type("timestamptz", "timetz"):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("interval")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            sep: str = " AS ",
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(
                expression, sep=sep, tablesample_keyword=tablesample_keyword
            )

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)
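The helpers above are wired into the dialect classes below, so the easiest way to exercise them is through sqlglot's top-level API. The following is a minimal sketch; the SQL inputs are illustrative (not taken from the library's test suite), and the exact output strings can vary between sqlglot versions:

import sqlglot

# BigQuery STRUCT construction under a cast is canonicalized to ROW(...)
# by _struct_sql
print(
    sqlglot.transpile(
        "SELECT CAST(STRUCT(1 AS a) AS STRUCT<a INT64>)",
        read="bigquery",
        write="duckdb",
    )[0]
)

# GENERATE_DATE_ARRAY is rewritten to GENERATE_SERIES and cast back to a
# DATE array by _generate_datetime_array_sql
print(
    sqlglot.transpile(
        "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05')",
        read="bigquery",
        write="duckdb",
    )[0]
)

# RANGE parses with an exclusive end, GENERATE_SERIES with an inclusive one
# (see _build_generate_series and Generator.generateseries_sql)
print(sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")[0])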
class DuckDB(sqlglot.dialects.dialect.Dialect):
NULL_ORDERING = 'nulls_are_last'
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

CONCAT_COALESCE = True
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

SUPPORTS_ORDER_BY_ALL = True
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.

SUPPORTS_FIXED_SIZE_ARRAYS = True
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, as in DuckDB. In dialects which don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
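These settings are plain class attributes, so they can be inspected directly when debugging dialect behavior. A small sketch, with expected values taken from the class definition above:

from sqlglot.dialects.duckdb import DuckDB

print(DuckDB.NULL_ORDERING)           # nulls_are_last
print(DuckDB.INDEX_OFFSET)            # 1 (DuckDB lists are 1-indexed)
print(DuckDB.CONCAT_COALESCE)         # True
print(DuckDB.NORMALIZATION_STRATEGY)  # NormalizationStrategy.CASE_INSENSITIVE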
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class DuckDB.Tokenizer(sqlglot.tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
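As an illustration of the FUNCTIONS table, MEDIAN(x) is parsed into a PercentileCont node with a 0.5 fraction, which the generator then renders back as QUANTILE_CONT (the output shown is an expectation):

import sqlglot

ast = sqlglot.parse_one("SELECT MEDIAN(x) FROM t", read="duckdb")
# Expected, approximately: SELECT QUANTILE_CONT(x, 0.5) FROM t
print(ast.sql(dialect="duckdb"))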
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
443 class Generator(generator.Generator): 444 PARAMETER_TOKEN = "$" 445 NAMED_PLACEHOLDER_TOKEN = "$" 446 JOIN_HINTS = False 447 TABLE_HINTS = False 448 QUERY_HINTS = False 449 LIMIT_FETCH = "LIMIT" 450 STRUCT_DELIMITER = ("(", ")") 451 RENAME_TABLE_WITH_DB = False 452 NVL2_SUPPORTED = False 453 SEMI_ANTI_JOIN_WITH_SIDE = False 454 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 455 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 456 LAST_DAY_SUPPORTS_DATE_PART = False 457 JSON_KEY_VALUE_PAIR_SEP = "," 458 IGNORE_NULLS_IN_FUNC = True 459 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 460 SUPPORTS_CREATE_TABLE_LIKE = False 461 MULTI_ARG_DISTINCT = False 462 CAN_IMPLEMENT_ARRAY_ANY = True 463 SUPPORTS_TO_NUMBER = False 464 COPY_HAS_INTO_KEYWORD = False 465 STAR_EXCEPT = "EXCLUDE" 466 PAD_FILL_PATTERN_IS_REQUIRED = True 467 ARRAY_CONCAT_IS_VAR_LEN = False 468 469 TRANSFORMS = { 470 **generator.Generator.TRANSFORMS, 471 exp.ApproxDistinct: approx_count_distinct_sql, 472 exp.Array: inline_array_unless_query, 473 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 474 exp.ArrayFilter: rename_func("LIST_FILTER"), 475 exp.ArraySize: rename_func("ARRAY_LENGTH"), 476 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 477 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 478 exp.ArraySort: _array_sort_sql, 479 exp.ArraySum: rename_func("LIST_SUM"), 480 exp.BitwiseXor: rename_func("XOR"), 481 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 482 exp.CurrentDate: lambda *_: "CURRENT_DATE", 483 exp.CurrentTime: lambda *_: "CURRENT_TIME", 484 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 485 exp.DayOfMonth: rename_func("DAYOFMONTH"), 486 exp.DayOfWeek: rename_func("DAYOFWEEK"), 487 exp.DayOfYear: rename_func("DAYOFYEAR"), 488 exp.DataType: _datatype_sql, 489 exp.Date: _date_sql, 490 exp.DateAdd: _date_delta_sql, 491 exp.DateFromParts: rename_func("MAKE_DATE"), 492 exp.DateSub: _date_delta_sql, 493 exp.DateDiff: _date_diff_sql, 494 exp.DateStrToDate: datestrtodate_sql, 495 exp.Datetime: no_datetime_sql, 496 exp.DatetimeSub: _date_delta_sql, 497 exp.DatetimeAdd: _date_delta_sql, 498 exp.DateToDi: lambda self, 499 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 500 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 501 exp.DiToDate: lambda self, 502 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 503 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 504 exp.GenerateDateArray: _generate_datetime_array_sql, 505 exp.GenerateTimestampArray: _generate_datetime_array_sql, 506 exp.Explode: rename_func("UNNEST"), 507 exp.IntDiv: lambda self, e: self.binary(e, "//"), 508 exp.IsInf: rename_func("ISINF"), 509 exp.IsNan: rename_func("ISNAN"), 510 exp.JSONExtract: _arrow_json_extract_sql, 511 exp.JSONExtractScalar: _arrow_json_extract_sql, 512 exp.JSONFormat: _json_format_sql, 513 exp.LogicalOr: rename_func("BOOL_OR"), 514 exp.LogicalAnd: rename_func("BOOL_AND"), 515 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 516 exp.MonthsBetween: lambda self, e: self.func( 517 "DATEDIFF", 518 "'month'", 519 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 520 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 521 ), 522 exp.PercentileCont: rename_func("QUANTILE_CONT"), 523 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 524 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
525 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 526 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 527 exp.RegexpExtract: regexp_extract_sql, 528 exp.RegexpReplace: lambda self, e: self.func( 529 "REGEXP_REPLACE", 530 e.this, 531 e.expression, 532 e.args.get("replacement"), 533 e.args.get("modifiers"), 534 ), 535 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 536 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 537 exp.Return: lambda self, e: self.sql(e, "this"), 538 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 539 exp.Rand: rename_func("RANDOM"), 540 exp.SafeDivide: no_safe_divide_sql, 541 exp.Split: rename_func("STR_SPLIT"), 542 exp.SortArray: _sort_array_sql, 543 exp.StrPosition: str_position_sql, 544 exp.StrToUnix: lambda self, e: self.func( 545 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 546 ), 547 exp.Struct: _struct_sql, 548 exp.TimeAdd: _date_delta_sql, 549 exp.Time: no_time_sql, 550 exp.TimeDiff: _timediff_sql, 551 exp.Timestamp: no_timestamp_sql, 552 exp.TimestampDiff: lambda self, e: self.func( 553 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 554 ), 555 exp.TimestampTrunc: timestamptrunc_sql(), 556 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 557 exp.TimeStrToTime: timestrtotime_sql, 558 exp.TimeStrToUnix: lambda self, e: self.func( 559 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 560 ), 561 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 562 exp.TimeToUnix: rename_func("EPOCH"), 563 exp.TsOrDiToDi: lambda self, 564 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 565 exp.TsOrDsAdd: _date_delta_sql, 566 exp.TsOrDsDiff: lambda self, e: self.func( 567 "DATE_DIFF", 568 f"'{e.args.get('unit') or 'DAY'}'", 569 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 570 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 571 ), 572 exp.UnixToStr: lambda self, e: self.func( 573 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 574 ), 575 exp.UnixToTime: _unix_to_time_sql, 576 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 577 exp.VariancePop: rename_func("VAR_POP"), 578 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 579 exp.Xor: bool_xor_sql, 580 } 581 582 SUPPORTED_JSON_PATH_PARTS = { 583 exp.JSONPathKey, 584 exp.JSONPathRoot, 585 exp.JSONPathSubscript, 586 exp.JSONPathWildcard, 587 } 588 589 TYPE_MAPPING = { 590 **generator.Generator.TYPE_MAPPING, 591 exp.DataType.Type.BINARY: "BLOB", 592 exp.DataType.Type.BPCHAR: "TEXT", 593 exp.DataType.Type.CHAR: "TEXT", 594 exp.DataType.Type.FLOAT: "REAL", 595 exp.DataType.Type.NCHAR: "TEXT", 596 exp.DataType.Type.NVARCHAR: "TEXT", 597 exp.DataType.Type.UINT: "UINTEGER", 598 exp.DataType.Type.VARBINARY: "BLOB", 599 exp.DataType.Type.ROWVERSION: "BLOB", 600 exp.DataType.Type.VARCHAR: "TEXT", 601 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 602 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 603 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 604 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 605 } 606 607 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 608 RESERVED_KEYWORDS = { 609 "array", 610 "analyse", 611 "union", 612 "all", 613 "when", 614 "in_p", 615 "default", 616 "create_p", 617 
"window", 618 "asymmetric", 619 "to", 620 "else", 621 "localtime", 622 "from", 623 "end_p", 624 "select", 625 "current_date", 626 "foreign", 627 "with", 628 "grant", 629 "session_user", 630 "or", 631 "except", 632 "references", 633 "fetch", 634 "limit", 635 "group_p", 636 "leading", 637 "into", 638 "collate", 639 "offset", 640 "do", 641 "then", 642 "localtimestamp", 643 "check_p", 644 "lateral_p", 645 "current_role", 646 "where", 647 "asc_p", 648 "placing", 649 "desc_p", 650 "user", 651 "unique", 652 "initially", 653 "column", 654 "both", 655 "some", 656 "as", 657 "any", 658 "only", 659 "deferrable", 660 "null_p", 661 "current_time", 662 "true_p", 663 "table", 664 "case", 665 "trailing", 666 "variadic", 667 "for", 668 "on", 669 "distinct", 670 "false_p", 671 "not", 672 "constraint", 673 "current_timestamp", 674 "returning", 675 "primary", 676 "intersect", 677 "having", 678 "analyze", 679 "current_user", 680 "and", 681 "cast", 682 "symmetric", 683 "using", 684 "order", 685 "current_catalog", 686 } 687 688 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 689 690 # DuckDB doesn't generally support CREATE TABLE .. properties 691 # https://duckdb.org/docs/sql/statements/create_table.html 692 PROPERTIES_LOCATION = { 693 prop: exp.Properties.Location.UNSUPPORTED 694 for prop in generator.Generator.PROPERTIES_LOCATION 695 } 696 697 # There are a few exceptions (e.g. temporary tables) which are supported or 698 # can be transpiled to DuckDB, so we explicitly override them accordingly 699 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 700 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 701 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 702 703 def strtotime_sql(self, expression: exp.StrToTime) -> str: 704 if expression.args.get("safe"): 705 formatted_time = self.format_time(expression) 706 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 707 return str_to_time_sql(self, expression) 708 709 def strtodate_sql(self, expression: exp.StrToDate) -> str: 710 if expression.args.get("safe"): 711 formatted_time = self.format_time(expression) 712 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 713 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 714 715 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 716 arg = expression.this 717 if expression.args.get("safe"): 718 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 719 return self.func("JSON", arg) 720 721 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 722 nano = expression.args.get("nano") 723 if nano is not None: 724 expression.set( 725 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 726 ) 727 728 return rename_func("MAKE_TIME")(self, expression) 729 730 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 731 sec = expression.args["sec"] 732 733 milli = expression.args.get("milli") 734 if milli is not None: 735 sec += milli.pop() / exp.Literal.number(1000.0) 736 737 nano = expression.args.get("nano") 738 if nano is not None: 739 sec += nano.pop() / exp.Literal.number(1000000000.0) 740 741 if milli or nano: 742 expression.set("sec", sec) 743 744 return rename_func("MAKE_TIMESTAMP")(self, expression) 745 746 def tablesample_sql( 747 self, 748 expression: exp.TableSample, 749 sep: str = " AS ", 750 tablesample_keyword: t.Optional[str] = None, 751 ) -> str: 752 
if not isinstance(expression.parent, exp.Select): 753 # This sample clause only applies to a single source, not the entire resulting relation 754 tablesample_keyword = "TABLESAMPLE" 755 756 if expression.args.get("size"): 757 method = expression.args.get("method") 758 if method and method.name.upper() != "RESERVOIR": 759 self.unsupported( 760 f"Sampling method {method} is not supported with a discrete sample count, " 761 "defaulting to reservoir sampling" 762 ) 763 expression.set("method", exp.var("RESERVOIR")) 764 765 return super().tablesample_sql( 766 expression, sep=sep, tablesample_keyword=tablesample_keyword 767 ) 768 769 def interval_sql(self, expression: exp.Interval) -> str: 770 multiplier: t.Optional[int] = None 771 unit = expression.text("unit").lower() 772 773 if unit.startswith("week"): 774 multiplier = 7 775 if unit.startswith("quarter"): 776 multiplier = 90 777 778 if multiplier: 779 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 780 781 return super().interval_sql(expression) 782 783 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 784 if isinstance(expression.parent, exp.UserDefinedFunction): 785 return self.sql(expression, "this") 786 return super().columndef_sql(expression, sep) 787 788 def join_sql(self, expression: exp.Join) -> str: 789 if ( 790 expression.side == "LEFT" 791 and not expression.args.get("on") 792 and isinstance(expression.this, exp.Unnest) 793 ): 794 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 795 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 796 return super().join_sql(expression.on(exp.true())) 797 798 return super().join_sql(expression) 799 800 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 801 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 802 if expression.args.get("is_end_exclusive"): 803 return rename_func("RANGE")(self, expression) 804 805 return self.function_fallback_sql(expression) 806 807 def bracket_sql(self, expression: exp.Bracket) -> str: 808 this = expression.this 809 if isinstance(this, exp.Array): 810 this.replace(exp.paren(this)) 811 812 bracket = super().bracket_sql(expression) 813 814 if not expression.args.get("returns_list_for_maps"): 815 if not this.type: 816 from sqlglot.optimizer.annotate_types import annotate_types 817 818 this = annotate_types(this) 819 820 if this.is_type(exp.DataType.Type.MAP): 821 bracket = f"({bracket})[1]" 822 823 return bracket 824 825 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 826 expression_sql = self.sql(expression, "expression") 827 828 func = expression.this 829 if isinstance(func, exp.PERCENTILES): 830 # Make the order key the first arg and slide the fraction to the right 831 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 832 order_col = expression.find(exp.Ordered) 833 if order_col: 834 func.set("expression", func.this) 835 func.set("this", order_col.this) 836 837 this = self.sql(expression, "this").rstrip(")") 838 839 return f"{this}{expression_sql})" 840 841 def length_sql(self, expression: exp.Length) -> str: 842 arg = expression.this 843 844 # Dialects like BQ and Snowflake also accept binary values as args, so 845 # DDB will attempt to infer the type or resort to case/when resolution 846 if not expression.args.get("binary") or arg.is_string: 847 return self.func("LENGTH", arg) 848 849 if not arg.type: 850 from sqlglot.optimizer.annotate_types import annotate_types 
851 852 arg = annotate_types(arg) 853 854 if arg.is_type(*exp.DataType.TEXT_TYPES): 855 return self.func("LENGTH", arg) 856 857 # We need these casts to make duckdb's static type checker happy 858 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 859 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 860 861 case = ( 862 exp.case(self.func("TYPEOF", arg)) 863 .when( 864 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 865 ) # anonymous to break length_sql recursion 866 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 867 ) 868 869 return self.sql(case) 870 871 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 872 this = expression.this 873 key = expression.args.get("key") 874 key_sql = key.name if isinstance(key, exp.Expression) else "" 875 value_sql = self.sql(expression, "value") 876 877 kv_sql = f"{key_sql} := {value_sql}" 878 879 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 880 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 881 if isinstance(this, exp.Struct) and not this.expressions: 882 return self.func("STRUCT_PACK", kv_sql) 883 884 return self.func("STRUCT_INSERT", this, kv_sql)
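One setting from the class above, sketched end to end: STAR_EXCEPT = "EXCLUDE" means BigQuery's SELECT * EXCEPT should be rendered with DuckDB's EXCLUDE keyword (the output is an expectation):

import sqlglot

# Expected, approximately: SELECT * EXCLUDE (a) FROM t
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])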
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
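A minimal sketch of the safe branch, constructing the node by hand since the safe flag is normally set by a reading dialect (the printed output is an expectation):

from sqlglot import exp

e = exp.StrToDate(
    this=exp.column("s"),
    format=exp.Literal.string("%Y-%m-%d"),
    safe=exp.true(),
)
# Expected, approximately: CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)
print(e.sql(dialect="duckdb"))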
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
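For instance, a TimeFromParts node carrying nanoseconds folds them into the seconds argument before MAKE_TIME is emitted (a sketch; the argument names and output are assumptions):

from sqlglot import exp

e = exp.TimeFromParts(
    hour=exp.Literal.number(12),
    min=exp.Literal.number(30),
    sec=exp.Literal.number(5),
    nano=exp.Literal.number(500000000),
)
# Expected, approximately: MAKE_TIME(12, 30, 5 + 500000000 / 1000000000.0)
print(e.sql(dialect="duckdb"))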
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
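Similarly for timestamps, milliseconds and nanoseconds are folded into the seconds argument (a sketch; the argument names and output are assumptions):

from sqlglot import exp

e = exp.TimestampFromParts(
    year=exp.Literal.number(2024),
    month=exp.Literal.number(1),
    day=exp.Literal.number(15),
    hour=exp.Literal.number(8),
    min=exp.Literal.number(0),
    sec=exp.Literal.number(30),
    milli=exp.Literal.number(250),
)
# Expected, approximately: MAKE_TIMESTAMP(2024, 1, 15, 8, 0, 30 + 250 / 1000.0)
print(e.sql(dialect="duckdb"))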
def tablesample_sql(
    self,
    expression: exp.TableSample,
    sep: str = " AS ",
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(
        expression, sep=sep, tablesample_keyword=tablesample_keyword
    )
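A hedged example: with a discrete row count, the sampling method is pinned to reservoir sampling (the exact rendering of the sample clause may vary):

import sqlglot

# The parser fills in RESERVOIR when a discrete sample size is given
print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 10", read="duckdb", write="duckdb")[0])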
def interval_sql(self, expression: exp.Interval) -> str:
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)
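For example, a WEEK interval is rewritten in terms of days (a sketch; the output is an expectation):

import sqlglot

# Expected, approximately: SELECT (7 * INTERVAL '2' DAY)
print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", read="duckdb", write="duckdb")[0])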
def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)
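BigQuery permits LEFT JOIN UNNEST(...) without an ON clause, which makes it a handy source dialect for a sketch (the output is an expectation):

import sqlglot

# The DuckDB output is expected to end with "... ON TRUE"
print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(t.arr)", read="bigquery", write="duckdb")[0])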
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket
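A brief sketch: brackets read from DuckDB itself carry returns_list_for_maps, so native map indexing round-trips without the ([...])[1] wrapper that map accesses coming from other dialects receive:

import sqlglot

# Expected: SELECT m['k'] FROM t
print(sqlglot.transpile("SELECT m['k'] FROM t", read="duckdb", write="duckdb")[0])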
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
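A hedged end-to-end example, reading Postgres's ordered-set syntax (the output is an expectation):

import sqlglot

# Expected, approximately: SELECT QUANTILE_CONT(x, 0.25 ORDER BY x) FROM t
print(sqlglot.transpile(
    "SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY x) FROM t",
    read="postgres",
    write="duckdb",
)[0])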
def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when(
            "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
    )

    return self.sql(case)
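A minimal sketch of the fallback path, constructing a binary-flagged Length node by hand (the flag is normally set by a reading dialect; the output shape is an assumption):

from sqlglot import exp

e = exp.Length(this=exp.column("c"), binary=exp.true())
# Expected, approximately:
# CASE TYPEOF(c) WHEN 'VARCHAR' THEN LENGTH(CAST(c AS TEXT)) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(c AS BLOB)) END
print(e.sql(dialect="duckdb"))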
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
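The empty-struct special case, sketched from Snowflake (the output is an expectation):

import sqlglot

# Expected, approximately: SELECT STRUCT_PACK(k := 1)
print(sqlglot.transpile(
    "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
    read="snowflake",
    write="duckdb",
)[0])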
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql