sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Type TIMESTAMP / TIME WITH TIME ZONE does not support any modifiers
    if expression.is_type("timestamptz", "timetz"):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            sep: str = " AS ",
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(
                expression, sep=sep, tablesample_keyword=tablesample_keyword
            )

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)
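For orientation, here is a minimal sketch of how these transforms surface through sqlglot's public API. The input queries are illustrative and the exact output text can vary between sqlglot versions:

import sqlglot

# A BigQuery STRUCT literal goes through _struct_sql; without an enclosing
# STRUCT cast it is rendered with DuckDB's {...} syntax.
print(sqlglot.transpile("SELECT STRUCT(1 AS x)", read="bigquery", write="duckdb")[0])
# -> roughly: SELECT {'x': 1}

# GENERATE_DATE_ARRAY is rewritten by _generate_datetime_array_sql into a
# GENERATE_SERIES call whose result is cast back to DATE[].
print(
    sqlglot.transpile(
        "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')",
        read="bigquery",
        write="duckdb",
    )[0]
)
# -> roughly: SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE),
#    CAST('2020-01-03' AS DATE), INTERVAL '1' DAY) AS DATE[])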
NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

CONCAT_COALESCE: A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

SUPPORTS_ORDER_BY_ALL: Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.

SUPPORTS_FIXED_SIZE_ARRAYS: Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
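A short sketch of what the case-insensitive normalization strategy means in practice; normalize_identifier is inherited from Dialect (listed below):

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB

duckdb = DuckDB()
# Unquoted identifiers are lowercased under CASE_INSENSITIVE normalization;
# quoted identifiers keep their original casing.
print(duckdb.normalize_identifier(exp.to_identifier("FooBar")).sql())               # foobar
print(duckdb.normalize_identifier(exp.to_identifier("FooBar", quoted=True)).sql())  # "FooBar"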
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
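Since get_or_raise, parse, generate and transpile are inherited from Dialect, the DuckDB dialect can also be driven directly. A small illustrative sketch (outputs approximate):

from sqlglot.dialects.dialect import Dialect

duckdb = Dialect.get_or_raise("duckdb")  # resolves to the DuckDB dialect above
expressions = duckdb.parse("SELECT 1 + 1 AS two")  # parse returns a list of ASTs
print(duckdb.generate(expressions[0]))
# -> SELECT 1 + 1 AS two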
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
312 class Parser(parser.Parser): 313 BITWISE = { 314 **parser.Parser.BITWISE, 315 TokenType.TILDA: exp.RegexpLike, 316 } 317 318 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 319 320 FUNCTIONS = { 321 **parser.Parser.FUNCTIONS, 322 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 323 "ARRAY_SORT": exp.SortArray.from_arg_list, 324 "DATEDIFF": _build_date_diff, 325 "DATE_DIFF": _build_date_diff, 326 "DATE_TRUNC": date_trunc_to_time, 327 "DATETRUNC": date_trunc_to_time, 328 "DECODE": lambda args: exp.Decode( 329 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 330 ), 331 "ENCODE": lambda args: exp.Encode( 332 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 333 ), 334 "EPOCH": exp.TimeToUnix.from_arg_list, 335 "EPOCH_MS": lambda args: exp.UnixToTime( 336 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 337 ), 338 "JSON": exp.ParseJSON.from_arg_list, 339 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 340 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 341 "LIST_HAS": exp.ArrayContains.from_arg_list, 342 "LIST_REVERSE_SORT": _build_sort_array_desc, 343 "LIST_SORT": exp.SortArray.from_arg_list, 344 "LIST_VALUE": lambda args: exp.Array(expressions=args), 345 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 346 "MAKE_TIMESTAMP": _build_make_timestamp, 347 "MEDIAN": lambda args: exp.PercentileCont( 348 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 349 ), 350 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 351 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 352 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 353 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 354 ), 355 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 356 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 357 this=seq_get(args, 0), 358 expression=seq_get(args, 1), 359 replacement=seq_get(args, 2), 360 modifiers=seq_get(args, 3), 361 ), 362 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 363 "STRING_SPLIT": exp.Split.from_arg_list, 364 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 365 "STRING_TO_ARRAY": exp.Split.from_arg_list, 366 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 367 "STRUCT_PACK": exp.Struct.from_arg_list, 368 "STR_SPLIT": exp.Split.from_arg_list, 369 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 370 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 371 "UNNEST": exp.Explode.from_arg_list, 372 "XOR": binary_from_function(exp.BitwiseXor), 373 "GENERATE_SERIES": _build_generate_series(), 374 "RANGE": _build_generate_series(end_exclusive=True), 375 } 376 377 FUNCTIONS.pop("DATE_SUB") 378 379 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 380 FUNCTION_PARSERS.pop("DECODE") 381 382 NO_PAREN_FUNCTION_PARSERS = { 383 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 384 "MAP": lambda self: self._parse_map(), 385 } 386 387 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 388 TokenType.SEMI, 389 TokenType.ANTI, 390 } 391 392 PLACEHOLDER_PARSERS = { 393 **parser.Parser.PLACEHOLDER_PARSERS, 394 TokenType.PARAMETER: lambda self: ( 395 self.expression(exp.Placeholder, this=self._prev.text) 396 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 397 else None 398 ), 399 } 400 401 TYPE_CONVERTERS = { 402 # https://duckdb.org/docs/sql/data_types/numeric 403 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 404 # https://duckdb.org/docs/sql/data_types/text 405 
exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 406 } 407 408 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 409 # https://duckdb.org/docs/sql/samples.html 410 sample = super()._parse_table_sample(as_modifier=as_modifier) 411 if sample and not sample.args.get("method"): 412 if sample.args.get("size"): 413 sample.set("method", exp.var("RESERVOIR")) 414 else: 415 sample.set("method", exp.var("SYSTEM")) 416 417 return sample 418 419 def _parse_bracket( 420 self, this: t.Optional[exp.Expression] = None 421 ) -> t.Optional[exp.Expression]: 422 bracket = super()._parse_bracket(this) 423 if isinstance(bracket, exp.Bracket): 424 bracket.set("returns_list_for_maps", True) 425 426 return bracket 427 428 def _parse_map(self) -> exp.ToMap | exp.Map: 429 if self._match(TokenType.L_BRACE, advance=False): 430 return self.expression(exp.ToMap, this=self._parse_bracket()) 431 432 args = self._parse_wrapped_csv(self._parse_assignment) 433 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 434 435 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 436 return self._parse_field_def() 437 438 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 439 if len(aggregations) == 1: 440 return super()._pivot_column_names(aggregations) 441 return pivot_column_names(aggregations, dialect="duckdb")
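A sketch of one FUNCTIONS entry in action: MEDIAN is parsed as a PercentileCont with fraction 0.5, which the generator renders back as QUANTILE_CONT (expected output is indicative):

import sqlglot

print(sqlglot.transpile("SELECT MEDIAN(x) FROM t", read="duckdb", write="duckdb")[0])
# expected: SELECT QUANTILE_CONT(x, 0.5) FROM t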
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
443 class Generator(generator.Generator): 444 PARAMETER_TOKEN = "$" 445 NAMED_PLACEHOLDER_TOKEN = "$" 446 JOIN_HINTS = False 447 TABLE_HINTS = False 448 QUERY_HINTS = False 449 LIMIT_FETCH = "LIMIT" 450 STRUCT_DELIMITER = ("(", ")") 451 RENAME_TABLE_WITH_DB = False 452 NVL2_SUPPORTED = False 453 SEMI_ANTI_JOIN_WITH_SIDE = False 454 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 455 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 456 LAST_DAY_SUPPORTS_DATE_PART = False 457 JSON_KEY_VALUE_PAIR_SEP = "," 458 IGNORE_NULLS_IN_FUNC = True 459 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 460 SUPPORTS_CREATE_TABLE_LIKE = False 461 MULTI_ARG_DISTINCT = False 462 CAN_IMPLEMENT_ARRAY_ANY = True 463 SUPPORTS_TO_NUMBER = False 464 COPY_HAS_INTO_KEYWORD = False 465 STAR_EXCEPT = "EXCLUDE" 466 PAD_FILL_PATTERN_IS_REQUIRED = True 467 ARRAY_CONCAT_IS_VAR_LEN = False 468 469 TRANSFORMS = { 470 **generator.Generator.TRANSFORMS, 471 exp.ApproxDistinct: approx_count_distinct_sql, 472 exp.Array: inline_array_unless_query, 473 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 474 exp.ArrayFilter: rename_func("LIST_FILTER"), 475 exp.ArraySize: rename_func("ARRAY_LENGTH"), 476 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 477 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 478 exp.ArraySort: _array_sort_sql, 479 exp.ArraySum: rename_func("LIST_SUM"), 480 exp.BitwiseXor: rename_func("XOR"), 481 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 482 exp.CurrentDate: lambda *_: "CURRENT_DATE", 483 exp.CurrentTime: lambda *_: "CURRENT_TIME", 484 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 485 exp.DayOfMonth: rename_func("DAYOFMONTH"), 486 exp.DayOfWeek: rename_func("DAYOFWEEK"), 487 exp.DayOfYear: rename_func("DAYOFYEAR"), 488 exp.DataType: _datatype_sql, 489 exp.Date: _date_sql, 490 exp.DateAdd: _date_delta_sql, 491 exp.DateFromParts: rename_func("MAKE_DATE"), 492 exp.DateSub: _date_delta_sql, 493 exp.DateDiff: _date_diff_sql, 494 exp.DateStrToDate: datestrtodate_sql, 495 exp.Datetime: no_datetime_sql, 496 exp.DatetimeSub: _date_delta_sql, 497 exp.DatetimeAdd: _date_delta_sql, 498 exp.DateToDi: lambda self, 499 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 500 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 501 exp.DiToDate: lambda self, 502 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 503 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 504 exp.GenerateDateArray: _generate_datetime_array_sql, 505 exp.GenerateTimestampArray: _generate_datetime_array_sql, 506 exp.Explode: rename_func("UNNEST"), 507 exp.IntDiv: lambda self, e: self.binary(e, "//"), 508 exp.IsInf: rename_func("ISINF"), 509 exp.IsNan: rename_func("ISNAN"), 510 exp.JSONExtract: _arrow_json_extract_sql, 511 exp.JSONExtractScalar: _arrow_json_extract_sql, 512 exp.JSONFormat: _json_format_sql, 513 exp.LogicalOr: rename_func("BOOL_OR"), 514 exp.LogicalAnd: rename_func("BOOL_AND"), 515 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 516 exp.MonthsBetween: lambda self, e: self.func( 517 "DATEDIFF", 518 "'month'", 519 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 520 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 521 ), 522 exp.PercentileCont: rename_func("QUANTILE_CONT"), 523 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 524 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
525 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 526 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 527 exp.RegexpExtract: regexp_extract_sql, 528 exp.RegexpReplace: lambda self, e: self.func( 529 "REGEXP_REPLACE", 530 e.this, 531 e.expression, 532 e.args.get("replacement"), 533 e.args.get("modifiers"), 534 ), 535 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 536 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 537 exp.Return: lambda self, e: self.sql(e, "this"), 538 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 539 exp.Rand: rename_func("RANDOM"), 540 exp.SafeDivide: no_safe_divide_sql, 541 exp.Split: rename_func("STR_SPLIT"), 542 exp.SortArray: _sort_array_sql, 543 exp.StrPosition: str_position_sql, 544 exp.StrToUnix: lambda self, e: self.func( 545 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 546 ), 547 exp.Struct: _struct_sql, 548 exp.TimeAdd: _date_delta_sql, 549 exp.Time: no_time_sql, 550 exp.TimeDiff: _timediff_sql, 551 exp.Timestamp: no_timestamp_sql, 552 exp.TimestampDiff: lambda self, e: self.func( 553 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 554 ), 555 exp.TimestampTrunc: timestamptrunc_sql(), 556 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 557 exp.TimeStrToTime: timestrtotime_sql, 558 exp.TimeStrToUnix: lambda self, e: self.func( 559 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 560 ), 561 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 562 exp.TimeToUnix: rename_func("EPOCH"), 563 exp.TsOrDiToDi: lambda self, 564 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 565 exp.TsOrDsAdd: _date_delta_sql, 566 exp.TsOrDsDiff: lambda self, e: self.func( 567 "DATE_DIFF", 568 f"'{e.args.get('unit') or 'DAY'}'", 569 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 570 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 571 ), 572 exp.UnixToStr: lambda self, e: self.func( 573 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 574 ), 575 exp.DatetimeTrunc: lambda self, e: self.func( 576 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 577 ), 578 exp.UnixToTime: _unix_to_time_sql, 579 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 580 exp.VariancePop: rename_func("VAR_POP"), 581 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 582 exp.Xor: bool_xor_sql, 583 } 584 585 SUPPORTED_JSON_PATH_PARTS = { 586 exp.JSONPathKey, 587 exp.JSONPathRoot, 588 exp.JSONPathSubscript, 589 exp.JSONPathWildcard, 590 } 591 592 TYPE_MAPPING = { 593 **generator.Generator.TYPE_MAPPING, 594 exp.DataType.Type.BINARY: "BLOB", 595 exp.DataType.Type.BPCHAR: "TEXT", 596 exp.DataType.Type.CHAR: "TEXT", 597 exp.DataType.Type.FLOAT: "REAL", 598 exp.DataType.Type.NCHAR: "TEXT", 599 exp.DataType.Type.NVARCHAR: "TEXT", 600 exp.DataType.Type.UINT: "UINTEGER", 601 exp.DataType.Type.VARBINARY: "BLOB", 602 exp.DataType.Type.ROWVERSION: "BLOB", 603 exp.DataType.Type.VARCHAR: "TEXT", 604 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 605 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 606 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 607 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 608 } 609 610 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 611 
RESERVED_KEYWORDS = { 612 "array", 613 "analyse", 614 "union", 615 "all", 616 "when", 617 "in_p", 618 "default", 619 "create_p", 620 "window", 621 "asymmetric", 622 "to", 623 "else", 624 "localtime", 625 "from", 626 "end_p", 627 "select", 628 "current_date", 629 "foreign", 630 "with", 631 "grant", 632 "session_user", 633 "or", 634 "except", 635 "references", 636 "fetch", 637 "limit", 638 "group_p", 639 "leading", 640 "into", 641 "collate", 642 "offset", 643 "do", 644 "then", 645 "localtimestamp", 646 "check_p", 647 "lateral_p", 648 "current_role", 649 "where", 650 "asc_p", 651 "placing", 652 "desc_p", 653 "user", 654 "unique", 655 "initially", 656 "column", 657 "both", 658 "some", 659 "as", 660 "any", 661 "only", 662 "deferrable", 663 "null_p", 664 "current_time", 665 "true_p", 666 "table", 667 "case", 668 "trailing", 669 "variadic", 670 "for", 671 "on", 672 "distinct", 673 "false_p", 674 "not", 675 "constraint", 676 "current_timestamp", 677 "returning", 678 "primary", 679 "intersect", 680 "having", 681 "analyze", 682 "current_user", 683 "and", 684 "cast", 685 "symmetric", 686 "using", 687 "order", 688 "current_catalog", 689 } 690 691 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 692 693 # DuckDB doesn't generally support CREATE TABLE .. properties 694 # https://duckdb.org/docs/sql/statements/create_table.html 695 PROPERTIES_LOCATION = { 696 prop: exp.Properties.Location.UNSUPPORTED 697 for prop in generator.Generator.PROPERTIES_LOCATION 698 } 699 700 # There are a few exceptions (e.g. temporary tables) which are supported or 701 # can be transpiled to DuckDB, so we explicitly override them accordingly 702 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 703 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 704 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 705 706 def strtotime_sql(self, expression: exp.StrToTime) -> str: 707 if expression.args.get("safe"): 708 formatted_time = self.format_time(expression) 709 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 710 return str_to_time_sql(self, expression) 711 712 def strtodate_sql(self, expression: exp.StrToDate) -> str: 713 if expression.args.get("safe"): 714 formatted_time = self.format_time(expression) 715 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 716 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 717 718 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 719 arg = expression.this 720 if expression.args.get("safe"): 721 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 722 return self.func("JSON", arg) 723 724 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 725 nano = expression.args.get("nano") 726 if nano is not None: 727 expression.set( 728 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 729 ) 730 731 return rename_func("MAKE_TIME")(self, expression) 732 733 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 734 sec = expression.args["sec"] 735 736 milli = expression.args.get("milli") 737 if milli is not None: 738 sec += milli.pop() / exp.Literal.number(1000.0) 739 740 nano = expression.args.get("nano") 741 if nano is not None: 742 sec += nano.pop() / exp.Literal.number(1000000000.0) 743 744 if milli or nano: 745 expression.set("sec", sec) 746 747 return rename_func("MAKE_TIMESTAMP")(self, expression) 748 749 def tablesample_sql( 
750 self, 751 expression: exp.TableSample, 752 sep: str = " AS ", 753 tablesample_keyword: t.Optional[str] = None, 754 ) -> str: 755 if not isinstance(expression.parent, exp.Select): 756 # This sample clause only applies to a single source, not the entire resulting relation 757 tablesample_keyword = "TABLESAMPLE" 758 759 if expression.args.get("size"): 760 method = expression.args.get("method") 761 if method and method.name.upper() != "RESERVOIR": 762 self.unsupported( 763 f"Sampling method {method} is not supported with a discrete sample count, " 764 "defaulting to reservoir sampling" 765 ) 766 expression.set("method", exp.var("RESERVOIR")) 767 768 return super().tablesample_sql( 769 expression, sep=sep, tablesample_keyword=tablesample_keyword 770 ) 771 772 def interval_sql(self, expression: exp.Interval) -> str: 773 multiplier: t.Optional[int] = None 774 unit = expression.text("unit").lower() 775 776 if unit.startswith("week"): 777 multiplier = 7 778 if unit.startswith("quarter"): 779 multiplier = 90 780 781 if multiplier: 782 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 783 784 return super().interval_sql(expression) 785 786 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 787 if isinstance(expression.parent, exp.UserDefinedFunction): 788 return self.sql(expression, "this") 789 return super().columndef_sql(expression, sep) 790 791 def join_sql(self, expression: exp.Join) -> str: 792 if ( 793 expression.side == "LEFT" 794 and not expression.args.get("on") 795 and isinstance(expression.this, exp.Unnest) 796 ): 797 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 798 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 799 return super().join_sql(expression.on(exp.true())) 800 801 return super().join_sql(expression) 802 803 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 804 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 805 if expression.args.get("is_end_exclusive"): 806 return rename_func("RANGE")(self, expression) 807 808 return self.function_fallback_sql(expression) 809 810 def bracket_sql(self, expression: exp.Bracket) -> str: 811 this = expression.this 812 if isinstance(this, exp.Array): 813 this.replace(exp.paren(this)) 814 815 bracket = super().bracket_sql(expression) 816 817 if not expression.args.get("returns_list_for_maps"): 818 if not this.type: 819 from sqlglot.optimizer.annotate_types import annotate_types 820 821 this = annotate_types(this) 822 823 if this.is_type(exp.DataType.Type.MAP): 824 bracket = f"({bracket})[1]" 825 826 return bracket 827 828 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 829 expression_sql = self.sql(expression, "expression") 830 831 func = expression.this 832 if isinstance(func, exp.PERCENTILES): 833 # Make the order key the first arg and slide the fraction to the right 834 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 835 order_col = expression.find(exp.Ordered) 836 if order_col: 837 func.set("expression", func.this) 838 func.set("this", order_col.this) 839 840 this = self.sql(expression, "this").rstrip(")") 841 842 return f"{this}{expression_sql})" 843 844 def length_sql(self, expression: exp.Length) -> str: 845 arg = expression.this 846 847 # Dialects like BQ and Snowflake also accept binary values as args, so 848 # DDB will attempt to infer the type or resort to case/when resolution 849 if not expression.args.get("binary") or 
arg.is_string: 850 return self.func("LENGTH", arg) 851 852 if not arg.type: 853 from sqlglot.optimizer.annotate_types import annotate_types 854 855 arg = annotate_types(arg) 856 857 if arg.is_type(*exp.DataType.TEXT_TYPES): 858 return self.func("LENGTH", arg) 859 860 # We need these casts to make duckdb's static type checker happy 861 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 862 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 863 864 case = ( 865 exp.case(self.func("TYPEOF", arg)) 866 .when( 867 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 868 ) # anonymous to break length_sql recursion 869 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 870 ) 871 872 return self.sql(case) 873 874 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 875 this = expression.this 876 key = expression.args.get("key") 877 key_sql = key.name if isinstance(key, exp.Expression) else "" 878 value_sql = self.sql(expression, "value") 879 880 kv_sql = f"{key_sql} := {value_sql}" 881 882 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 883 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 884 if isinstance(this, exp.Struct) and not this.expressions: 885 return self.func("STRUCT_PACK", kv_sql) 886 887 return self.func("STRUCT_INSERT", this, kv_sql)
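A small example of one of the settings above: STAR_EXCEPT = "EXCLUDE" means BigQuery's SELECT * EXCEPT is rendered with DuckDB's EXCLUDE keyword (expected output is indicative):

import sqlglot

print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])
# expected: SELECT * EXCLUDE (a) FROM t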
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
712 def strtodate_sql(self, expression: exp.StrToDate) -> str: 713 if expression.args.get("safe"): 714 formatted_time = self.format_time(expression) 715 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 716 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
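A hedged sketch, assuming Snowflake's TRY_TO_DATE is parsed as a safe string-to-date conversion (the safe arg triggers the TRY_STRPTIME branch above; output inspected, not asserted):

import sqlglot

print(sqlglot.transpile("SELECT TRY_TO_DATE(x, 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])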
724 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 725 nano = expression.args.get("nano") 726 if nano is not None: 727 expression.set( 728 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 729 ) 730 731 return rename_func("MAKE_TIME")(self, expression)
733 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 734 sec = expression.args["sec"] 735 736 milli = expression.args.get("milli") 737 if milli is not None: 738 sec += milli.pop() / exp.Literal.number(1000.0) 739 740 nano = expression.args.get("nano") 741 if nano is not None: 742 sec += nano.pop() / exp.Literal.number(1000000000.0) 743 744 if milli or nano: 745 expression.set("sec", sec) 746 747 return rename_func("MAKE_TIMESTAMP")(self, expression)
749 def tablesample_sql( 750 self, 751 expression: exp.TableSample, 752 sep: str = " AS ", 753 tablesample_keyword: t.Optional[str] = None, 754 ) -> str: 755 if not isinstance(expression.parent, exp.Select): 756 # This sample clause only applies to a single source, not the entire resulting relation 757 tablesample_keyword = "TABLESAMPLE" 758 759 if expression.args.get("size"): 760 method = expression.args.get("method") 761 if method and method.name.upper() != "RESERVOIR": 762 self.unsupported( 763 f"Sampling method {method} is not supported with a discrete sample count, " 764 "defaulting to reservoir sampling" 765 ) 766 expression.set("method", exp.var("RESERVOIR")) 767 768 return super().tablesample_sql( 769 expression, sep=sep, tablesample_keyword=tablesample_keyword 770 )
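A quick probe of the size branch above: a discrete sample count forces the RESERVOIR method (output inspected rather than asserted):

import sqlglot

# Parsing sets method=RESERVOIR when a row count is given, and generation keeps it.
print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 10", read="duckdb", write="duckdb")[0])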
772 def interval_sql(self, expression: exp.Interval) -> str: 773 multiplier: t.Optional[int] = None 774 unit = expression.text("unit").lower() 775 776 if unit.startswith("week"): 777 multiplier = 7 778 if unit.startswith("quarter"): 779 multiplier = 90 780 781 if multiplier: 782 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 783 784 return super().interval_sql(expression)
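A sketch of the unit rewrite above; WEEK becomes a 7x DAY multiple and QUARTER a 90x DAY multiple (the exact rendering of the interval literal may vary):

import sqlglot

print(sqlglot.transpile("SELECT x + INTERVAL 2 WEEK", read="duckdb", write="duckdb")[0])
# expected to contain something like (7 * INTERVAL '2' DAY)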
791 def join_sql(self, expression: exp.Join) -> str: 792 if ( 793 expression.side == "LEFT" 794 and not expression.args.get("on") 795 and isinstance(expression.this, exp.Unnest) 796 ): 797 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 798 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 799 return super().join_sql(expression.on(exp.true())) 800 801 return super().join_sql(expression)
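A hedged example of the dummy ON clause; the table and column names are illustrative only:

import sqlglot

# BigQuery allows LEFT JOIN UNNEST(...) with no ON clause; the DuckDB output gains ON TRUE.
print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a", read="bigquery", write="duckdb")[0])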
810 def bracket_sql(self, expression: exp.Bracket) -> str: 811 this = expression.this 812 if isinstance(this, exp.Array): 813 this.replace(exp.paren(this)) 814 815 bracket = super().bracket_sql(expression) 816 817 if not expression.args.get("returns_list_for_maps"): 818 if not this.type: 819 from sqlglot.optimizer.annotate_types import annotate_types 820 821 this = annotate_types(this) 822 823 if this.is_type(exp.DataType.Type.MAP): 824 bracket = f"({bracket})[1]" 825 826 return bracket
828 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 829 expression_sql = self.sql(expression, "expression") 830 831 func = expression.this 832 if isinstance(func, exp.PERCENTILES): 833 # Make the order key the first arg and slide the fraction to the right 834 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 835 order_col = expression.find(exp.Ordered) 836 if order_col: 837 func.set("expression", func.this) 838 func.set("this", order_col.this) 839 840 this = self.sql(expression, "this").rstrip(")") 841 842 return f"{this}{expression_sql})"
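A sketch of the argument sliding above, using Postgres' ordered-set syntax as the source (expected output is indicative):

import sqlglot

print(sqlglot.transpile("SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t", read="postgres", write="duckdb")[0])
# expected: roughly SELECT QUANTILE_CONT(x, 0.5) FROM t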
844 def length_sql(self, expression: exp.Length) -> str: 845 arg = expression.this 846 847 # Dialects like BQ and Snowflake also accept binary values as args, so 848 # DDB will attempt to infer the type or resort to case/when resolution 849 if not expression.args.get("binary") or arg.is_string: 850 return self.func("LENGTH", arg) 851 852 if not arg.type: 853 from sqlglot.optimizer.annotate_types import annotate_types 854 855 arg = annotate_types(arg) 856 857 if arg.is_type(*exp.DataType.TEXT_TYPES): 858 return self.func("LENGTH", arg) 859 860 # We need these casts to make duckdb's static type checker happy 861 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 862 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 863 864 case = ( 865 exp.case(self.func("TYPEOF", arg)) 866 .when( 867 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 868 ) # anonymous to break length_sql recursion 869 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 870 ) 871 872 return self.sql(case)
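A hedged probe of the CASE/TYPEOF fallback, assuming the source dialect marks LENGTH as accepting binary args and the column's type cannot be inferred (output inspected, not asserted):

import sqlglot

print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])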
874 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 875 this = expression.this 876 key = expression.args.get("key") 877 key_sql = key.name if isinstance(key, exp.Expression) else "" 878 value_sql = self.sql(expression, "value") 879 880 kv_sql = f"{key_sql} := {value_sql}" 881 882 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 883 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 884 if isinstance(this, exp.Struct) and not this.expressions: 885 return self.func("STRUCT_PACK", kv_sql) 886 887 return self.func("STRUCT_INSERT", this, kv_sql)
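A small example of the empty-struct special case called out in the comments above (expected output is indicative):

import sqlglot

print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')", read="snowflake", write="duckdb")[0])
# expected: roughly SELECT STRUCT_PACK(k := 'v')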
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql