sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: lambda self, this: self._parse_colon_get_path(this),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_colon_get_path(
            self: parser.Parser, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            while True:
                path = self._parse_bitwise()

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                if isinstance(path, exp.Cast):
                    target_type = path.to
                    path = path.this
                else:
                    target_type = None

                if isinstance(path, exp.Expression):
                    path = exp.Literal.string(path.sql(dialect="snowflake"))

                # The extraction operator : is left-associative
                this = self.expression(
                    exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
                )

                if target_type:
                    this = exp.cast(this, target_type)

                if not self._match(TokenType.COLON):
                    break

            return self._parse_range(this)

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
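The builder functions above feed the Parser and Generator tables, so Snowflake-only constructs survive a round trip through sqlglot's dialect-neutral AST. A minimal, hedged sketch of that behavior using the public sqlglot API (the sample queries and the DuckDB target are illustrative, and exact output may vary by sqlglot version):

    import sqlglot

    # DIV0(a, b) parses into an exp.If via _build_if_from_div0, so a target
    # dialect without DIV0 receives an equivalent IF/CASE expression instead.
    print(sqlglot.transpile("SELECT DIV0(a, b) FROM t", read="snowflake", write="duckdb")[0])

    # An integer argument to TO_TIMESTAMP is detected by _build_to_timestamp and
    # parsed as exp.UnixToTime, i.e. an epoch conversion rather than a format parse.
    print(sqlglot.transpile("SELECT TO_TIMESTAMP(1659981729)", read="snowflake", write="duckdb")[0])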
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized. For Snowflake this is NormalizationStrategy.UPPERCASE, since unquoted identifiers resolve case-insensitively and are stored uppercase.
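A small sketch of what UPPERCASE normalization means in practice, using sqlglot's public optimizer helper (the table and column names are illustrative):

    from sqlglot import parse_one
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    expr = parse_one('SELECT a, "b" FROM t', read="snowflake")
    # Unquoted names are uppercased to match Snowflake's resolution rules;
    # the quoted "b" keeps its exact casing.
    print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))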
NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last". Snowflake uses "nulls_are_large", i.e. NULLs sort as if they were the largest values.
PREFER_CTE_ALIAS_COLUMN: Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
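A runnable sketch of the rewrite described above, assuming the standard optimizer entry point (the flag is consulted during column qualification; note that qualify also normalizes and quotes identifiers, so the printed SQL reflects that as well):

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    # With PREFER_CTE_ALIAS_COLUMN, the CTE alias column c overrides the
    # projection alias, so SUM(a) is aliased as c inside the CTE.
    print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))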
TIME_MAPPING: Associates this dialect's time format tokens with their equivalent Python strftime formats.
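An illustrative round trip through the mapping (the DuckDB target is an arbitrary choice; exact output may vary by sqlglot version):

    import sqlglot

    # 'yyyy-mm-dd' is translated through TIME_MAPPING into strftime tokens
    # ('%Y-%m-%d'), which the target dialect then renders in its own syntax.
    print(sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-02', 'yyyy-mm-dd')",
        read="snowflake",
        write="duckdb",
    )[0])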
quote_identifier: Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
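A small sketch of the DUAL carve-out implemented in the override above (identifier and table names are illustrative):

    from sqlglot import exp, parse_one
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()

    # An ordinary identifier gets quoted on request...
    print(dialect.quote_identifier(exp.to_identifier("col")).sql(dialect="snowflake"))

    # ...but dual inside a Table is left untouched, since quoting it would
    # change Snowflake's special handling of FROM DUAL.
    table = parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table)
    print(dialect.quote_identifier(table.this).sql(dialect="snowflake"))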
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
313 class Parser(parser.Parser): 314 IDENTIFY_PIVOT_STRINGS = True 315 316 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 321 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 322 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 323 this=seq_get(args, 1), expression=seq_get(args, 0) 324 ), 325 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 326 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 327 start=seq_get(args, 0), 328 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 329 step=seq_get(args, 2), 330 ), 331 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 332 "BITXOR": binary_from_function(exp.BitwiseXor), 333 "BIT_XOR": binary_from_function(exp.BitwiseXor), 334 "BOOLXOR": binary_from_function(exp.Xor), 335 "CONVERT_TIMEZONE": _build_convert_timezone, 336 "DATE_TRUNC": _date_trunc_to_time, 337 "DATEADD": lambda args: exp.DateAdd( 338 this=seq_get(args, 2), 339 expression=seq_get(args, 1), 340 unit=_map_date_part(seq_get(args, 0)), 341 ), 342 "DATEDIFF": _build_datediff, 343 "DIV0": _build_if_from_div0, 344 "FLATTEN": exp.Explode.from_arg_list, 345 "GET_PATH": lambda args, dialect: exp.JSONExtract( 346 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 347 ), 348 "IFF": exp.If.from_arg_list, 349 "LAST_DAY": lambda args: exp.LastDay( 350 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 351 ), 352 "LISTAGG": exp.GroupConcat.from_arg_list, 353 "NULLIFZERO": _build_if_from_nullifzero, 354 "OBJECT_CONSTRUCT": _build_object_construct, 355 "REGEXP_REPLACE": _build_regexp_replace, 356 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 357 "RLIKE": exp.RegexpLike.from_arg_list, 358 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 359 "TIMEDIFF": _build_datediff, 360 "TIMESTAMPDIFF": _build_datediff, 361 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 362 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 363 "TO_TIMESTAMP": _build_to_timestamp, 364 "TO_VARCHAR": exp.ToChar.from_arg_list, 365 "ZEROIFNULL": _build_if_from_zeroifnull, 366 } 367 368 FUNCTION_PARSERS = { 369 **parser.Parser.FUNCTION_PARSERS, 370 "DATE_PART": lambda self: self._parse_date_part(), 371 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 372 } 373 FUNCTION_PARSERS.pop("TRIM") 374 375 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 376 377 RANGE_PARSERS = { 378 **parser.Parser.RANGE_PARSERS, 379 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 380 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 381 TokenType.COLON: lambda self, this: self._parse_colon_get_path(this), 382 } 383 384 ALTER_PARSERS = { 385 **parser.Parser.ALTER_PARSERS, 386 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 387 "UNSET": lambda self: self.expression( 388 exp.Set, 389 tag=self._match_text_seq("TAG"), 390 expressions=self._parse_csv(self._parse_id_var), 391 unset=True, 392 ), 393 "SWAP": lambda self: self._parse_alter_table_swap(), 394 } 395 396 STATEMENT_PARSERS = { 397 **parser.Parser.STATEMENT_PARSERS, 398 TokenType.SHOW: lambda self: self._parse_show(), 399 } 400 401 PROPERTY_PARSERS = { 402 **parser.Parser.PROPERTY_PARSERS, 403 "LOCATION": lambda self: self._parse_location(), 404 } 405 406 SHOW_PARSERS = { 407 "SCHEMAS": _show_parser("SCHEMAS"), 408 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 409 "OBJECTS": 
_show_parser("OBJECTS"), 410 "TERSE OBJECTS": _show_parser("OBJECTS"), 411 "TABLES": _show_parser("TABLES"), 412 "TERSE TABLES": _show_parser("TABLES"), 413 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 414 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 415 "COLUMNS": _show_parser("COLUMNS"), 416 "USERS": _show_parser("USERS"), 417 "TERSE USERS": _show_parser("USERS"), 418 } 419 420 STAGED_FILE_SINGLE_TOKENS = { 421 TokenType.DOT, 422 TokenType.MOD, 423 TokenType.SLASH, 424 } 425 426 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 427 428 def _parse_colon_get_path( 429 self: parser.Parser, this: t.Optional[exp.Expression] 430 ) -> t.Optional[exp.Expression]: 431 while True: 432 path = self._parse_bitwise() 433 434 # The cast :: operator has a lower precedence than the extraction operator :, so 435 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 436 if isinstance(path, exp.Cast): 437 target_type = path.to 438 path = path.this 439 else: 440 target_type = None 441 442 if isinstance(path, exp.Expression): 443 path = exp.Literal.string(path.sql(dialect="snowflake")) 444 445 # The extraction operator : is left-associative 446 this = self.expression( 447 exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path) 448 ) 449 450 if target_type: 451 this = exp.cast(this, target_type) 452 453 if not self._match(TokenType.COLON): 454 break 455 456 return self._parse_range(this) 457 458 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 459 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 460 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 461 this = self._parse_var() or self._parse_type() 462 463 if not this: 464 return None 465 466 self._match(TokenType.COMMA) 467 expression = self._parse_bitwise() 468 this = _map_date_part(this) 469 name = this.name.upper() 470 471 if name.startswith("EPOCH"): 472 if name == "EPOCH_MILLISECOND": 473 scale = 10**3 474 elif name == "EPOCH_MICROSECOND": 475 scale = 10**6 476 elif name == "EPOCH_NANOSECOND": 477 scale = 10**9 478 else: 479 scale = None 480 481 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 482 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 483 484 if scale: 485 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 486 487 return to_unix 488 489 return self.expression(exp.Extract, this=this, expression=expression) 490 491 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 492 if is_map: 493 # Keys are strings in Snowflake's objects, see also: 494 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 495 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 496 return self._parse_slice(self._parse_string()) 497 498 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 499 500 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 501 lateral = super()._parse_lateral() 502 if not lateral: 503 return lateral 504 505 if isinstance(lateral.this, exp.Explode): 506 table_alias = lateral.args.get("alias") 507 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 508 if table_alias and not table_alias.args.get("columns"): 509 table_alias.set("columns", columns) 510 elif not table_alias: 511 exp.alias_(lateral, "_flattened", table=columns, copy=False) 512 513 return lateral 514 515 def 
_parse_at_before(self, table: exp.Table) -> exp.Table: 516 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 517 index = self._index 518 if self._match_texts(("AT", "BEFORE")): 519 this = self._prev.text.upper() 520 kind = ( 521 self._match(TokenType.L_PAREN) 522 and self._match_texts(self.HISTORICAL_DATA_KIND) 523 and self._prev.text.upper() 524 ) 525 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 526 527 if expression: 528 self._match_r_paren() 529 when = self.expression( 530 exp.HistoricalData, this=this, kind=kind, expression=expression 531 ) 532 table.set("when", when) 533 else: 534 self._retreat(index) 535 536 return table 537 538 def _parse_table_parts( 539 self, schema: bool = False, is_db_reference: bool = False 540 ) -> exp.Table: 541 # https://docs.snowflake.com/en/user-guide/querying-stage 542 if self._match(TokenType.STRING, advance=False): 543 table = self._parse_string() 544 elif self._match_text_seq("@", advance=False): 545 table = self._parse_location_path() 546 else: 547 table = None 548 549 if table: 550 file_format = None 551 pattern = None 552 553 self._match(TokenType.L_PAREN) 554 while self._curr and not self._match(TokenType.R_PAREN): 555 if self._match_text_seq("FILE_FORMAT", "=>"): 556 file_format = self._parse_string() or super()._parse_table_parts( 557 is_db_reference=is_db_reference 558 ) 559 elif self._match_text_seq("PATTERN", "=>"): 560 pattern = self._parse_string() 561 else: 562 break 563 564 self._match(TokenType.COMMA) 565 566 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 567 else: 568 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 569 570 return self._parse_at_before(table) 571 572 def _parse_id_var( 573 self, 574 any_token: bool = True, 575 tokens: t.Optional[t.Collection[TokenType]] = None, 576 ) -> t.Optional[exp.Expression]: 577 if self._match_text_seq("IDENTIFIER", "("): 578 identifier = ( 579 super()._parse_id_var(any_token=any_token, tokens=tokens) 580 or self._parse_string() 581 ) 582 self._match_r_paren() 583 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 584 585 return super()._parse_id_var(any_token=any_token, tokens=tokens) 586 587 def _parse_show_snowflake(self, this: str) -> exp.Show: 588 scope = None 589 scope_kind = None 590 591 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 592 # which is syntactically valid but has no effect on the output 593 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 594 595 history = self._match_text_seq("HISTORY") 596 597 like = self._parse_string() if self._match(TokenType.LIKE) else None 598 599 if self._match(TokenType.IN): 600 if self._match_text_seq("ACCOUNT"): 601 scope_kind = "ACCOUNT" 602 elif self._match_set(self.DB_CREATABLES): 603 scope_kind = self._prev.text.upper() 604 if self._curr: 605 scope = self._parse_table_parts() 606 elif self._curr: 607 scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE" 608 scope = self._parse_table_parts() 609 610 return self.expression( 611 exp.Show, 612 **{ 613 "terse": terse, 614 "this": this, 615 "history": history, 616 "like": like, 617 "scope": scope, 618 "scope_kind": scope_kind, 619 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 620 "limit": self._parse_limit(), 621 "from": self._parse_string() if self._match(TokenType.FROM) else None, 622 }, 623 ) 624 625 def _parse_alter_table_swap(self) -> exp.SwapTable: 626 
self._match_text_seq("WITH") 627 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 628 629 def _parse_location(self) -> exp.LocationProperty: 630 self._match(TokenType.EQ) 631 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 632 633 def _parse_location_path(self) -> exp.Var: 634 parts = [self._advance_any(ignore_reserved=True)] 635 636 # We avoid consuming a comma token because external tables like @foo and @bar 637 # can be joined in a query with a comma separator. 638 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 639 parts.append(self._advance_any(ignore_reserved=True)) 640 641 return exp.var("".join(part.text for part in parts if part))
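To illustrate `_parse_colon_get_path` above: the `:` extraction operator is parsed into a GET_PATH-style JSONExtract, and a trailing `::` cast is rearranged so it applies to the extraction result rather than to the path. A sketch, with the output approximate:

    import sqlglot

    # ':' binds tighter than '::', so the cast ends up wrapping the GET_PATH call
    sql = "SELECT v:path.to.field::INT FROM t"
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
    # roughly: SELECT CAST(GET_PATH(v, 'path.to.field') AS INT) FROM t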
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
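A minimal sketch of wiring these arguments up by hand; in practice sqlglot.parse_one(..., read="snowflake") does this for you:

    from sqlglot import ErrorLevel
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    sql = "SELECT IFF(x > 0, 'pos', 'neg') FROM t"

    # Tokenize, then parse with a stricter error level and a small error budget
    tokens = dialect.tokenize(sql)
    parsed = dialect.parser(error_level=ErrorLevel.RAISE, max_errors=3).parse(tokens, sql)
    print(parsed[0].sql(dialect="snowflake"))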
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
643    class Tokenizer(tokens.Tokenizer):
644        STRING_ESCAPES = ["\\", "'"]
645        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
646        RAW_STRINGS = ["$$"]
647        COMMENTS = ["--", "//", ("/*", "*/")]
648
649        KEYWORDS = {
650            **tokens.Tokenizer.KEYWORDS,
651            "BYTEINT": TokenType.INT,
652            "CHAR VARYING": TokenType.VARCHAR,
653            "CHARACTER VARYING": TokenType.VARCHAR,
654            "EXCLUDE": TokenType.EXCEPT,
655            "ILIKE ANY": TokenType.ILIKE_ANY,
656            "LIKE ANY": TokenType.LIKE_ANY,
657            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
658            "MINUS": TokenType.EXCEPT,
659            "NCHAR VARYING": TokenType.VARCHAR,
660            "PUT": TokenType.COMMAND,
661            "REMOVE": TokenType.COMMAND,
662            "RENAME": TokenType.REPLACE,
663            "RM": TokenType.COMMAND,
664            "SAMPLE": TokenType.TABLE_SAMPLE,
665            "SQL_DOUBLE": TokenType.DOUBLE,
666            "SQL_VARCHAR": TokenType.VARCHAR,
667            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
668            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
669            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
670            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
671            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
672            "TOP": TokenType.TOP,
673        }
674
675        SINGLE_TOKENS = {
676            **tokens.Tokenizer.SINGLE_TOKENS,
677            "$": TokenType.PARAMETER,
678        }
679
680        VAR_SINGLE_TOKENS = {"$"}
681
682        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
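A quick sketch of two Snowflake-specific tokenizer rules above, the `//` line comment and the `$$`-delimited raw string:

    import sqlglot

    # '//' starts a line comment (consumed as a comment, not emitted as a token);
    # $$...$$ survives as a single string token with its quotes intact
    sql = "SELECT $$a 'raw' string$$ // trailing comment"
    for token in sqlglot.tokenize(sql, read="snowflake"):
        print(token.token_type, repr(token.text))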
684 class Generator(generator.Generator): 685 PARAMETER_TOKEN = "$" 686 MATCHED_BY_SOURCE = False 687 SINGLE_STRING_INTERVAL = True 688 JOIN_HINTS = False 689 TABLE_HINTS = False 690 QUERY_HINTS = False 691 AGGREGATE_FILTER_SUPPORTED = False 692 SUPPORTS_TABLE_COPY = False 693 COLLATE_IS_FUNC = True 694 LIMIT_ONLY_LITERALS = True 695 JSON_KEY_VALUE_PAIR_SEP = "," 696 INSERT_OVERWRITE = " OVERWRITE INTO" 697 698 TRANSFORMS = { 699 **generator.Generator.TRANSFORMS, 700 exp.ArgMax: rename_func("MAX_BY"), 701 exp.ArgMin: rename_func("MIN_BY"), 702 exp.Array: inline_array_sql, 703 exp.ArrayConcat: rename_func("ARRAY_CAT"), 704 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 705 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 706 exp.AtTimeZone: lambda self, e: self.func( 707 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 708 ), 709 exp.BitwiseXor: rename_func("BITXOR"), 710 exp.DateAdd: date_delta_sql("DATEADD"), 711 exp.DateDiff: date_delta_sql("DATEDIFF"), 712 exp.DateStrToDate: datestrtodate_sql, 713 exp.DataType: _datatype_sql, 714 exp.DayOfMonth: rename_func("DAYOFMONTH"), 715 exp.DayOfWeek: rename_func("DAYOFWEEK"), 716 exp.DayOfYear: rename_func("DAYOFYEAR"), 717 exp.Explode: rename_func("FLATTEN"), 718 exp.Extract: rename_func("DATE_PART"), 719 exp.FromTimeZone: lambda self, e: self.func( 720 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 721 ), 722 exp.GenerateSeries: lambda self, e: self.func( 723 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 724 ), 725 exp.GroupConcat: rename_func("LISTAGG"), 726 exp.If: if_sql(name="IFF", false_value="NULL"), 727 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 728 exp.JSONExtractScalar: lambda self, e: self.func( 729 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 730 ), 731 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 732 exp.JSONPathRoot: lambda *_: "", 733 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 734 exp.LogicalOr: rename_func("BOOLOR_AGG"), 735 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 736 exp.Max: max_or_greatest, 737 exp.Min: min_or_least, 738 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 739 exp.PercentileCont: transforms.preprocess( 740 [transforms.add_within_group_for_percentiles] 741 ), 742 exp.PercentileDisc: transforms.preprocess( 743 [transforms.add_within_group_for_percentiles] 744 ), 745 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 746 exp.RegexpILike: _regexpilike_sql, 747 exp.Rand: rename_func("RANDOM"), 748 exp.Select: transforms.preprocess( 749 [ 750 transforms.eliminate_distinct_on, 751 transforms.explode_to_unnest(), 752 transforms.eliminate_semi_and_anti_joins, 753 ] 754 ), 755 exp.SHA: rename_func("SHA1"), 756 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 757 exp.StartsWith: rename_func("STARTSWITH"), 758 exp.StrPosition: lambda self, e: self.func( 759 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 760 ), 761 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 762 exp.Struct: lambda self, e: self.func( 763 "OBJECT_CONSTRUCT", 764 *(arg for expression in e.expressions for arg in expression.flatten()), 765 ), 766 exp.Stuff: rename_func("INSERT"), 767 exp.TimestampDiff: lambda self, e: self.func( 768 "TIMESTAMPDIFF", e.unit, e.expression, e.this 769 ), 770 exp.TimestampTrunc: timestamptrunc_sql, 771 exp.TimeStrToTime: timestrtotime_sql, 772 
exp.TimeToStr: lambda self, e: self.func( 773 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 774 ), 775 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 776 exp.ToArray: rename_func("TO_ARRAY"), 777 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 778 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 779 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 780 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 781 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 782 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 783 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 784 exp.Xor: rename_func("BOOLXOR"), 785 } 786 787 SUPPORTED_JSON_PATH_PARTS = { 788 exp.JSONPathKey, 789 exp.JSONPathRoot, 790 exp.JSONPathSubscript, 791 } 792 793 TYPE_MAPPING = { 794 **generator.Generator.TYPE_MAPPING, 795 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 796 } 797 798 STAR_MAPPING = { 799 "except": "EXCLUDE", 800 "replace": "RENAME", 801 } 802 803 PROPERTIES_LOCATION = { 804 **generator.Generator.PROPERTIES_LOCATION, 805 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 806 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 807 } 808 809 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 810 milli = expression.args.get("milli") 811 if milli is not None: 812 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 813 expression.set("nano", milli_to_nano) 814 815 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 816 817 def trycast_sql(self, expression: exp.TryCast) -> str: 818 value = expression.this 819 820 if value.type is None: 821 from sqlglot.optimizer.annotate_types import annotate_types 822 823 value = annotate_types(value) 824 825 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 826 return super().trycast_sql(expression) 827 828 # TRY_CAST only works for string values in Snowflake 829 return self.cast_sql(expression) 830 831 def log_sql(self, expression: exp.Log) -> str: 832 if not expression.expression: 833 return self.func("LN", expression.this) 834 835 return super().log_sql(expression) 836 837 def unnest_sql(self, expression: exp.Unnest) -> str: 838 unnest_alias = expression.args.get("alias") 839 offset = expression.args.get("offset") 840 841 columns = [ 842 exp.to_identifier("seq"), 843 exp.to_identifier("key"), 844 exp.to_identifier("path"), 845 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 846 seq_get(unnest_alias.columns if unnest_alias else [], 0) 847 or exp.to_identifier("value"), 848 exp.to_identifier("this"), 849 ] 850 851 if unnest_alias: 852 unnest_alias.set("columns", columns) 853 else: 854 unnest_alias = exp.TableAlias(this="_u", columns=columns) 855 856 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 857 alias = self.sql(unnest_alias) 858 alias = f" AS {alias}" if alias else "" 859 return f"{explode}{alias}" 860 861 def show_sql(self, expression: exp.Show) -> str: 862 terse = "TERSE " if expression.args.get("terse") else "" 863 history = " HISTORY" if expression.args.get("history") else "" 864 like = self.sql(expression, "like") 865 like = f" LIKE {like}" if like else "" 866 867 scope = self.sql(expression, "scope") 868 scope = f" {scope}" if scope else "" 869 870 scope_kind = self.sql(expression, "scope_kind") 871 if scope_kind: 872 scope_kind = f" IN {scope_kind}" 873 874 starts_with = self.sql(expression, "starts_with") 875 if starts_with: 876 starts_with = f" STARTS 
WITH {starts_with}" 877 878 limit = self.sql(expression, "limit") 879 880 from_ = self.sql(expression, "from") 881 if from_: 882 from_ = f" FROM {from_}" 883 884 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 885 886 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 887 # Other dialects don't support all of the following parameters, so we need to 888 # generate default values as necessary to ensure the transpilation is correct 889 group = expression.args.get("group") 890 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 891 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 892 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 893 894 return self.func( 895 "REGEXP_SUBSTR", 896 expression.this, 897 expression.expression, 898 position, 899 occurrence, 900 parameters, 901 group, 902 ) 903 904 def except_op(self, expression: exp.Except) -> str: 905 if not expression.args.get("distinct"): 906 self.unsupported("EXCEPT with All is not supported in Snowflake") 907 return super().except_op(expression) 908 909 def intersect_op(self, expression: exp.Intersect) -> str: 910 if not expression.args.get("distinct"): 911 self.unsupported("INTERSECT with All is not supported in Snowflake") 912 return super().intersect_op(expression) 913 914 def describe_sql(self, expression: exp.Describe) -> str: 915 # Default to table if kind is unknown 916 kind_value = expression.args.get("kind") or "TABLE" 917 kind = f" {kind_value}" if kind_value else "" 918 this = f" {self.sql(expression, 'this')}" 919 expressions = self.expressions(expression, flat=True) 920 expressions = f" {expressions}" if expressions else "" 921 return f"DESCRIBE{kind}{this}{expressions}" 922 923 def generatedasidentitycolumnconstraint_sql( 924 self, expression: exp.GeneratedAsIdentityColumnConstraint 925 ) -> str: 926 start = expression.args.get("start") 927 start = f" START {start}" if start else "" 928 increment = expression.args.get("increment") 929 increment = f" INCREMENT {increment}" if increment else "" 930 return f"AUTOINCREMENT{start}{increment}" 931 932 def swaptable_sql(self, expression: exp.SwapTable) -> str: 933 this = self.sql(expression, "this") 934 return f"SWAP WITH {this}" 935 936 def with_properties(self, properties: exp.Properties) -> str: 937 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 938 939 def cluster_sql(self, expression: exp.Cluster) -> str: 940 return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
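Two of the TRANSFORMS above in action, as a sketch with approximate outputs:

    import sqlglot

    # exp.If renders as IFF, and exp.Rand renders as RANDOM in Snowflake
    print(sqlglot.transpile("SELECT IF(x > 0, 1, 2)", read="duckdb", write="snowflake")[0])
    # roughly: SELECT IFF(x > 0, 1, 2)
    print(sqlglot.transpile("SELECT RAND()", read="mysql", write="snowflake")[0])
    # roughly: SELECT RANDOM()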
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
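A small usage sketch of the pretty and identify options when generating Snowflake SQL; generator options are forwarded through transpile:

    import sqlglot

    # identify=True forces quoting of every identifier; pretty=True adds line breaks
    print(sqlglot.transpile("SELECT a, b FROM t", write="snowflake", pretty=True, identify=True)[0])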
809    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
810        milli = expression.args.get("milli")
811        if milli is not None:
812            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
813            expression.set("nano", milli_to_nano)
814
815        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
817    def trycast_sql(self, expression: exp.TryCast) -> str:
818        value = expression.this
819
820        if value.type is None:
821            from sqlglot.optimizer.annotate_types import annotate_types
822
823            value = annotate_types(value)
824
825        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
826            return super().trycast_sql(expression)
827
828        # TRY_CAST only works for string values in Snowflake
829        return self.cast_sql(expression)
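A sketch of the distinction drawn above, with approximate outputs: TRY_CAST survives only when the operand is text or of unknown type:

    import sqlglot

    # x has unknown type, so TRY_CAST is kept; the numeric literal degrades to CAST
    sql = "SELECT TRY_CAST(x AS INT), TRY_CAST(1 AS TEXT)"
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
    # roughly: SELECT TRY_CAST(x AS INT), CAST(1 AS TEXT)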
837    def unnest_sql(self, expression: exp.Unnest) -> str:
838        unnest_alias = expression.args.get("alias")
839        offset = expression.args.get("offset")
840
841        columns = [
842            exp.to_identifier("seq"),
843            exp.to_identifier("key"),
844            exp.to_identifier("path"),
845            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
846            seq_get(unnest_alias.columns if unnest_alias else [], 0)
847            or exp.to_identifier("value"),
848            exp.to_identifier("this"),
849        ]
850
851        if unnest_alias:
852            unnest_alias.set("columns", columns)
853        else:
854            unnest_alias = exp.TableAlias(this="_u", columns=columns)
855
856        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
857        alias = self.sql(unnest_alias)
858        alias = f" AS {alias}" if alias else ""
859        return f"{explode}{alias}"
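A sketch of the resulting shape, with the output approximate:

    import sqlglot

    # UNNEST becomes TABLE(FLATTEN(INPUT => ...)) with Snowflake's fixed column set
    print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="duckdb", write="snowflake")[0])
    # roughly: SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)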
861    def show_sql(self, expression: exp.Show) -> str:
862        terse = "TERSE " if expression.args.get("terse") else ""
863        history = " HISTORY" if expression.args.get("history") else ""
864        like = self.sql(expression, "like")
865        like = f" LIKE {like}" if like else ""
866
867        scope = self.sql(expression, "scope")
868        scope = f" {scope}" if scope else ""
869
870        scope_kind = self.sql(expression, "scope_kind")
871        if scope_kind:
872            scope_kind = f" IN {scope_kind}"
873
874        starts_with = self.sql(expression, "starts_with")
875        if starts_with:
876            starts_with = f" STARTS WITH {starts_with}"
877
878        limit = self.sql(expression, "limit")
879
880        from_ = self.sql(expression, "from")
881        if from_:
882            from_ = f" FROM {from_}"
883
884        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
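A round-trip sketch, with the output approximate:

    import sqlglot

    # SHOW round-trips through exp.Show with its optional clauses preserved
    sql = "SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA my_db.my_schema"
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])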
886    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
887        # Other dialects don't support all of the following parameters, so we need to
888        # generate default values as necessary to ensure the transpilation is correct
889        group = expression.args.get("group")
890        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
891        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
892        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))
893
894        return self.func(
895            "REGEXP_SUBSTR",
896            expression.this,
897            expression.expression,
898            position,
899            occurrence,
900            parameters,
901            group,
902        )
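A sketch of the default backfilling; this assumes DuckDB's REGEXP_EXTRACT maps to exp.RegexpExtract with a group argument, and the output is approximate:

    import sqlglot

    # An explicit group forces defaults for position, occurrence and parameters
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(a, 'p(q)', 1)", read="duckdb", write="snowflake")[0])
    # roughly: SELECT REGEXP_SUBSTR(a, 'p(q)', 1, 1, 'c', 1)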
914    def describe_sql(self, expression: exp.Describe) -> str:
915        # Default to table if kind is unknown
916        kind_value = expression.args.get("kind") or "TABLE"
917        kind = f" {kind_value}" if kind_value else ""
918        this = f" {self.sql(expression, 'this')}"
919        expressions = self.expressions(expression, flat=True)
920        expressions = f" {expressions}" if expressions else ""
921        return f"DESCRIBE{kind}{this}{expressions}"
923    def generatedasidentitycolumnconstraint_sql(
924        self, expression: exp.GeneratedAsIdentityColumnConstraint
925    ) -> str:
926        start = expression.args.get("start")
927        start = f" START {start}" if start else ""
928        increment = expression.args.get("increment")
929        increment = f" INCREMENT {increment}" if increment else ""
930        return f"AUTOINCREMENT{start}{increment}"
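A sketch of the rendering, with the output approximate:

    import sqlglot

    # GENERATED ... AS IDENTITY is emitted with Snowflake's AUTOINCREMENT shorthand
    sql = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
    # roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)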
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql