sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import flatten, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: lambda self, this: self._parse_colon_get_path(this),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_colon_get_path(
            self: parser.Parser, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            while True:
                path = self._parse_bitwise()

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                if isinstance(path, exp.Cast):
                    target_type = path.to
                    path = path.this
                else:
                    target_type = None

                if isinstance(path, exp.Expression):
                    path = exp.Literal.string(path.sql(dialect="snowflake"))

                # The extraction operator : is left-associative
                this = self.expression(
                    exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
                )

                if target_type:
                    this = exp.cast(this, target_type)

                if not self._match(TokenType.COLON):
                    break

            return self._parse_range(this)

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
class Snowflake(sqlglot.dialects.dialect.Dialect)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
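A minimal sketch of exercising this flag, assuming it is applied during the optimizer's qualification step (the exact qualified output is not reproduced here, since quoting and qualification details vary):

import sqlglot
from sqlglot.optimizer.qualify import qualify

# Parse the docstring's example with the Snowflake dialect.
expression = sqlglot.parse_one(
    "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y",
    read="snowflake",
)

# During qualification, the CTE alias column `c` should override the unaliased
# SUM(a) projection, so the HAVING reference resolves as described above.
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))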
Associates this dialect's time formats with their equivalent Python strftime formats.
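As an illustrative sketch of TIME_MAPPING at work (the exact DuckDB rendering below is an assumption), format tokens are translated when transpiling to an strftime-based dialect:

import sqlglot

# Snowflake's YYYY-MM-DD tokens should come out as %Y-%m-%d.
print(sqlglot.transpile(
    "SELECT TO_TIMESTAMP('2024-01-01', 'YYYY-MM-DD')",
    read="snowflake",
    write="duckdb",
)[0])
# Expected shape: SELECT STRPTIME('2024-01-01', '%Y-%m-%d')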
301 def quote_identifier(self, expression: E, identify: bool = True) -> E: 302 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 303 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 304 if ( 305 isinstance(expression, exp.Identifier) 306 and isinstance(expression.parent, exp.Table) 307 and expression.name.lower() == "dual" 308 ): 309 return expression # type: ignore 310 311 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
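A small usage sketch of the DUAL special case (driving the method through a transform is illustrative only; the printed shape is an assumption):

from sqlglot import parse_one
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
node = parse_one("SELECT a FROM DUAL", read="snowflake")

# quote_identifier is a no-op on non-Identifier nodes; it quotes the column `a`
# but skips the DUAL table name so Snowflake keeps treating it specially.
quoted = node.transform(lambda n: dialect.quote_identifier(n, identify=True))
print(quoted.sql(dialect="snowflake"))
# Expected shape: SELECT "a" FROM DUAL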
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
313 class Parser(parser.Parser): 314 IDENTIFY_PIVOT_STRINGS = True 315 316 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 321 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 322 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 323 this=seq_get(args, 1), expression=seq_get(args, 0) 324 ), 325 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 326 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 327 start=seq_get(args, 0), 328 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 329 step=seq_get(args, 2), 330 ), 331 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 332 "BITXOR": binary_from_function(exp.BitwiseXor), 333 "BIT_XOR": binary_from_function(exp.BitwiseXor), 334 "BOOLXOR": binary_from_function(exp.Xor), 335 "CONVERT_TIMEZONE": _build_convert_timezone, 336 "DATE_TRUNC": _date_trunc_to_time, 337 "DATEADD": lambda args: exp.DateAdd( 338 this=seq_get(args, 2), 339 expression=seq_get(args, 1), 340 unit=_map_date_part(seq_get(args, 0)), 341 ), 342 "DATEDIFF": _build_datediff, 343 "DIV0": _build_if_from_div0, 344 "FLATTEN": exp.Explode.from_arg_list, 345 "GET_PATH": lambda args, dialect: exp.JSONExtract( 346 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 347 ), 348 "IFF": exp.If.from_arg_list, 349 "LAST_DAY": lambda args: exp.LastDay( 350 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 351 ), 352 "LISTAGG": exp.GroupConcat.from_arg_list, 353 "NULLIFZERO": _build_if_from_nullifzero, 354 "OBJECT_CONSTRUCT": _build_object_construct, 355 "REGEXP_REPLACE": _build_regexp_replace, 356 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 357 "RLIKE": exp.RegexpLike.from_arg_list, 358 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 359 "TIMEDIFF": _build_datediff, 360 "TIMESTAMPDIFF": _build_datediff, 361 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 362 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 363 "TO_TIMESTAMP": _build_to_timestamp, 364 "TO_VARCHAR": exp.ToChar.from_arg_list, 365 "ZEROIFNULL": _build_if_from_zeroifnull, 366 } 367 368 FUNCTION_PARSERS = { 369 **parser.Parser.FUNCTION_PARSERS, 370 "DATE_PART": lambda self: self._parse_date_part(), 371 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 372 } 373 FUNCTION_PARSERS.pop("TRIM") 374 375 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 376 377 RANGE_PARSERS = { 378 **parser.Parser.RANGE_PARSERS, 379 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 380 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 381 TokenType.COLON: lambda self, this: self._parse_colon_get_path(this), 382 } 383 384 ALTER_PARSERS = { 385 **parser.Parser.ALTER_PARSERS, 386 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 387 "UNSET": lambda self: self.expression( 388 exp.Set, 389 tag=self._match_text_seq("TAG"), 390 expressions=self._parse_csv(self._parse_id_var), 391 unset=True, 392 ), 393 "SWAP": lambda self: self._parse_alter_table_swap(), 394 } 395 396 STATEMENT_PARSERS = { 397 **parser.Parser.STATEMENT_PARSERS, 398 TokenType.SHOW: lambda self: self._parse_show(), 399 } 400 401 PROPERTY_PARSERS = { 402 **parser.Parser.PROPERTY_PARSERS, 403 "LOCATION": lambda self: self._parse_location(), 404 } 405 406 SHOW_PARSERS = { 407 "SCHEMAS": _show_parser("SCHEMAS"), 408 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 409 "OBJECTS": 
_show_parser("OBJECTS"), 410 "TERSE OBJECTS": _show_parser("OBJECTS"), 411 "TABLES": _show_parser("TABLES"), 412 "TERSE TABLES": _show_parser("TABLES"), 413 "VIEWS": _show_parser("VIEWS"), 414 "TERSE VIEWS": _show_parser("VIEWS"), 415 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 416 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 417 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 418 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 419 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 420 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 421 "SEQUENCES": _show_parser("SEQUENCES"), 422 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 423 "COLUMNS": _show_parser("COLUMNS"), 424 "USERS": _show_parser("USERS"), 425 "TERSE USERS": _show_parser("USERS"), 426 } 427 428 STAGED_FILE_SINGLE_TOKENS = { 429 TokenType.DOT, 430 TokenType.MOD, 431 TokenType.SLASH, 432 } 433 434 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 435 436 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 437 438 def _parse_colon_get_path( 439 self: parser.Parser, this: t.Optional[exp.Expression] 440 ) -> t.Optional[exp.Expression]: 441 while True: 442 path = self._parse_bitwise() 443 444 # The cast :: operator has a lower precedence than the extraction operator :, so 445 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 446 if isinstance(path, exp.Cast): 447 target_type = path.to 448 path = path.this 449 else: 450 target_type = None 451 452 if isinstance(path, exp.Expression): 453 path = exp.Literal.string(path.sql(dialect="snowflake")) 454 455 # The extraction operator : is left-associative 456 this = self.expression( 457 exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path) 458 ) 459 460 if target_type: 461 this = exp.cast(this, target_type) 462 463 if not self._match(TokenType.COLON): 464 break 465 466 return self._parse_range(this) 467 468 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 469 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 470 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 471 this = self._parse_var() or self._parse_type() 472 473 if not this: 474 return None 475 476 self._match(TokenType.COMMA) 477 expression = self._parse_bitwise() 478 this = _map_date_part(this) 479 name = this.name.upper() 480 481 if name.startswith("EPOCH"): 482 if name == "EPOCH_MILLISECOND": 483 scale = 10**3 484 elif name == "EPOCH_MICROSECOND": 485 scale = 10**6 486 elif name == "EPOCH_NANOSECOND": 487 scale = 10**9 488 else: 489 scale = None 490 491 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 492 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 493 494 if scale: 495 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 496 497 return to_unix 498 499 return self.expression(exp.Extract, this=this, expression=expression) 500 501 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 502 if is_map: 503 # Keys are strings in Snowflake's objects, see also: 504 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 505 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 506 return self._parse_slice(self._parse_string()) 507 508 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 509 510 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 511 
lateral = super()._parse_lateral() 512 if not lateral: 513 return lateral 514 515 if isinstance(lateral.this, exp.Explode): 516 table_alias = lateral.args.get("alias") 517 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 518 if table_alias and not table_alias.args.get("columns"): 519 table_alias.set("columns", columns) 520 elif not table_alias: 521 exp.alias_(lateral, "_flattened", table=columns, copy=False) 522 523 return lateral 524 525 def _parse_at_before(self, table: exp.Table) -> exp.Table: 526 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 527 index = self._index 528 if self._match_texts(("AT", "BEFORE")): 529 this = self._prev.text.upper() 530 kind = ( 531 self._match(TokenType.L_PAREN) 532 and self._match_texts(self.HISTORICAL_DATA_KIND) 533 and self._prev.text.upper() 534 ) 535 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 536 537 if expression: 538 self._match_r_paren() 539 when = self.expression( 540 exp.HistoricalData, this=this, kind=kind, expression=expression 541 ) 542 table.set("when", when) 543 else: 544 self._retreat(index) 545 546 return table 547 548 def _parse_table_parts( 549 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 550 ) -> exp.Table: 551 # https://docs.snowflake.com/en/user-guide/querying-stage 552 if self._match(TokenType.STRING, advance=False): 553 table = self._parse_string() 554 elif self._match_text_seq("@", advance=False): 555 table = self._parse_location_path() 556 else: 557 table = None 558 559 if table: 560 file_format = None 561 pattern = None 562 563 self._match(TokenType.L_PAREN) 564 while self._curr and not self._match(TokenType.R_PAREN): 565 if self._match_text_seq("FILE_FORMAT", "=>"): 566 file_format = self._parse_string() or super()._parse_table_parts( 567 is_db_reference=is_db_reference 568 ) 569 elif self._match_text_seq("PATTERN", "=>"): 570 pattern = self._parse_string() 571 else: 572 break 573 574 self._match(TokenType.COMMA) 575 576 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 577 else: 578 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 579 580 return self._parse_at_before(table) 581 582 def _parse_id_var( 583 self, 584 any_token: bool = True, 585 tokens: t.Optional[t.Collection[TokenType]] = None, 586 ) -> t.Optional[exp.Expression]: 587 if self._match_text_seq("IDENTIFIER", "("): 588 identifier = ( 589 super()._parse_id_var(any_token=any_token, tokens=tokens) 590 or self._parse_string() 591 ) 592 self._match_r_paren() 593 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 594 595 return super()._parse_id_var(any_token=any_token, tokens=tokens) 596 597 def _parse_show_snowflake(self, this: str) -> exp.Show: 598 scope = None 599 scope_kind = None 600 601 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 602 # which is syntactically valid but has no effect on the output 603 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 604 605 history = self._match_text_seq("HISTORY") 606 607 like = self._parse_string() if self._match(TokenType.LIKE) else None 608 609 if self._match(TokenType.IN): 610 if self._match_text_seq("ACCOUNT"): 611 scope_kind = "ACCOUNT" 612 elif self._match_set(self.DB_CREATABLES): 613 scope_kind = self._prev.text.upper() 614 if self._curr: 615 scope = self._parse_table_parts() 616 elif self._curr: 617 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 618 scope = 
self._parse_table_parts() 619 620 return self.expression( 621 exp.Show, 622 **{ 623 "terse": terse, 624 "this": this, 625 "history": history, 626 "like": like, 627 "scope": scope, 628 "scope_kind": scope_kind, 629 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 630 "limit": self._parse_limit(), 631 "from": self._parse_string() if self._match(TokenType.FROM) else None, 632 }, 633 ) 634 635 def _parse_alter_table_swap(self) -> exp.SwapTable: 636 self._match_text_seq("WITH") 637 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 638 639 def _parse_location(self) -> exp.LocationProperty: 640 self._match(TokenType.EQ) 641 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 642 643 def _parse_location_path(self) -> exp.Var: 644 parts = [self._advance_any(ignore_reserved=True)] 645 646 # We avoid consuming a comma token because external tables like @foo and @bar 647 # can be joined in a query with a comma separator. 648 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 649 parts.append(self._advance_any(ignore_reserved=True)) 650 651 return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
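To get a feel for the Snowflake-specific parsing above, here is a quick sketch of the `:` extraction operator (the exact rendered type name is an assumption):

import sqlglot

# `:` builds a JSONExtract node (generated back as GET_PATH), and the `::` cast
# is rearranged to apply to the extraction result, not to the path argument.
print(sqlglot.transpile("SELECT v:a.b::string FROM t", read="snowflake", write="snowflake")[0])
# Expected shape: SELECT CAST(GET_PATH(v, 'a.b') AS TEXT) FROM t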
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
653 class Tokenizer(tokens.Tokenizer): 654 STRING_ESCAPES = ["\\", "'"] 655 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 656 RAW_STRINGS = ["$$"] 657 COMMENTS = ["--", "//", ("/*", "*/")] 658 659 KEYWORDS = { 660 **tokens.Tokenizer.KEYWORDS, 661 "BYTEINT": TokenType.INT, 662 "CHAR VARYING": TokenType.VARCHAR, 663 "CHARACTER VARYING": TokenType.VARCHAR, 664 "EXCLUDE": TokenType.EXCEPT, 665 "ILIKE ANY": TokenType.ILIKE_ANY, 666 "LIKE ANY": TokenType.LIKE_ANY, 667 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 668 "MINUS": TokenType.EXCEPT, 669 "NCHAR VARYING": TokenType.VARCHAR, 670 "PUT": TokenType.COMMAND, 671 "REMOVE": TokenType.COMMAND, 672 "RENAME": TokenType.REPLACE, 673 "RM": TokenType.COMMAND, 674 "SAMPLE": TokenType.TABLE_SAMPLE, 675 "SQL_DOUBLE": TokenType.DOUBLE, 676 "SQL_VARCHAR": TokenType.VARCHAR, 677 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 678 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 679 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 680 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 681 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 682 "TOP": TokenType.TOP, 683 } 684 685 SINGLE_TOKENS = { 686 **tokens.Tokenizer.SINGLE_TOKENS, 687 "$": TokenType.PARAMETER, 688 } 689 690 VAR_SINGLE_TOKENS = {"$"} 691 692 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
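One consequence of these keyword mappings, as a quick sketch (output shape assumed):

import sqlglot

# MINUS is tokenized as EXCEPT, so it normalizes on the way back out.
print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="snowflake")[0])
# Expected shape: SELECT 1 EXCEPT SELECT 2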
694 class Generator(generator.Generator): 695 PARAMETER_TOKEN = "$" 696 MATCHED_BY_SOURCE = False 697 SINGLE_STRING_INTERVAL = True 698 JOIN_HINTS = False 699 TABLE_HINTS = False 700 QUERY_HINTS = False 701 AGGREGATE_FILTER_SUPPORTED = False 702 SUPPORTS_TABLE_COPY = False 703 COLLATE_IS_FUNC = True 704 LIMIT_ONLY_LITERALS = True 705 JSON_KEY_VALUE_PAIR_SEP = "," 706 INSERT_OVERWRITE = " OVERWRITE INTO" 707 708 TRANSFORMS = { 709 **generator.Generator.TRANSFORMS, 710 exp.ArgMax: rename_func("MAX_BY"), 711 exp.ArgMin: rename_func("MIN_BY"), 712 exp.Array: inline_array_sql, 713 exp.ArrayConcat: rename_func("ARRAY_CAT"), 714 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 715 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 716 exp.AtTimeZone: lambda self, e: self.func( 717 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 718 ), 719 exp.BitwiseXor: rename_func("BITXOR"), 720 exp.DateAdd: date_delta_sql("DATEADD"), 721 exp.DateDiff: date_delta_sql("DATEDIFF"), 722 exp.DateStrToDate: datestrtodate_sql, 723 exp.DataType: _datatype_sql, 724 exp.DayOfMonth: rename_func("DAYOFMONTH"), 725 exp.DayOfWeek: rename_func("DAYOFWEEK"), 726 exp.DayOfYear: rename_func("DAYOFYEAR"), 727 exp.Explode: rename_func("FLATTEN"), 728 exp.Extract: rename_func("DATE_PART"), 729 exp.FromTimeZone: lambda self, e: self.func( 730 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 731 ), 732 exp.GenerateSeries: lambda self, e: self.func( 733 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 734 ), 735 exp.GroupConcat: rename_func("LISTAGG"), 736 exp.If: if_sql(name="IFF", false_value="NULL"), 737 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 738 exp.JSONExtractScalar: lambda self, e: self.func( 739 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 740 ), 741 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 742 exp.JSONPathRoot: lambda *_: "", 743 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 744 exp.LogicalOr: rename_func("BOOLOR_AGG"), 745 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 746 exp.Max: max_or_greatest, 747 exp.Min: min_or_least, 748 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 749 exp.PercentileCont: transforms.preprocess( 750 [transforms.add_within_group_for_percentiles] 751 ), 752 exp.PercentileDisc: transforms.preprocess( 753 [transforms.add_within_group_for_percentiles] 754 ), 755 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 756 exp.RegexpILike: _regexpilike_sql, 757 exp.Rand: rename_func("RANDOM"), 758 exp.Select: transforms.preprocess( 759 [ 760 transforms.eliminate_distinct_on, 761 transforms.explode_to_unnest(), 762 transforms.eliminate_semi_and_anti_joins, 763 ] 764 ), 765 exp.SHA: rename_func("SHA1"), 766 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 767 exp.StartsWith: rename_func("STARTSWITH"), 768 exp.StrPosition: lambda self, e: self.func( 769 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 770 ), 771 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 772 exp.Stuff: rename_func("INSERT"), 773 exp.TimestampDiff: lambda self, e: self.func( 774 "TIMESTAMPDIFF", e.unit, e.expression, e.this 775 ), 776 exp.TimestampTrunc: timestamptrunc_sql, 777 exp.TimeStrToTime: timestrtotime_sql, 778 exp.TimeToStr: lambda self, e: self.func( 779 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 780 ), 781 exp.TimeToUnix: lambda self, e: 
f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 782 exp.ToArray: rename_func("TO_ARRAY"), 783 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 784 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 785 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 786 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 787 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 788 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 789 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 790 exp.Xor: rename_func("BOOLXOR"), 791 } 792 793 SUPPORTED_JSON_PATH_PARTS = { 794 exp.JSONPathKey, 795 exp.JSONPathRoot, 796 exp.JSONPathSubscript, 797 } 798 799 TYPE_MAPPING = { 800 **generator.Generator.TYPE_MAPPING, 801 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 802 } 803 804 STAR_MAPPING = { 805 "except": "EXCLUDE", 806 "replace": "RENAME", 807 } 808 809 PROPERTIES_LOCATION = { 810 **generator.Generator.PROPERTIES_LOCATION, 811 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 812 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 813 } 814 815 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 816 milli = expression.args.get("milli") 817 if milli is not None: 818 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 819 expression.set("nano", milli_to_nano) 820 821 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 822 823 def trycast_sql(self, expression: exp.TryCast) -> str: 824 value = expression.this 825 826 if value.type is None: 827 from sqlglot.optimizer.annotate_types import annotate_types 828 829 value = annotate_types(value) 830 831 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 832 return super().trycast_sql(expression) 833 834 # TRY_CAST only works for string values in Snowflake 835 return self.cast_sql(expression) 836 837 def log_sql(self, expression: exp.Log) -> str: 838 if not expression.expression: 839 return self.func("LN", expression.this) 840 841 return super().log_sql(expression) 842 843 def unnest_sql(self, expression: exp.Unnest) -> str: 844 unnest_alias = expression.args.get("alias") 845 offset = expression.args.get("offset") 846 847 columns = [ 848 exp.to_identifier("seq"), 849 exp.to_identifier("key"), 850 exp.to_identifier("path"), 851 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 852 seq_get(unnest_alias.columns if unnest_alias else [], 0) 853 or exp.to_identifier("value"), 854 exp.to_identifier("this"), 855 ] 856 857 if unnest_alias: 858 unnest_alias.set("columns", columns) 859 else: 860 unnest_alias = exp.TableAlias(this="_u", columns=columns) 861 862 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 863 alias = self.sql(unnest_alias) 864 alias = f" AS {alias}" if alias else "" 865 return f"{explode}{alias}" 866 867 def show_sql(self, expression: exp.Show) -> str: 868 terse = "TERSE " if expression.args.get("terse") else "" 869 history = " HISTORY" if expression.args.get("history") else "" 870 like = self.sql(expression, "like") 871 like = f" LIKE {like}" if like else "" 872 873 scope = self.sql(expression, "scope") 874 scope = f" {scope}" if scope else "" 875 876 scope_kind = self.sql(expression, "scope_kind") 877 if scope_kind: 878 scope_kind = f" IN {scope_kind}" 879 880 starts_with = self.sql(expression, "starts_with") 881 if starts_with: 882 starts_with = f" STARTS WITH {starts_with}" 883 884 limit = self.sql(expression, "limit") 885 886 from_ = self.sql(expression, "from") 887 if from_: 888 from_ = f" FROM 
{from_}" 889 890 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 891 892 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 893 # Other dialects don't support all of the following parameters, so we need to 894 # generate default values as necessary to ensure the transpilation is correct 895 group = expression.args.get("group") 896 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 897 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 898 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 899 900 return self.func( 901 "REGEXP_SUBSTR", 902 expression.this, 903 expression.expression, 904 position, 905 occurrence, 906 parameters, 907 group, 908 ) 909 910 def except_op(self, expression: exp.Except) -> str: 911 if not expression.args.get("distinct"): 912 self.unsupported("EXCEPT with All is not supported in Snowflake") 913 return super().except_op(expression) 914 915 def intersect_op(self, expression: exp.Intersect) -> str: 916 if not expression.args.get("distinct"): 917 self.unsupported("INTERSECT with All is not supported in Snowflake") 918 return super().intersect_op(expression) 919 920 def describe_sql(self, expression: exp.Describe) -> str: 921 # Default to table if kind is unknown 922 kind_value = expression.args.get("kind") or "TABLE" 923 kind = f" {kind_value}" if kind_value else "" 924 this = f" {self.sql(expression, 'this')}" 925 expressions = self.expressions(expression, flat=True) 926 expressions = f" {expressions}" if expressions else "" 927 return f"DESCRIBE{kind}{this}{expressions}" 928 929 def generatedasidentitycolumnconstraint_sql( 930 self, expression: exp.GeneratedAsIdentityColumnConstraint 931 ) -> str: 932 start = expression.args.get("start") 933 start = f" START {start}" if start else "" 934 increment = expression.args.get("increment") 935 increment = f" INCREMENT {increment}" if increment else "" 936 return f"AUTOINCREMENT{start}{increment}" 937 938 def swaptable_sql(self, expression: exp.SwapTable) -> str: 939 this = self.sql(expression, "this") 940 return f"SWAP WITH {this}" 941 942 def with_properties(self, properties: exp.Properties) -> str: 943 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 944 945 def cluster_sql(self, expression: exp.Cluster) -> str: 946 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 947 948 def struct_sql(self, expression: exp.Struct) -> str: 949 keys = [] 950 values = [] 951 952 for i, e in enumerate(expression.expressions): 953 if isinstance(e, exp.PropertyEQ): 954 keys.append( 955 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 956 ) 957 values.append(e.expression) 958 else: 959 keys.append(exp.Literal.string(f"_{i}")) 960 values.append(e) 961 962 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
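A quick sketch of the TRANSFORMS table in action (output shape assumed):

import sqlglot

# exp.If is rendered as IFF per the mapping above.
print(sqlglot.transpile("SELECT IF(x > 0, 1, NULL)", write="snowflake")[0])
# Expected shape: SELECT IFF(x > 0, 1, NULL)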
815 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 816 milli = expression.args.get("milli") 817 if milli is not None: 818 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 819 expression.set("nano", milli_to_nano) 820 821 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
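A sketch of the milli-to-nano folding, assuming T-SQL's DATETIMEFROMPARTS parses into exp.TimestampFromParts with a milli argument (both that mapping and the output shape are assumptions):

import sqlglot

sql = "SELECT DATETIMEFROMPARTS(2024, 1, 1, 12, 30, 0, 500)"
# The milli argument is multiplied by 1000000 and emitted as the nano slot.
print(sqlglot.transpile(sql, read="tsql", write="snowflake")[0])
# Expected shape: SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 0, 500 * 1000000)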
823 def trycast_sql(self, expression: exp.TryCast) -> str: 824 value = expression.this 825 826 if value.type is None: 827 from sqlglot.optimizer.annotate_types import annotate_types 828 829 value = annotate_types(value) 830 831 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 832 return super().trycast_sql(expression) 833 834 # TRY_CAST only works for string values in Snowflake 835 return self.cast_sql(expression)
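A usage sketch (outputs assumed): TRY_CAST survives only when the operand is string-typed or of unknown type; otherwise it degrades to a plain CAST, since Snowflake's TRY_CAST accepts only string inputs:

import sqlglot

# `x` has unknown type, so TRY_CAST is preserved.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="snowflake", write="snowflake")[0])
# Expected shape: SELECT TRY_CAST(x AS INT)

# A numeric literal is not a string, so this should fall back to CAST.
print(sqlglot.transpile("SELECT TRY_CAST(1 AS TEXT)", read="snowflake", write="snowflake")[0])
# Expected shape: SELECT CAST(1 AS TEXT)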
843 def unnest_sql(self, expression: exp.Unnest) -> str: 844 unnest_alias = expression.args.get("alias") 845 offset = expression.args.get("offset") 846 847 columns = [ 848 exp.to_identifier("seq"), 849 exp.to_identifier("key"), 850 exp.to_identifier("path"), 851 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 852 seq_get(unnest_alias.columns if unnest_alias else [], 0) 853 or exp.to_identifier("value"), 854 exp.to_identifier("this"), 855 ] 856 857 if unnest_alias: 858 unnest_alias.set("columns", columns) 859 else: 860 unnest_alias = exp.TableAlias(this="_u", columns=columns) 861 862 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 863 alias = self.sql(unnest_alias) 864 alias = f" AS {alias}" if alias else "" 865 return f"{explode}{alias}"
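A sketch of UNNEST transpilation (the source-dialect choice and output shape are assumptions):

import sqlglot

# UNNEST becomes a FLATTEN table function; the alias exposes all six FLATTEN
# output columns so downstream references keep resolving.
print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="bigquery", write="snowflake")[0])
# Expected shape:
# SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)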
867 def show_sql(self, expression: exp.Show) -> str: 868 terse = "TERSE " if expression.args.get("terse") else "" 869 history = " HISTORY" if expression.args.get("history") else "" 870 like = self.sql(expression, "like") 871 like = f" LIKE {like}" if like else "" 872 873 scope = self.sql(expression, "scope") 874 scope = f" {scope}" if scope else "" 875 876 scope_kind = self.sql(expression, "scope_kind") 877 if scope_kind: 878 scope_kind = f" IN {scope_kind}" 879 880 starts_with = self.sql(expression, "starts_with") 881 if starts_with: 882 starts_with = f" STARTS WITH {starts_with}" 883 884 limit = self.sql(expression, "limit") 885 886 from_ = self.sql(expression, "from") 887 if from_: 888 from_ = f" FROM {from_}" 889 890 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
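A round-trip sketch (exact output assumed):

import sqlglot

sql = "SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1 STARTS WITH 'a' LIMIT 10"
# TERSE, LIKE, IN, STARTS WITH and LIMIT are reassembled in the order the
# generator above emits them.
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
# Expected shape: the same statement back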
892 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 893 # Other dialects don't support all of the following parameters, so we need to 894 # generate default values as necessary to ensure the transpilation is correct 895 group = expression.args.get("group") 896 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 897 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 898 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 899 900 return self.func( 901 "REGEXP_SUBSTR", 902 expression.this, 903 expression.expression, 904 position, 905 occurrence, 906 parameters, 907 group, 908 )
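A sketch of the default-filling behavior, assuming DuckDB's three-argument REGEXP_EXTRACT maps onto exp.RegexpExtract with a group (output shape assumed):

import sqlglot

# Only `group` is set, so parameters 'c', occurrence 1 and position 1 are
# generated to keep REGEXP_SUBSTR's argument positions correct.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(a, 'b', 2)", read="duckdb", write="snowflake")[0])
# Expected shape: SELECT REGEXP_SUBSTR(a, 'b', 1, 1, 'c', 2)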
920 def describe_sql(self, expression: exp.Describe) -> str: 921 # Default to table if kind is unknown 922 kind_value = expression.args.get("kind") or "TABLE" 923 kind = f" {kind_value}" if kind_value else "" 924 this = f" {self.sql(expression, 'this')}" 925 expressions = self.expressions(expression, flat=True) 926 expressions = f" {expressions}" if expressions else "" 927 return f"DESCRIBE{kind}{this}{expressions}"
929 def generatedasidentitycolumnconstraint_sql( 930 self, expression: exp.GeneratedAsIdentityColumnConstraint 931 ) -> str: 932 start = expression.args.get("start") 933 start = f" START {start}" if start else "" 934 increment = expression.args.get("increment") 935 increment = f" INCREMENT {increment}" if increment else "" 936 return f"AUTOINCREMENT{start}{increment}"
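A sketch of identity-column transpilation (the source syntax and output shape are assumptions):

import sqlglot

sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
# START and INCREMENT are carried over onto Snowflake's AUTOINCREMENT syntax.
print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
# Expected shape: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)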
948 def struct_sql(self, expression: exp.Struct) -> str: 949 keys = [] 950 values = [] 951 952 for i, e in enumerate(expression.expressions): 953 if isinstance(e, exp.PropertyEQ): 954 keys.append( 955 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 956 ) 957 values.append(e.expression) 958 else: 959 keys.append(exp.Literal.string(f"_{i}")) 960 values.append(e) 961 962 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
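A sketch of struct construction (the source-dialect choice and output shape are assumptions):

import sqlglot

# Named fields become key/value pairs; unnamed fields get positional '_<i>' keys.
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2)", read="bigquery", write="snowflake")[0])
# Expected shape: SELECT OBJECT_CONSTRUCT('a', 1, '_1', 2)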
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql