sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import flatten, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            casts = []
            json_path = []

            while self._match(TokenType.COLON):
                path = super()._parse_column_ops(self._parse_field(any_token=True))

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                while isinstance(path, exp.Cast):
                    casts.append(path.to)
                    path = path.this

                if path:
                    json_path.append(path.sql(dialect="snowflake", copy=False))

            if json_path:
                this = self.expression(
                    exp.JSONExtract,
                    this=this,
                    expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                )

                while casts:
                    this = self.expression(exp.Cast, this=this, to=casts.pop())

            return this

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
class Snowflake(sqlglot.dialects.dialect.Dialect):
class Snowflake(Dialect)

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN: Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, the query

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
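A minimal sketch of how this plays out through the optimizer's column qualification step (the qualify entry point and the exact output are indicative, not guaranteed):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expression = sqlglot.parse_one(sql, read="snowflake")

    # Under the Snowflake dialect the CTE alias column `c` takes precedence,
    # so the inner projection gains an explicit `AS c` during qualification.
    print(qualify(expression, dialect="snowflake").sql("snowflake"))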
TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
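For instance, the format string in TO_TIMESTAMP is rewritten into strftime directives when transpiling to a dialect that consumes them; a minimal sketch (the DuckDB output shown is indicative):

    import sqlglot

    # Snowflake's 'YYYY-MM-DD' tokens map onto the strftime directives
    # '%Y-%m-%d' that DuckDB's STRPTIME expects.
    sql = "SELECT TO_TIMESTAMP('2024-01-31', 'YYYY-MM-DD')"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # e.g. SELECT STRPTIME('2024-01-31', '%Y-%m-%d')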
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
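As a rough illustration of the DUAL carve-out, hand-quoting identifiers directly (the printed result is indicative):

    import sqlglot
    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    expression = sqlglot.parse_one("SELECT col FROM DUAL", read="snowflake")

    # Quote every identifier in the tree: the column is quoted, but the
    # DUAL table name is skipped so Snowflake keeps its special meaning.
    for ident in expression.find_all(exp.Identifier):
        dialect.quote_identifier(ident)

    print(expression.sql("snowflake"))
    # e.g. SELECT "col" FROM DUAL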
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "CONVERT_TIMEZONE": _build_convert_timezone,
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        ),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TO_TIMESTAMP": _build_to_timestamp,
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location(),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = super()._parse_column_ops(this)

        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            path = super()._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if path:
                json_path.append(path.sql(dialect="snowflake", copy=False))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = _map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_at_before(self, table: exp.Table) -> exp.Table:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        if self._match_texts(("AT", "BEFORE")):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                when = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
                table.set("when", when)
            else:
                self._retreat(index)

        return table

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            self._match(TokenType.L_PAREN)
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return self._parse_at_before(table)

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator.
        while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
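As a small illustration of the function mappings above, Snowflake's unit-first DATEDIFF is reordered into sqlglot's canonical node (the DuckDB output is indicative):

    import sqlglot

    # Snowflake puts the unit first: DATEDIFF(<unit>, <start>, <end>).
    # The parser maps this onto exp.DateDiff so other dialects render it correctly.
    sql = "SELECT DATEDIFF(day, start_date, end_date) FROM t"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # e.g. SELECT DATE_DIFF('DAY', start_date, end_date) FROM t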
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RENAME": TokenType.REPLACE,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
        "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TIMESTAMPNTZ": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
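A quick way to see a couple of these rules in action (the token names shown are indicative):

    from sqlglot import Dialect

    # $$...$$ is tokenized as a raw string, and MINUS is keyworded as EXCEPT.
    tokens = Dialect.get_or_raise("snowflake").tokenize("SELECT $$raw$$ MINUS SELECT 1")
    print([(token.token_type.name, token.text) for token in tokens])
    # e.g. [('SELECT', 'SELECT'), ('RAW_STRING', 'raw'), ('EXCEPT', 'MINUS'), ...]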
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DataType: _datatype_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
    }

    STAR_MAPPING = {
        "except": "EXCLUDE",
        "replace": "RENAME",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
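For example, several of the TRANSFORMS above kick in when targeting Snowflake from another dialect (the output is indicative):

    import sqlglot

    # exp.GroupConcat is renamed to LISTAGG and exp.Rand to RANDOM when
    # generating Snowflake SQL.
    sql = "SELECT GROUP_CONCAT(name), RAND() FROM t"
    print(sqlglot.transpile(sql, read="mysql", write="snowflake")[0])
    # e.g. SELECT LISTAGG(name), RANDOM() FROM t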
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
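A hedged sketch of the millisecond folding, building the node by hand (the constructor argument names other than milli are assumptions, and the output is indicative):

    from sqlglot import exp

    # Build a TimestampFromParts node carrying a milliseconds value, as a
    # non-Snowflake parser might produce; year/month/day/hour/min/sec are
    # assumed argument names.
    node = exp.TimestampFromParts(
        year=exp.Literal.number(2024),
        month=exp.Literal.number(1),
        day=exp.Literal.number(31),
        hour=exp.Literal.number(12),
        min=exp.Literal.number(0),
        sec=exp.Literal.number(0),
        milli=exp.Literal.number(500),
    )
    print(node.sql("snowflake"))
    # e.g. TIMESTAMP_FROM_PARTS(2024, 1, 31, 12, 0, 0, 500 * 1000000)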
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
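The resulting behavior, sketched with indicative outputs:

    import sqlglot

    # String operand: Snowflake's TRY_CAST applies, so it is preserved.
    print(sqlglot.transpile("SELECT TRY_CAST('10' AS INT)", write="snowflake")[0])
    # e.g. SELECT TRY_CAST('10' AS INT)

    # Non-string operand: TRY_CAST would be invalid, so a plain CAST is emitted.
    print(sqlglot.transpile("SELECT TRY_CAST(10 AS TEXT)", write="snowflake")[0])
    # e.g. SELECT CAST(10 AS TEXT)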
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
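For instance, an UNNEST coming from another dialect is rendered with Snowflake's FLATTEN table function (the alias and output are indicative):

    import sqlglot

    # The explicit column list mirrors FLATTEN's output columns so that
    # references like `value` keep resolving after transpilation.
    sql = "SELECT value FROM UNNEST([1, 2, 3])"
    print(sqlglot.transpile(sql, read="duckdb", write="snowflake")[0])
    # e.g. SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2, 3])) AS _u(seq, key, path, index, value, this)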
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
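SHOW statements therefore round-trip through a structured node rather than an opaque command; a small sketch (output indicative):

    import sqlglot

    # The parsed exp.Show node carries the terse/history/scope/limit args,
    # which show_sql reassembles in Snowflake's clause order.
    sql = "SHOW TERSE TABLES HISTORY IN SCHEMA db1.s1 STARTS WITH 'ab' LIMIT 10"
    print(sqlglot.parse_one(sql, read="snowflake").sql("snowflake"))
    # e.g. SHOW TERSE TABLES HISTORY IN SCHEMA db1.s1 STARTS WITH 'ab' LIMIT 10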
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
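For example (the DuckDB input and the generated defaults are indicative):

    import sqlglot

    # Only `group` is set on the parsed expression, so position, occurrence
    # and parameters are filled with their Snowflake defaults (1, 1, 'c').
    sql = "SELECT REGEXP_EXTRACT(col, 'a(b)', 1)"
    print(sqlglot.transpile(sql, read="duckdb", write="snowflake")[0])
    # e.g. SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)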
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
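A minimal example of the TABLE fallback (output indicative):

    import sqlglot

    # No explicit kind was given, so describe_sql defaults it to TABLE.
    print(sqlglot.parse_one("DESCRIBE db1.t1", read="snowflake").sql("snowflake"))
    # e.g. DESCRIBE TABLE db1.t1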
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
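For instance, a standard identity column can be rendered with Snowflake's AUTOINCREMENT shorthand (the Postgres input and the output are indicative):

    import sqlglot

    # START WITH / INCREMENT BY are parsed into the constraint's start and
    # increment args, which Snowflake renders as AUTOINCREMENT START ... INCREMENT ...
    sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
    # e.g. CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)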
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
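As an illustration (BigQuery input; output indicative):

    import sqlglot

    # Named fields become alternating key/value arguments; an unnamed field
    # would get a positional key such as '_0'.
    sql = "SELECT STRUCT(1 AS a, 2 AS b)"
    print(sqlglot.transpile(sql, read="bigquery", write="snowflake")[0])
    # e.g. SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)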
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql