sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import flatten, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            casts = []
            json_path = []

            while self._match(TokenType.COLON):
                path = super()._parse_column_ops(self._parse_field(any_token=True))

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                while isinstance(path, exp.Cast):
                    casts.append(path.to)
                    path = path.this

                if path:
                    json_path.append(path.sql(dialect="snowflake", copy=False))

            if json_path:
                this = self.expression(
                    exp.JSONExtract,
                    this=this,
                    expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                )

                while casts:
                    this = self.expression(exp.Cast, this=this, to=casts.pop())

            return this

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
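As a quick end-to-end illustration of the hooks above (a minimal sketch; the exact output can vary across sqlglot versions), Snowflake's DIV0 is parsed into the exp.If tree built by _build_if_from_div0, which a target dialect without DIV0 then renders as an equivalent CASE expression:

import sqlglot

# DIV0(a, b) parses to IF(b = 0, 0, a / b) via _build_if_from_div0, so a target
# dialect that lacks DIV0 renders the equivalent CASE expression instead.
print(sqlglot.transpile("SELECT DIV0(a, b) FROM t", read="snowflake", write="duckdb")[0])
# e.g. SELECT CASE WHEN b = 0 THEN 0 ELSE a / b END FROM t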
Specifies the strategy according to which identifiers should be normalized.
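For example, a minimal sketch of the uppercase normalization Snowflake uses (exact output may vary slightly across sqlglot versions):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Snowflake resolves unquoted identifiers case-insensitively and stores them
# uppercased, so normalization uppercases them; quoted identifiers keep their
# original casing.
expr = sqlglot.parse_one('SELECT my_col, "MixedCase" FROM my_table', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
# Roughly: SELECT MY_COL, "MixedCase" FROM MY_TABLE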
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;

will be rewritten as

WITH y(c) AS ( SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 ) SELECT c FROM y;
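One way to see the rewrite end to end (a sketch, assuming the optimizer's qualify step is what applies this flag):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
# The CTE alias column `c` is pushed onto the projection, so the HAVING
# clause resolves against the aggregate.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))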
Associates this dialect's time formats with their equivalent Python strftime formats.
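For instance, a couple of representative entries (a sketch; the full mapping lives on the dialect class):

from sqlglot.dialects.snowflake import Snowflake

# Snowflake's 'YYYY' corresponds to Python's '%Y', and 'MM' to '%m'.
print(Snowflake.TIME_MAPPING["YYYY"], Snowflake.TIME_MAPPING["MM"])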
301 def quote_identifier(self, expression: E, identify: bool = True) -> E: 302 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 303 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 304 if ( 305 isinstance(expression, exp.Identifier) 306 and isinstance(expression.parent, exp.Table) 307 and expression.name.lower() == "dual" 308 ): 309 return expression # type: ignore 310 311 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
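For example, a minimal sketch of the DUAL special case described above:

import sqlglot

# Even with identify=True, the bare DUAL table name stays unquoted so that
# Snowflake still treats it as the special keyword.
print(sqlglot.transpile("SELECT 1 FROM DUAL", read="snowflake", write="snowflake", identify=True)[0])
# Roughly: SELECT 1 FROM DUAL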
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
313 class Parser(parser.Parser): 314 IDENTIFY_PIVOT_STRINGS = True 315 316 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 321 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 322 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 323 this=seq_get(args, 1), expression=seq_get(args, 0) 324 ), 325 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 326 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 327 start=seq_get(args, 0), 328 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 329 step=seq_get(args, 2), 330 ), 331 "BITXOR": binary_from_function(exp.BitwiseXor), 332 "BIT_XOR": binary_from_function(exp.BitwiseXor), 333 "BOOLXOR": binary_from_function(exp.Xor), 334 "CONVERT_TIMEZONE": _build_convert_timezone, 335 "DATE_TRUNC": _date_trunc_to_time, 336 "DATEADD": lambda args: exp.DateAdd( 337 this=seq_get(args, 2), 338 expression=seq_get(args, 1), 339 unit=_map_date_part(seq_get(args, 0)), 340 ), 341 "DATEDIFF": _build_datediff, 342 "DIV0": _build_if_from_div0, 343 "FLATTEN": exp.Explode.from_arg_list, 344 "GET_PATH": lambda args, dialect: exp.JSONExtract( 345 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 346 ), 347 "IFF": exp.If.from_arg_list, 348 "LAST_DAY": lambda args: exp.LastDay( 349 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 350 ), 351 "LISTAGG": exp.GroupConcat.from_arg_list, 352 "NULLIFZERO": _build_if_from_nullifzero, 353 "OBJECT_CONSTRUCT": _build_object_construct, 354 "REGEXP_REPLACE": _build_regexp_replace, 355 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 356 "RLIKE": exp.RegexpLike.from_arg_list, 357 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 358 "TIMEDIFF": _build_datediff, 359 "TIMESTAMPDIFF": _build_datediff, 360 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 361 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 362 "TO_TIMESTAMP": _build_to_timestamp, 363 "TO_VARCHAR": exp.ToChar.from_arg_list, 364 "ZEROIFNULL": _build_if_from_zeroifnull, 365 } 366 367 FUNCTION_PARSERS = { 368 **parser.Parser.FUNCTION_PARSERS, 369 "DATE_PART": lambda self: self._parse_date_part(), 370 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 371 } 372 FUNCTION_PARSERS.pop("TRIM") 373 374 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 375 376 RANGE_PARSERS = { 377 **parser.Parser.RANGE_PARSERS, 378 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 379 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 380 } 381 382 ALTER_PARSERS = { 383 **parser.Parser.ALTER_PARSERS, 384 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 385 "UNSET": lambda self: self.expression( 386 exp.Set, 387 tag=self._match_text_seq("TAG"), 388 expressions=self._parse_csv(self._parse_id_var), 389 unset=True, 390 ), 391 "SWAP": lambda self: self._parse_alter_table_swap(), 392 } 393 394 STATEMENT_PARSERS = { 395 **parser.Parser.STATEMENT_PARSERS, 396 TokenType.SHOW: lambda self: self._parse_show(), 397 } 398 399 PROPERTY_PARSERS = { 400 **parser.Parser.PROPERTY_PARSERS, 401 "LOCATION": lambda self: self._parse_location(), 402 } 403 404 SHOW_PARSERS = { 405 "SCHEMAS": _show_parser("SCHEMAS"), 406 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 407 "OBJECTS": _show_parser("OBJECTS"), 408 "TERSE OBJECTS": _show_parser("OBJECTS"), 409 "TABLES": _show_parser("TABLES"), 410 "TERSE TABLES":
_show_parser("TABLES"), 411 "VIEWS": _show_parser("VIEWS"), 412 "TERSE VIEWS": _show_parser("VIEWS"), 413 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 414 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 415 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 416 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 417 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 418 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 419 "SEQUENCES": _show_parser("SEQUENCES"), 420 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 421 "COLUMNS": _show_parser("COLUMNS"), 422 "USERS": _show_parser("USERS"), 423 "TERSE USERS": _show_parser("USERS"), 424 } 425 426 STAGED_FILE_SINGLE_TOKENS = { 427 TokenType.DOT, 428 TokenType.MOD, 429 TokenType.SLASH, 430 } 431 432 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 433 434 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 435 436 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 437 this = super()._parse_column_ops(this) 438 439 casts = [] 440 json_path = [] 441 442 while self._match(TokenType.COLON): 443 path = super()._parse_column_ops(self._parse_field(any_token=True)) 444 445 # The cast :: operator has a lower precedence than the extraction operator :, so 446 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 447 while isinstance(path, exp.Cast): 448 casts.append(path.to) 449 path = path.this 450 451 if path: 452 json_path.append(path.sql(dialect="snowflake", copy=False)) 453 454 if json_path: 455 this = self.expression( 456 exp.JSONExtract, 457 this=this, 458 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 459 ) 460 461 while casts: 462 this = self.expression(exp.Cast, this=this, to=casts.pop()) 463 464 return this 465 466 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 467 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 468 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 469 this = self._parse_var() or self._parse_type() 470 471 if not this: 472 return None 473 474 self._match(TokenType.COMMA) 475 expression = self._parse_bitwise() 476 this = _map_date_part(this) 477 name = this.name.upper() 478 479 if name.startswith("EPOCH"): 480 if name == "EPOCH_MILLISECOND": 481 scale = 10**3 482 elif name == "EPOCH_MICROSECOND": 483 scale = 10**6 484 elif name == "EPOCH_NANOSECOND": 485 scale = 10**9 486 else: 487 scale = None 488 489 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 490 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 491 492 if scale: 493 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 494 495 return to_unix 496 497 return self.expression(exp.Extract, this=this, expression=expression) 498 499 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 500 if is_map: 501 # Keys are strings in Snowflake's objects, see also: 502 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 503 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 504 return self._parse_slice(self._parse_string()) 505 506 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 507 508 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 509 lateral = super()._parse_lateral() 510 if not lateral: 511 return lateral 512 513 if 
isinstance(lateral.this, exp.Explode): 514 table_alias = lateral.args.get("alias") 515 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 516 if table_alias and not table_alias.args.get("columns"): 517 table_alias.set("columns", columns) 518 elif not table_alias: 519 exp.alias_(lateral, "_flattened", table=columns, copy=False) 520 521 return lateral 522 523 def _parse_at_before(self, table: exp.Table) -> exp.Table: 524 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 525 index = self._index 526 if self._match_texts(("AT", "BEFORE")): 527 this = self._prev.text.upper() 528 kind = ( 529 self._match(TokenType.L_PAREN) 530 and self._match_texts(self.HISTORICAL_DATA_KIND) 531 and self._prev.text.upper() 532 ) 533 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 534 535 if expression: 536 self._match_r_paren() 537 when = self.expression( 538 exp.HistoricalData, this=this, kind=kind, expression=expression 539 ) 540 table.set("when", when) 541 else: 542 self._retreat(index) 543 544 return table 545 546 def _parse_table_parts( 547 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 548 ) -> exp.Table: 549 # https://docs.snowflake.com/en/user-guide/querying-stage 550 if self._match(TokenType.STRING, advance=False): 551 table = self._parse_string() 552 elif self._match_text_seq("@", advance=False): 553 table = self._parse_location_path() 554 else: 555 table = None 556 557 if table: 558 file_format = None 559 pattern = None 560 561 self._match(TokenType.L_PAREN) 562 while self._curr and not self._match(TokenType.R_PAREN): 563 if self._match_text_seq("FILE_FORMAT", "=>"): 564 file_format = self._parse_string() or super()._parse_table_parts( 565 is_db_reference=is_db_reference 566 ) 567 elif self._match_text_seq("PATTERN", "=>"): 568 pattern = self._parse_string() 569 else: 570 break 571 572 self._match(TokenType.COMMA) 573 574 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 575 else: 576 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 577 578 return self._parse_at_before(table) 579 580 def _parse_id_var( 581 self, 582 any_token: bool = True, 583 tokens: t.Optional[t.Collection[TokenType]] = None, 584 ) -> t.Optional[exp.Expression]: 585 if self._match_text_seq("IDENTIFIER", "("): 586 identifier = ( 587 super()._parse_id_var(any_token=any_token, tokens=tokens) 588 or self._parse_string() 589 ) 590 self._match_r_paren() 591 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 592 593 return super()._parse_id_var(any_token=any_token, tokens=tokens) 594 595 def _parse_show_snowflake(self, this: str) -> exp.Show: 596 scope = None 597 scope_kind = None 598 599 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 600 # which is syntactically valid but has no effect on the output 601 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 602 603 history = self._match_text_seq("HISTORY") 604 605 like = self._parse_string() if self._match(TokenType.LIKE) else None 606 607 if self._match(TokenType.IN): 608 if self._match_text_seq("ACCOUNT"): 609 scope_kind = "ACCOUNT" 610 elif self._match_set(self.DB_CREATABLES): 611 scope_kind = self._prev.text.upper() 612 if self._curr: 613 scope = self._parse_table_parts() 614 elif self._curr: 615 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 616 scope = self._parse_table_parts() 617 618 return self.expression( 619 exp.Show, 620 **{ 621 "terse": terse, 622
"this": this, 623 "history": history, 624 "like": like, 625 "scope": scope, 626 "scope_kind": scope_kind, 627 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 628 "limit": self._parse_limit(), 629 "from": self._parse_string() if self._match(TokenType.FROM) else None, 630 }, 631 ) 632 633 def _parse_alter_table_swap(self) -> exp.SwapTable: 634 self._match_text_seq("WITH") 635 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 636 637 def _parse_location(self) -> exp.LocationProperty: 638 self._match(TokenType.EQ) 639 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 640 641 def _parse_location_path(self) -> exp.Var: 642 parts = [self._advance_any(ignore_reserved=True)] 643 644 # We avoid consuming a comma token because external tables like @foo and @bar 645 # can be joined in a query with a comma separator. 646 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 647 parts.append(self._advance_any(ignore_reserved=True)) 648 649 return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
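For example (a sketch; error_level is optional and defaults as described above):

import sqlglot
from sqlglot.errors import ErrorLevel

# IFF is parsed into the dialect-agnostic If expression, raising on the
# first parse error instead of collecting errors.
expr = sqlglot.parse_one(
    "SELECT IFF(x > 0, 'pos', 'non-pos') FROM t",
    read="snowflake",
    error_level=ErrorLevel.RAISE,
)
print(type(expr.selects[0]).__name__)  # Roughly: If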
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
651 class Tokenizer(tokens.Tokenizer): 652 STRING_ESCAPES = ["\\", "'"] 653 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 654 RAW_STRINGS = ["$$"] 655 COMMENTS = ["--", "//", ("/*", "*/")] 656 657 KEYWORDS = { 658 **tokens.Tokenizer.KEYWORDS, 659 "BYTEINT": TokenType.INT, 660 "CHAR VARYING": TokenType.VARCHAR, 661 "CHARACTER VARYING": TokenType.VARCHAR, 662 "EXCLUDE": TokenType.EXCEPT, 663 "ILIKE ANY": TokenType.ILIKE_ANY, 664 "LIKE ANY": TokenType.LIKE_ANY, 665 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 666 "MINUS": TokenType.EXCEPT, 667 "NCHAR VARYING": TokenType.VARCHAR, 668 "PUT": TokenType.COMMAND, 669 "REMOVE": TokenType.COMMAND, 670 "RENAME": TokenType.REPLACE, 671 "RM": TokenType.COMMAND, 672 "SAMPLE": TokenType.TABLE_SAMPLE, 673 "SQL_DOUBLE": TokenType.DOUBLE, 674 "SQL_VARCHAR": TokenType.VARCHAR, 675 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 676 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 677 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 678 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 679 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 680 "TOP": TokenType.TOP, 681 } 682 683 SINGLE_TOKENS = { 684 **tokens.Tokenizer.SINGLE_TOKENS, 685 "$": TokenType.PARAMETER, 686 } 687 688 VAR_SINGLE_TOKENS = {"$"} 689 690 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
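For example, a sketch of Snowflake-specific tokenization (raw strings and '//' line comments):

from sqlglot.dialects.snowflake import Snowflake

# '$$' delimits raw strings and '//' starts a line comment in Snowflake.
tokens = Snowflake().tokenize("SELECT $$it's raw$$ // trailing comment")
print([(token.token_type.name, token.text) for token in tokens])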
692 class Generator(generator.Generator): 693 PARAMETER_TOKEN = "$" 694 MATCHED_BY_SOURCE = False 695 SINGLE_STRING_INTERVAL = True 696 JOIN_HINTS = False 697 TABLE_HINTS = False 698 QUERY_HINTS = False 699 AGGREGATE_FILTER_SUPPORTED = False 700 SUPPORTS_TABLE_COPY = False 701 COLLATE_IS_FUNC = True 702 LIMIT_ONLY_LITERALS = True 703 JSON_KEY_VALUE_PAIR_SEP = "," 704 INSERT_OVERWRITE = " OVERWRITE INTO" 705 706 TRANSFORMS = { 707 **generator.Generator.TRANSFORMS, 708 exp.ArgMax: rename_func("MAX_BY"), 709 exp.ArgMin: rename_func("MIN_BY"), 710 exp.Array: inline_array_sql, 711 exp.ArrayConcat: rename_func("ARRAY_CAT"), 712 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 713 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 714 exp.AtTimeZone: lambda self, e: self.func( 715 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 716 ), 717 exp.BitwiseXor: rename_func("BITXOR"), 718 exp.DateAdd: date_delta_sql("DATEADD"), 719 exp.DateDiff: date_delta_sql("DATEDIFF"), 720 exp.DateStrToDate: datestrtodate_sql, 721 exp.DataType: _datatype_sql, 722 exp.DayOfMonth: rename_func("DAYOFMONTH"), 723 exp.DayOfWeek: rename_func("DAYOFWEEK"), 724 exp.DayOfYear: rename_func("DAYOFYEAR"), 725 exp.Explode: rename_func("FLATTEN"), 726 exp.Extract: rename_func("DATE_PART"), 727 exp.FromTimeZone: lambda self, e: self.func( 728 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 729 ), 730 exp.GenerateSeries: lambda self, e: self.func( 731 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 732 ), 733 exp.GroupConcat: rename_func("LISTAGG"), 734 exp.If: if_sql(name="IFF", false_value="NULL"), 735 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 736 exp.JSONExtractScalar: lambda self, e: self.func( 737 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 738 ), 739 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 740 exp.JSONPathRoot: lambda *_: "", 741 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 742 exp.LogicalOr: rename_func("BOOLOR_AGG"), 743 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 744 exp.Max: max_or_greatest, 745 exp.Min: min_or_least, 746 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 747 exp.PercentileCont: transforms.preprocess( 748 [transforms.add_within_group_for_percentiles] 749 ), 750 exp.PercentileDisc: transforms.preprocess( 751 [transforms.add_within_group_for_percentiles] 752 ), 753 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 754 exp.RegexpILike: _regexpilike_sql, 755 exp.Rand: rename_func("RANDOM"), 756 exp.Select: transforms.preprocess( 757 [ 758 transforms.eliminate_distinct_on, 759 transforms.explode_to_unnest(), 760 transforms.eliminate_semi_and_anti_joins, 761 ] 762 ), 763 exp.SHA: rename_func("SHA1"), 764 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 765 exp.StartsWith: rename_func("STARTSWITH"), 766 exp.StrPosition: lambda self, e: self.func( 767 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 768 ), 769 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 770 exp.Stuff: rename_func("INSERT"), 771 exp.TimestampDiff: lambda self, e: self.func( 772 "TIMESTAMPDIFF", e.unit, e.expression, e.this 773 ), 774 exp.TimestampTrunc: timestamptrunc_sql, 775 exp.TimeStrToTime: timestrtotime_sql, 776 exp.TimeToStr: lambda self, e: self.func( 777 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 778 ), 779 exp.TimeToUnix: lambda self, e: 
f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 780 exp.ToArray: rename_func("TO_ARRAY"), 781 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 782 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 783 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 784 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 785 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 786 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 787 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 788 exp.Xor: rename_func("BOOLXOR"), 789 } 790 791 SUPPORTED_JSON_PATH_PARTS = { 792 exp.JSONPathKey, 793 exp.JSONPathRoot, 794 exp.JSONPathSubscript, 795 } 796 797 TYPE_MAPPING = { 798 **generator.Generator.TYPE_MAPPING, 799 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 800 } 801 802 STAR_MAPPING = { 803 "except": "EXCLUDE", 804 "replace": "RENAME", 805 } 806 807 PROPERTIES_LOCATION = { 808 **generator.Generator.PROPERTIES_LOCATION, 809 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 810 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 811 } 812 813 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 814 milli = expression.args.get("milli") 815 if milli is not None: 816 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 817 expression.set("nano", milli_to_nano) 818 819 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 820 821 def trycast_sql(self, expression: exp.TryCast) -> str: 822 value = expression.this 823 824 if value.type is None: 825 from sqlglot.optimizer.annotate_types import annotate_types 826 827 value = annotate_types(value) 828 829 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 830 return super().trycast_sql(expression) 831 832 # TRY_CAST only works for string values in Snowflake 833 return self.cast_sql(expression) 834 835 def log_sql(self, expression: exp.Log) -> str: 836 if not expression.expression: 837 return self.func("LN", expression.this) 838 839 return super().log_sql(expression) 840 841 def unnest_sql(self, expression: exp.Unnest) -> str: 842 unnest_alias = expression.args.get("alias") 843 offset = expression.args.get("offset") 844 845 columns = [ 846 exp.to_identifier("seq"), 847 exp.to_identifier("key"), 848 exp.to_identifier("path"), 849 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 850 seq_get(unnest_alias.columns if unnest_alias else [], 0) 851 or exp.to_identifier("value"), 852 exp.to_identifier("this"), 853 ] 854 855 if unnest_alias: 856 unnest_alias.set("columns", columns) 857 else: 858 unnest_alias = exp.TableAlias(this="_u", columns=columns) 859 860 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 861 alias = self.sql(unnest_alias) 862 alias = f" AS {alias}" if alias else "" 863 return f"{explode}{alias}" 864 865 def show_sql(self, expression: exp.Show) -> str: 866 terse = "TERSE " if expression.args.get("terse") else "" 867 history = " HISTORY" if expression.args.get("history") else "" 868 like = self.sql(expression, "like") 869 like = f" LIKE {like}" if like else "" 870 871 scope = self.sql(expression, "scope") 872 scope = f" {scope}" if scope else "" 873 874 scope_kind = self.sql(expression, "scope_kind") 875 if scope_kind: 876 scope_kind = f" IN {scope_kind}" 877 878 starts_with = self.sql(expression, "starts_with") 879 if starts_with: 880 starts_with = f" STARTS WITH {starts_with}" 881 882 limit = self.sql(expression, "limit") 883 884 from_ = self.sql(expression, "from") 885 if from_: 886 from_ = f" FROM 
{from_}" 887 888 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 889 890 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 891 # Other dialects don't support all of the following parameters, so we need to 892 # generate default values as necessary to ensure the transpilation is correct 893 group = expression.args.get("group") 894 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 895 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 896 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 897 898 return self.func( 899 "REGEXP_SUBSTR", 900 expression.this, 901 expression.expression, 902 position, 903 occurrence, 904 parameters, 905 group, 906 ) 907 908 def except_op(self, expression: exp.Except) -> str: 909 if not expression.args.get("distinct"): 910 self.unsupported("EXCEPT with All is not supported in Snowflake") 911 return super().except_op(expression) 912 913 def intersect_op(self, expression: exp.Intersect) -> str: 914 if not expression.args.get("distinct"): 915 self.unsupported("INTERSECT with All is not supported in Snowflake") 916 return super().intersect_op(expression) 917 918 def describe_sql(self, expression: exp.Describe) -> str: 919 # Default to table if kind is unknown 920 kind_value = expression.args.get("kind") or "TABLE" 921 kind = f" {kind_value}" if kind_value else "" 922 this = f" {self.sql(expression, 'this')}" 923 expressions = self.expressions(expression, flat=True) 924 expressions = f" {expressions}" if expressions else "" 925 return f"DESCRIBE{kind}{this}{expressions}" 926 927 def generatedasidentitycolumnconstraint_sql( 928 self, expression: exp.GeneratedAsIdentityColumnConstraint 929 ) -> str: 930 start = expression.args.get("start") 931 start = f" START {start}" if start else "" 932 increment = expression.args.get("increment") 933 increment = f" INCREMENT {increment}" if increment else "" 934 return f"AUTOINCREMENT{start}{increment}" 935 936 def swaptable_sql(self, expression: exp.SwapTable) -> str: 937 this = self.sql(expression, "this") 938 return f"SWAP WITH {this}" 939 940 def with_properties(self, properties: exp.Properties) -> str: 941 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 942 943 def cluster_sql(self, expression: exp.Cluster) -> str: 944 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 945 946 def struct_sql(self, expression: exp.Struct) -> str: 947 keys = [] 948 values = [] 949 950 for i, e in enumerate(expression.expressions): 951 if isinstance(e, exp.PropertyEQ): 952 keys.append( 953 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 954 ) 955 values.append(e.expression) 956 else: 957 keys.append(exp.Literal.string(f"_{i}")) 958 values.append(e) 959 960 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
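For example, a minimal sketch of the pretty and identify options:

import sqlglot

# Generate formatted Snowflake SQL with all identifiers quoted.
print(sqlglot.transpile("select a, b from t", read="snowflake", write="snowflake", pretty=True, identify=True)[0])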
813 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 814 milli = expression.args.get("milli") 815 if milli is not None: 816 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 817 expression.set("nano", milli_to_nano) 818 819 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
821 def trycast_sql(self, expression: exp.TryCast) -> str: 822 value = expression.this 823 824 if value.type is None: 825 from sqlglot.optimizer.annotate_types import annotate_types 826 827 value = annotate_types(value) 828 829 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 830 return super().trycast_sql(expression) 831 832 # TRY_CAST only works for string values in Snowflake 833 return self.cast_sql(expression)
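For example (a sketch; the exact SQL may differ across sqlglot versions):

import sqlglot

# A non-string operand falls back to CAST, since Snowflake's TRY_CAST only
# accepts string inputs; the string literal keeps TRY_CAST.
print(sqlglot.transpile("SELECT TRY_CAST(10 AS TEXT), TRY_CAST('10' AS INT)", read="duckdb", write="snowflake")[0])
# Roughly: SELECT CAST(10 AS TEXT), TRY_CAST('10' AS INT)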
841 def unnest_sql(self, expression: exp.Unnest) -> str: 842 unnest_alias = expression.args.get("alias") 843 offset = expression.args.get("offset") 844 845 columns = [ 846 exp.to_identifier("seq"), 847 exp.to_identifier("key"), 848 exp.to_identifier("path"), 849 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 850 seq_get(unnest_alias.columns if unnest_alias else [], 0) 851 or exp.to_identifier("value"), 852 exp.to_identifier("this"), 853 ] 854 855 if unnest_alias: 856 unnest_alias.set("columns", columns) 857 else: 858 unnest_alias = exp.TableAlias(this="_u", columns=columns) 859 860 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 861 alias = self.sql(unnest_alias) 862 alias = f" AS {alias}" if alias else "" 863 return f"{explode}{alias}"
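For example (a sketch of the UNNEST rewrite above):

import sqlglot

# UNNEST becomes TABLE(FLATTEN(INPUT => ...)), with FLATTEN's output columns
# mapped onto the table alias.
print(sqlglot.transpile("SELECT x FROM UNNEST(ARRAY[1, 2]) AS t(x)", read="postgres", write="snowflake")[0])
# Roughly: SELECT x FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, x, this)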
865 def show_sql(self, expression: exp.Show) -> str: 866 terse = "TERSE " if expression.args.get("terse") else "" 867 history = " HISTORY" if expression.args.get("history") else "" 868 like = self.sql(expression, "like") 869 like = f" LIKE {like}" if like else "" 870 871 scope = self.sql(expression, "scope") 872 scope = f" {scope}" if scope else "" 873 874 scope_kind = self.sql(expression, "scope_kind") 875 if scope_kind: 876 scope_kind = f" IN {scope_kind}" 877 878 starts_with = self.sql(expression, "starts_with") 879 if starts_with: 880 starts_with = f" STARTS WITH {starts_with}" 881 882 limit = self.sql(expression, "limit") 883 884 from_ = self.sql(expression, "from") 885 if from_: 886 from_ = f" FROM {from_}" 887 888 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
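For example (a sketch; SHOW statements round-trip through the dedicated parser and generator):

import sqlglot

sql = "SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.schema1"
print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))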
890 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 891 # Other dialects don't support all of the following parameters, so we need to 892 # generate default values as necessary to ensure the transpilation is correct 893 group = expression.args.get("group") 894 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 895 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 896 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 897 898 return self.func( 899 "REGEXP_SUBSTR", 900 expression.this, 901 expression.expression, 902 position, 903 occurrence, 904 parameters, 905 group, 906 )
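For example (a sketch, assuming the source dialect only sets the group argument):

import sqlglot

# Only `group` is present on the way in, so position, occurrence and
# parameters are filled with defaults to keep REGEXP_SUBSTR's argument
# positions valid.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(col, 'a(b)', 1)", read="duckdb", write="snowflake")[0])
# Roughly: SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)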
918 def describe_sql(self, expression: exp.Describe) -> str: 919 # Default to table if kind is unknown 920 kind_value = expression.args.get("kind") or "TABLE" 921 kind = f" {kind_value}" if kind_value else "" 922 this = f" {self.sql(expression, 'this')}" 923 expressions = self.expressions(expression, flat=True) 924 expressions = f" {expressions}" if expressions else "" 925 return f"DESCRIBE{kind}{this}{expressions}"
927 def generatedasidentitycolumnconstraint_sql( 928 self, expression: exp.GeneratedAsIdentityColumnConstraint 929 ) -> str: 930 start = expression.args.get("start") 931 start = f" START {start}" if start else "" 932 increment = expression.args.get("increment") 933 increment = f" INCREMENT {increment}" if increment else "" 934 return f"AUTOINCREMENT{start}{increment}"
946 def struct_sql(self, expression: exp.Struct) -> str: 947 keys = [] 948 values = [] 949 950 for i, e in enumerate(expression.expressions): 951 if isinstance(e, exp.PropertyEQ): 952 keys.append( 953 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 954 ) 955 values.append(e.expression) 956 else: 957 keys.append(exp.Literal.string(f"_{i}")) 958 values.append(e) 959 960 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
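For example (a sketch of the struct-to-object mapping):

import sqlglot

# Named struct fields become OBJECT_CONSTRUCT('key', value, ...) pairs;
# unnamed fields get positional keys such as '_0'.
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake")[0])
# Roughly: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)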
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql