sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
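
# Illustrative only (not part of the original module): the builders above fold
# Snowflake-specific functions into portable expressions at parse time. For
# example, assuming sqlglot is installed with this dialect registered, DIV0
# should round-trip through an IFF guard, roughly:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0]
#     'SELECT IFF(b = 0, 0, a / b)'
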
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)
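
# Illustrative only (not part of the original module): _map_date_part canonicalizes
# the alias table above, and leaves anything unmapped untouched:
#
#     >>> _map_date_part(exp.var("DD")).name
#     'DAY'
#     >>> _map_date_part(exp.var("DAYOFWEEK")).name  # not an alias key, passes through
#     'DAYOFWEEK'
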
def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
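
    # Illustrative only (not part of the original module): TIME_MAPPING rewrites
    # Snowflake format tokens into the internal strftime-style tokens, so a format
    # literal like 'YYYY-MM-DD HH24:MI:SS' corresponds to '%Y-%m-%d %H:%M:%S'.
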
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_JSON_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
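
        # Illustrative only (not part of the original module): these overrides make
        # Snowflake-only functions parse into portable expressions, so they can be
        # regenerated for any dialect. For instance, SQUARE(x) should come back out
        # as a POWER call, roughly:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT SQUARE(x)", read="snowflake", write="snowflake")[0]
        #     'SELECT POWER(x, 2)'
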
"TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 409 "TO_VARCHAR": exp.ToChar.from_arg_list, 410 "ZEROIFNULL": _build_if_from_zeroifnull, 411 } 412 413 FUNCTION_PARSERS = { 414 **parser.Parser.FUNCTION_PARSERS, 415 "DATE_PART": lambda self: self._parse_date_part(), 416 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 417 } 418 FUNCTION_PARSERS.pop("TRIM") 419 420 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 421 422 RANGE_PARSERS = { 423 **parser.Parser.RANGE_PARSERS, 424 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 425 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 426 } 427 428 ALTER_PARSERS = { 429 **parser.Parser.ALTER_PARSERS, 430 "UNSET": lambda self: self.expression( 431 exp.Set, 432 tag=self._match_text_seq("TAG"), 433 expressions=self._parse_csv(self._parse_id_var), 434 unset=True, 435 ), 436 "SWAP": lambda self: self._parse_alter_table_swap(), 437 } 438 439 STATEMENT_PARSERS = { 440 **parser.Parser.STATEMENT_PARSERS, 441 TokenType.SHOW: lambda self: self._parse_show(), 442 } 443 444 PROPERTY_PARSERS = { 445 **parser.Parser.PROPERTY_PARSERS, 446 "LOCATION": lambda self: self._parse_location_property(), 447 } 448 449 TYPE_CONVERTER = { 450 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 451 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 452 } 453 454 SHOW_PARSERS = { 455 "SCHEMAS": _show_parser("SCHEMAS"), 456 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 457 "OBJECTS": _show_parser("OBJECTS"), 458 "TERSE OBJECTS": _show_parser("OBJECTS"), 459 "TABLES": _show_parser("TABLES"), 460 "TERSE TABLES": _show_parser("TABLES"), 461 "VIEWS": _show_parser("VIEWS"), 462 "TERSE VIEWS": _show_parser("VIEWS"), 463 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 464 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 465 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 466 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 467 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 468 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 469 "SEQUENCES": _show_parser("SEQUENCES"), 470 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 471 "COLUMNS": _show_parser("COLUMNS"), 472 "USERS": _show_parser("USERS"), 473 "TERSE USERS": _show_parser("USERS"), 474 } 475 476 STAGED_FILE_SINGLE_TOKENS = { 477 TokenType.DOT, 478 TokenType.MOD, 479 TokenType.SLASH, 480 } 481 482 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 483 484 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 485 486 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 487 488 LAMBDAS = { 489 **parser.Parser.LAMBDAS, 490 TokenType.ARROW: lambda self, expressions: self.expression( 491 exp.Lambda, 492 this=self._replace_lambda( 493 self._parse_conjunction(), 494 expressions, 495 ), 496 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 497 ), 498 } 499 500 def _parse_create(self) -> exp.Create | exp.Command: 501 expression = super()._parse_create() 502 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 503 # Replace the Table node with the enclosed Identifier 504 expression.this.replace(expression.this.this) 505 506 return expression 507 508 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 509 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 510 def _parse_date_part(self: 
        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)
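
        # Illustrative only (not part of the original module): staged file references
        # parse into a Table carrying the format/pattern arguments, roughly:
        #
        #     >>> from sqlglot import parse_one
        #     >>> parse_one("SELECT * FROM @mystage/data (PATTERN => '.*csv')", read="snowflake")
        #
        # should yield a query whose FROM clause is a Table node with a "pattern" arg
        # holding the '.*csv' string literal.
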
        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
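
        # Illustrative only (not part of the original module): with the settings above,
        # `//` opens a line comment, `$$ ... $$` is read as a raw string, and Snowflake
        # spellings normalize during tokenization, e.g. MINUS tokenizes as EXCEPT.
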
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }
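
        # Illustrative only (not part of the original module): the GenerateSeries
        # transform mirrors the parser-side ARRAY_GENERATE_RANGE entry. The parser
        # subtracts 1 from the exclusive upper bound to make the series inclusive,
        # and the generator adds 1 back here (`e.args["end"] + 1`) when emitting
        # ARRAY_GENERATE_RANGE, so the bound survives a round trip.
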
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )
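
        # Illustrative only (not part of the original module): Snowflake's structured
        # OBJECT syntax only admits named fields, i.e. OBJECT(<key> <value_type>), so
        # datatype_sql above falls back to plain OBJECT whenever a struct type carries
        # bare DataType expressions (unnamed fields) that cannot be expressed that way.
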
expression.args.get("precision"), 917 expression.args.get("scale"), 918 ) 919 920 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 921 milli = expression.args.get("milli") 922 if milli is not None: 923 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 924 expression.set("nano", milli_to_nano) 925 926 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 927 928 def trycast_sql(self, expression: exp.TryCast) -> str: 929 value = expression.this 930 931 if value.type is None: 932 from sqlglot.optimizer.annotate_types import annotate_types 933 934 value = annotate_types(value) 935 936 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 937 return super().trycast_sql(expression) 938 939 # TRY_CAST only works for string values in Snowflake 940 return self.cast_sql(expression) 941 942 def log_sql(self, expression: exp.Log) -> str: 943 if not expression.expression: 944 return self.func("LN", expression.this) 945 946 return super().log_sql(expression) 947 948 def unnest_sql(self, expression: exp.Unnest) -> str: 949 unnest_alias = expression.args.get("alias") 950 offset = expression.args.get("offset") 951 952 columns = [ 953 exp.to_identifier("seq"), 954 exp.to_identifier("key"), 955 exp.to_identifier("path"), 956 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 957 seq_get(unnest_alias.columns if unnest_alias else [], 0) 958 or exp.to_identifier("value"), 959 exp.to_identifier("this"), 960 ] 961 962 if unnest_alias: 963 unnest_alias.set("columns", columns) 964 else: 965 unnest_alias = exp.TableAlias(this="_u", columns=columns) 966 967 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 968 alias = self.sql(unnest_alias) 969 alias = f" AS {alias}" if alias else "" 970 return f"{explode}{alias}" 971 972 def show_sql(self, expression: exp.Show) -> str: 973 terse = "TERSE " if expression.args.get("terse") else "" 974 history = " HISTORY" if expression.args.get("history") else "" 975 like = self.sql(expression, "like") 976 like = f" LIKE {like}" if like else "" 977 978 scope = self.sql(expression, "scope") 979 scope = f" {scope}" if scope else "" 980 981 scope_kind = self.sql(expression, "scope_kind") 982 if scope_kind: 983 scope_kind = f" IN {scope_kind}" 984 985 starts_with = self.sql(expression, "starts_with") 986 if starts_with: 987 starts_with = f" STARTS WITH {starts_with}" 988 989 limit = self.sql(expression, "limit") 990 991 from_ = self.sql(expression, "from") 992 if from_: 993 from_ = f" FROM {from_}" 994 995 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 996 997 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 998 # Other dialects don't support all of the following parameters, so we need to 999 # generate default values as necessary to ensure the transpilation is correct 1000 group = expression.args.get("group") 1001 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1002 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1003 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1004 1005 return self.func( 1006 "REGEXP_SUBSTR", 1007 expression.this, 1008 expression.expression, 1009 position, 1010 occurrence, 1011 parameters, 1012 group, 1013 ) 1014 1015 def except_op(self, expression: exp.Except) -> str: 1016 if not expression.args.get("distinct"): 1017 self.unsupported("EXCEPT with All is not 
supported in Snowflake") 1018 return super().except_op(expression) 1019 1020 def intersect_op(self, expression: exp.Intersect) -> str: 1021 if not expression.args.get("distinct"): 1022 self.unsupported("INTERSECT with All is not supported in Snowflake") 1023 return super().intersect_op(expression) 1024 1025 def describe_sql(self, expression: exp.Describe) -> str: 1026 # Default to table if kind is unknown 1027 kind_value = expression.args.get("kind") or "TABLE" 1028 kind = f" {kind_value}" if kind_value else "" 1029 this = f" {self.sql(expression, 'this')}" 1030 expressions = self.expressions(expression, flat=True) 1031 expressions = f" {expressions}" if expressions else "" 1032 return f"DESCRIBE{kind}{this}{expressions}" 1033 1034 def generatedasidentitycolumnconstraint_sql( 1035 self, expression: exp.GeneratedAsIdentityColumnConstraint 1036 ) -> str: 1037 start = expression.args.get("start") 1038 start = f" START {start}" if start else "" 1039 increment = expression.args.get("increment") 1040 increment = f" INCREMENT {increment}" if increment else "" 1041 return f"AUTOINCREMENT{start}{increment}" 1042 1043 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1044 this = self.sql(expression, "this") 1045 return f"SWAP WITH {this}" 1046 1047 def cluster_sql(self, expression: exp.Cluster) -> str: 1048 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1049 1050 def struct_sql(self, expression: exp.Struct) -> str: 1051 keys = [] 1052 values = [] 1053 1054 for i, e in enumerate(expression.expressions): 1055 if isinstance(e, exp.PropertyEQ): 1056 keys.append( 1057 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1058 ) 1059 values.append(e.expression) 1060 else: 1061 keys.append(exp.Literal.string(f"_{i}")) 1062 values.append(e) 1063 1064 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1065 1066 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1067 if expression.args.get("weight") or expression.args.get("accuracy"): 1068 self.unsupported( 1069 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1070 ) 1071 1072 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1073 1074 def alterset_sql(self, expression: exp.AlterSet) -> str: 1075 exprs = self.expressions(expression, flat=True) 1076 exprs = f" {exprs}" if exprs else "" 1077 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1078 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1079 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1080 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1081 tag = self.expressions(expression, key="tag", flat=True) 1082 tag = f" TAG {tag}" if tag else "" 1083 1084 return f"SET{exprs}{file_format}{copy_options}{tag}"
copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1081 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1082 tag = self.expressions(expression, key="tag", flat=True) 1083 tag = f" TAG {tag}" if tag else "" 1084 1085 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
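Snowflake resolves unquoted identifiers case-insensitively and stores them uppercased, which is what this strategy encodes. A minimal sketch of the effect, using sqlglot's identifier-normalization helper (the query below is made up for illustration; exact output is indicative):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers normalize to uppercase under Snowflake's strategy,
# while quoted identifiers keep their original casing.
ast = parse_one('SELECT col, "MixedCase" FROM tbl', read="snowflake")
print(normalize_identifiers(ast, dialect="snowflake").sql(dialect="snowflake"))
# Roughly: SELECT COL, "MixedCase" FROM TBL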
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
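A sketch of how to observe this rewrite programmatically; the qualify optimizer pass is one consumer of this flag (an assumption based on the rewrite shown above, not a claim about every code path):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# With PREFER_CTE_ALIAS_COLUMN set, the CTE alias column `c` is attached to
# SUM(a) as a projection alias so the HAVING reference resolves; qualify also
# normalizes and quotes identifiers along the way.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))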
Associates this dialect's time formats with their equivalent Python strftime formats.
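For example, Snowflake format tokens such as yyyy, mm and DD map through strftime codes, which is what lets format strings travel across dialects. A sketch (the DuckDB target and the exact output are illustrative):

import sqlglot

# 'yyyy-mm-DD' maps through the strftime string '%Y-%m-%d', which the DuckDB
# generator then renders in its own format-string flavor.
print(sqlglot.transpile("SELECT TO_TIMESTAMP('2024-01-02', 'yyyy-mm-DD')",
                        read="snowflake", write="duckdb")[0])
# Roughly: SELECT STRPTIME('2024-01-02', '%Y-%m-%d')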
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
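The practical effect, sketched below: with identify=True every identifier gets quoted except the DUAL pseudo-table (the exact output is indicative):

import sqlglot

# "col" is quoted because identify=True, but dual is left untouched since
# quote_identifier short-circuits on it.
print(sqlglot.transpile("SELECT col FROM dual", read="snowflake",
                        write="snowflake", identify=True)[0])
# Roughly: SELECT "col" FROM dual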
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_JSON_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "CONVERT_TIMEZONE": _build_convert_timezone,
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
    }

    TYPE_CONVERTER = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = _map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_at_before(self, table: exp.Table) -> exp.Table:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        if self._match_texts(("AT", "BEFORE")):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                when = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
                table.set("when", when)
            else:
                self._retreat(index)

        return table

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return self._parse_at_before(table)

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
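As a quick sketch of the Snowflake-specific parsing above: DIV0 is parsed into a conditional expression, so even a Snowflake-to-Snowflake round trip rewrites it (output indicative):

import sqlglot

# DIV0(a, b) parses to an IF node (see _build_if_from_div0), and Snowflake's
# generator renders IF as IFF, so the division-by-zero guard becomes explicit.
print(sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0])
# Roughly: SELECT IFF(b = 0, 0, a / b)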
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STREAMLIT": TokenType.STREAMLIT,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
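A sketch of two of these settings in action: $$ ... $$ raw strings and // line comments (the round-tripped output is indicative; comment handling may re-emit the comment as /* ... */):

import sqlglot

# $$hello$$ tokenizes as a raw string and round-trips as an ordinary string
# literal; the // comment is tokenized as a comment rather than SQL text.
ast = sqlglot.parse_one("SELECT $$hello$$ // dollar-quoted literal", read="snowflake")
print(ast.sql(dialect="snowflake"))
# Roughly: SELECT 'hello' /* dollar-quoted literal */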
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        if expression.args.get("weight") or expression.args.get("accuracy"):
            self.unsupported(
                "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
            )

        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
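A sketch tying a few of these options together with the TRANSFORMS table above (IF becoming IFF is per the transform; pretty is the formatting option documented here, and the input SQL is made up for illustration):

import sqlglot

# Generating Snowflake SQL applies the dialect's transforms (IF -> IFF) and
# honors generator options such as pretty-printing.
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', 'neg') FROM t",
                        write="snowflake", pretty=True)[0])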
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql