sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24 map_date_part, 25) 26from sqlglot.helper import flatten, is_float, is_int, seq_get 27from sqlglot.tokens import TokenType 28 29if t.TYPE_CHECKING: 30 from sqlglot._typing import E 31 32 33# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 34def _build_datetime( 35 name: str, kind: exp.DataType.Type, safe: bool = False 36) -> t.Callable[[t.List], exp.Func]: 37 def _builder(args: t.List) -> exp.Func: 38 value = seq_get(args, 0) 39 int_value = value is not None and is_int(value.name) 40 41 if isinstance(value, exp.Literal): 42 # Converts calls like `TO_TIME('01:02:03')` into casts 43 if len(args) == 1 and value.is_string and not int_value: 44 return exp.cast(value, kind) 45 46 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 47 # cases so we can transpile them, since they're relatively common 48 if kind == exp.DataType.Type.TIMESTAMP: 49 if int_value: 50 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 51 if not is_float(value.this): 52 return build_formatted_time(exp.StrToTime, "snowflake")(args) 53 54 if kind == exp.DataType.Type.DATE and not int_value: 55 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 56 formatted_exp.set("safe", safe) 57 return formatted_exp 58 59 return exp.Anonymous(this=name, expressions=args) 60 61 return _builder 62 63 64def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 65 expression = parser.build_var_map(args) 66 67 if isinstance(expression, exp.StarMap): 68 return expression 69 70 return exp.Struct( 71 expressions=[ 72 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 73 ] 74 ) 75 76 77def _build_datediff(args: t.List) -> exp.DateDiff: 78 return exp.DateDiff( 79 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 80 ) 81 82 83def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 84 def _builder(args: t.List) -> E: 85 return expr_type( 86 this=seq_get(args, 2), 87 expression=seq_get(args, 1), 88 unit=map_date_part(seq_get(args, 0)), 89 ) 90 91 return _builder 92 93 94# https://docs.snowflake.com/en/sql-reference/functions/div0 95def _build_if_from_div0(args: t.List) -> exp.If: 96 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 97 true = exp.Literal.number(0) 98 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 99 return exp.If(this=cond, true=true, false=false) 100 101 102# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 103def _build_if_from_zeroifnull(args: t.List) -> exp.If: 104 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 105 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 106 107 108# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 109def _build_if_from_nullifzero(args: t.List) -> exp.If: 110 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 111 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 112 113 114def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 115 flag = expression.text("flag") 116 117 if "i" not in flag: 118 flag += "i" 119 120 return self.func( 121 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 122 ) 123 124 125def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 126 if len(args) == 3: 127 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 128 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 129 130 131def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 132 regexp_replace = exp.RegexpReplace.from_arg_list(args) 133 134 if not regexp_replace.args.get("replacement"): 135 regexp_replace.set("replacement", exp.Literal.string("")) 136 137 return regexp_replace 138 139 140def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 141 def _parse(self: Snowflake.Parser) -> exp.Show: 142 return self._parse_show_snowflake(*args, **kwargs) 143 144 return _parse 145 146 147def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 148 trunc = date_trunc_to_time(args) 149 trunc.set("unit", map_date_part(trunc.args["unit"])) 150 return trunc 151 152 153def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 154 """ 155 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 156 so we need to unqualify them. 157 158 Example: 159 >>> from sqlglot import parse_one 160 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 161 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 162 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 163 """ 164 if isinstance(expression, exp.Pivot) and expression.unpivot: 165 expression = transforms.unqualify_columns(expression) 166 167 return expression 168 169 170def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 171 assert isinstance(expression, exp.Create) 172 173 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 174 if expression.this in exp.DataType.NESTED_TYPES: 175 expression.set("expressions", None) 176 return expression 177 178 props = expression.args.get("properties") 179 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 180 for schema_expression in expression.this.expressions: 181 if isinstance(schema_expression, exp.ColumnDef): 182 column_type = schema_expression.kind 183 if isinstance(column_type, exp.DataType): 184 column_type.transform(_flatten_structured_type, copy=False) 185 186 return expression 187 188 189class Snowflake(Dialect): 190 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 191 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 192 NULL_ORDERING = "nulls_are_large" 193 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 194 SUPPORTS_USER_DEFINED_TYPES = False 195 SUPPORTS_SEMI_ANTI_JOIN = False 196 PREFER_CTE_ALIAS_COLUMN = True 197 TABLESAMPLE_SIZE_IS_PERCENT = True 198 COPY_PARAMS_ARE_CSV = False 199 200 TIME_MAPPING = { 201 "YYYY": "%Y", 202 "yyyy": "%Y", 203 "YY": "%y", 204 "yy": "%y", 205 "MMMM": "%B", 206 "mmmm": "%B", 207 "MON": "%b", 208 "mon": "%b", 209 "MM": "%m", 210 "mm": "%m", 211 "DD": "%d", 212 "dd": "%-d", 213 "DY": "%a", 214 "dy": "%w", 215 "HH24": "%H", 216 "hh24": "%H", 217 "HH12": "%I", 218 "hh12": "%I", 219 "MI": "%M", 220 "mi": "%M", 221 "SS": "%S", 222 "ss": "%S", 223 "FF": "%f", 224 "ff": "%f", 225 "FF6": "%f", 226 "ff6": "%f", 227 } 228 229 def quote_identifier(self, expression: E, identify: bool = True) -> E: 230 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 231 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 232 if ( 233 isinstance(expression, exp.Identifier) 234 and isinstance(expression.parent, exp.Table) 235 and expression.name.lower() == "dual" 236 ): 237 return expression # type: ignore 238 239 return super().quote_identifier(expression, identify=identify) 240 241 class Parser(parser.Parser): 242 IDENTIFY_PIVOT_STRINGS = True 243 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 244 COLON_IS_JSON_EXTRACT = True 245 246 ID_VAR_TOKENS = { 247 *parser.Parser.ID_VAR_TOKENS, 248 TokenType.MATCH_CONDITION, 249 } 250 251 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 252 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 253 254 FUNCTIONS = { 255 **parser.Parser.FUNCTIONS, 256 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 257 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 258 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 259 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 260 this=seq_get(args, 1), expression=seq_get(args, 0) 261 ), 262 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 263 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 264 start=seq_get(args, 0), 265 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 266 step=seq_get(args, 2), 267 ), 268 "BITXOR": binary_from_function(exp.BitwiseXor), 269 "BIT_XOR": binary_from_function(exp.BitwiseXor), 270 "BOOLXOR": binary_from_function(exp.Xor), 271 "CONVERT_TIMEZONE": _build_convert_timezone, 272 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 273 "DATE_TRUNC": _date_trunc_to_time, 274 "DATEADD": _build_date_time_add(exp.DateAdd), 275 "DATEDIFF": _build_datediff, 276 "DIV0": _build_if_from_div0, 277 "FLATTEN": exp.Explode.from_arg_list, 278 "GET_PATH": lambda args, dialect: exp.JSONExtract( 279 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 280 ), 281 "IFF": exp.If.from_arg_list, 282 "LAST_DAY": lambda args: exp.LastDay( 283 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 284 ), 285 "LISTAGG": exp.GroupConcat.from_arg_list, 286 "MEDIAN": lambda args: exp.PercentileCont( 287 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 288 ), 289 "NULLIFZERO": _build_if_from_nullifzero, 290 "OBJECT_CONSTRUCT": _build_object_construct, 291 "REGEXP_REPLACE": _build_regexp_replace, 292 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 293 "RLIKE": exp.RegexpLike.from_arg_list, 294 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 295 "TIMEADD": _build_date_time_add(exp.TimeAdd), 296 "TIMEDIFF": _build_datediff, 297 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 298 "TIMESTAMPDIFF": _build_datediff, 299 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 300 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 301 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 302 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 303 "TO_NUMBER": lambda args: exp.ToNumber( 304 this=seq_get(args, 0), 305 format=seq_get(args, 1), 306 precision=seq_get(args, 2), 307 scale=seq_get(args, 3), 308 ), 309 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 310 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 311 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 312 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 313 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 314 "TO_VARCHAR": exp.ToChar.from_arg_list, 315 "ZEROIFNULL": _build_if_from_zeroifnull, 316 } 317 318 FUNCTION_PARSERS = { 319 **parser.Parser.FUNCTION_PARSERS, 320 "DATE_PART": lambda self: self._parse_date_part(), 321 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 322 } 323 FUNCTION_PARSERS.pop("TRIM") 324 325 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 326 327 RANGE_PARSERS = { 328 **parser.Parser.RANGE_PARSERS, 329 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 330 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 331 } 332 333 ALTER_PARSERS = { 334 **parser.Parser.ALTER_PARSERS, 335 "UNSET": lambda self: self.expression( 336 exp.Set, 337 tag=self._match_text_seq("TAG"), 338 expressions=self._parse_csv(self._parse_id_var), 339 unset=True, 340 ), 341 "SWAP": lambda self: self._parse_alter_table_swap(), 342 } 343 344 STATEMENT_PARSERS = { 345 **parser.Parser.STATEMENT_PARSERS, 346 TokenType.SHOW: lambda self: self._parse_show(), 347 } 348 349 PROPERTY_PARSERS = { 350 **parser.Parser.PROPERTY_PARSERS, 351 "LOCATION": lambda self: self._parse_location_property(), 352 } 353 354 TYPE_CONVERTERS = { 355 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 356 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 357 } 358 359 SHOW_PARSERS = { 360 "SCHEMAS": _show_parser("SCHEMAS"), 361 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 362 "OBJECTS": _show_parser("OBJECTS"), 363 "TERSE OBJECTS": _show_parser("OBJECTS"), 364 "TABLES": _show_parser("TABLES"), 365 "TERSE TABLES": _show_parser("TABLES"), 366 "VIEWS": _show_parser("VIEWS"), 367 "TERSE VIEWS": _show_parser("VIEWS"), 368 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 369 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 370 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 371 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 372 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 373 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 374 "SEQUENCES": _show_parser("SEQUENCES"), 375 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 376 "COLUMNS": _show_parser("COLUMNS"), 377 "USERS": _show_parser("USERS"), 378 "TERSE USERS": _show_parser("USERS"), 379 } 380 381 CONSTRAINT_PARSERS = { 382 **parser.Parser.CONSTRAINT_PARSERS, 383 "WITH": lambda self: self._parse_with_constraint(), 384 "MASKING": lambda self: self._parse_with_constraint(), 385 "PROJECTION": lambda self: self._parse_with_constraint(), 386 "TAG": lambda self: self._parse_with_constraint(), 387 } 388 389 STAGED_FILE_SINGLE_TOKENS = { 390 TokenType.DOT, 391 TokenType.MOD, 392 TokenType.SLASH, 393 } 394 395 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 396 397 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 398 399 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 400 401 LAMBDAS = { 402 **parser.Parser.LAMBDAS, 403 TokenType.ARROW: lambda self, expressions: self.expression( 404 exp.Lambda, 405 this=self._replace_lambda( 406 self._parse_assignment(), 407 expressions, 408 ), 409 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 410 ), 411 } 412 413 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 414 if self._prev.token_type != TokenType.WITH: 415 self._retreat(self._index - 1) 416 417 if self._match_text_seq("MASKING", "POLICY"): 418 policy = self._parse_column() 419 return self.expression( 420 exp.MaskingPolicyColumnConstraint, 421 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 422 expressions=self._match(TokenType.USING) 423 and self._parse_wrapped_csv(self._parse_id_var), 424 ) 425 if self._match_text_seq("PROJECTION", "POLICY"): 426 policy = self._parse_column() 427 return self.expression( 428 exp.ProjectionPolicyColumnConstraint, 429 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 430 ) 431 if self._match(TokenType.TAG): 432 return self.expression( 433 exp.TagColumnConstraint, 434 expressions=self._parse_wrapped_csv(self._parse_property), 435 ) 436 437 return None 438 439 def _parse_create(self) -> exp.Create | exp.Command: 440 expression = super()._parse_create() 441 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 442 # Replace the Table node with the enclosed Identifier 443 expression.this.replace(expression.this.this) 444 445 return expression 446 447 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 448 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 449 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 450 this = self._parse_var() or self._parse_type() 451 452 if not this: 453 return None 454 455 self._match(TokenType.COMMA) 456 expression = self._parse_bitwise() 457 this = map_date_part(this) 458 name = this.name.upper() 459 460 if name.startswith("EPOCH"): 461 if name == "EPOCH_MILLISECOND": 462 scale = 10**3 463 elif name == "EPOCH_MICROSECOND": 464 scale = 10**6 465 elif name == "EPOCH_NANOSECOND": 466 scale = 10**9 467 else: 468 scale = None 469 470 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 471 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 472 473 if scale: 474 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 475 476 return to_unix 477 478 return self.expression(exp.Extract, this=this, expression=expression) 479 480 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 481 if is_map: 482 # Keys are strings in Snowflake's objects, see also: 483 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 484 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 485 return self._parse_slice(self._parse_string()) 486 487 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 488 489 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 490 lateral = super()._parse_lateral() 491 if not lateral: 492 return lateral 493 494 if isinstance(lateral.this, exp.Explode): 495 table_alias = lateral.args.get("alias") 496 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 497 if table_alias and not table_alias.args.get("columns"): 498 table_alias.set("columns", columns) 499 elif not table_alias: 500 exp.alias_(lateral, "_flattened", table=columns, copy=False) 501 502 return lateral 503 504 def _parse_at_before(self, table: exp.Table) -> exp.Table: 505 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 506 index = self._index 507 if self._match_texts(("AT", "BEFORE")): 508 this = self._prev.text.upper() 509 kind = ( 510 self._match(TokenType.L_PAREN) 511 and self._match_texts(self.HISTORICAL_DATA_KIND) 512 and self._prev.text.upper() 513 ) 514 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 515 516 if expression: 517 self._match_r_paren() 518 when = self.expression( 519 exp.HistoricalData, this=this, kind=kind, expression=expression 520 ) 521 table.set("when", when) 522 else: 523 self._retreat(index) 524 525 return table 526 527 def _parse_table_parts( 528 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 529 ) -> exp.Table: 530 # https://docs.snowflake.com/en/user-guide/querying-stage 531 if self._match(TokenType.STRING, advance=False): 532 table = self._parse_string() 533 elif self._match_text_seq("@", advance=False): 534 table = self._parse_location_path() 535 else: 536 table = None 537 538 if table: 539 file_format = None 540 pattern = None 541 542 wrapped = self._match(TokenType.L_PAREN) 543 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 544 if self._match_text_seq("FILE_FORMAT", "=>"): 545 file_format = self._parse_string() or super()._parse_table_parts( 546 is_db_reference=is_db_reference 547 ) 548 elif self._match_text_seq("PATTERN", "=>"): 549 pattern = self._parse_string() 550 else: 551 break 552 553 self._match(TokenType.COMMA) 554 555 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 556 else: 557 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 558 559 return self._parse_at_before(table) 560 561 def _parse_id_var( 562 self, 563 any_token: bool = True, 564 tokens: t.Optional[t.Collection[TokenType]] = None, 565 ) -> t.Optional[exp.Expression]: 566 if self._match_text_seq("IDENTIFIER", "("): 567 identifier = ( 568 super()._parse_id_var(any_token=any_token, tokens=tokens) 569 or self._parse_string() 570 ) 571 self._match_r_paren() 572 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 573 574 return super()._parse_id_var(any_token=any_token, tokens=tokens) 575 576 def _parse_show_snowflake(self, this: str) -> exp.Show: 577 scope = None 578 scope_kind = None 579 580 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 581 # which is syntactically valid but has no effect on the output 582 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 583 584 history = self._match_text_seq("HISTORY") 585 586 like = self._parse_string() if self._match(TokenType.LIKE) else None 587 588 if self._match(TokenType.IN): 589 if self._match_text_seq("ACCOUNT"): 590 scope_kind = "ACCOUNT" 591 elif self._match_set(self.DB_CREATABLES): 592 scope_kind = self._prev.text.upper() 593 if self._curr: 594 scope = self._parse_table_parts() 595 elif self._curr: 596 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 597 scope = self._parse_table_parts() 598 599 return self.expression( 600 exp.Show, 601 **{ 602 "terse": terse, 603 "this": this, 604 "history": history, 605 "like": like, 606 "scope": scope, 607 "scope_kind": scope_kind, 608 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 609 "limit": self._parse_limit(), 610 "from": self._parse_string() if self._match(TokenType.FROM) else None, 611 }, 612 ) 613 614 def _parse_alter_table_swap(self) -> exp.SwapTable: 615 self._match_text_seq("WITH") 616 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 617 618 def _parse_location_property(self) -> exp.LocationProperty: 619 self._match(TokenType.EQ) 620 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 621 622 def _parse_file_location(self) -> t.Optional[exp.Expression]: 623 # Parse either a subquery or a staged file 624 return ( 625 self._parse_select(table=True, parse_subquery_alias=False) 626 if self._match(TokenType.L_PAREN, advance=False) 627 else self._parse_table_parts() 628 ) 629 630 def _parse_location_path(self) -> exp.Var: 631 parts = [self._advance_any(ignore_reserved=True)] 632 633 # We avoid consuming a comma token because external tables like @foo and @bar 634 # can be joined in a query with a comma separator, as well as closing paren 635 # in case of subqueries 636 while self._is_connected() and not self._match_set( 637 (TokenType.COMMA, TokenType.R_PAREN), advance=False 638 ): 639 parts.append(self._advance_any(ignore_reserved=True)) 640 641 return exp.var("".join(part.text for part in parts if part)) 642 643 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 644 this = super()._parse_lambda_arg() 645 646 if not this: 647 return this 648 649 typ = self._parse_types() 650 651 if typ: 652 return self.expression(exp.Cast, this=this, to=typ) 653 654 return this 655 656 class Tokenizer(tokens.Tokenizer): 657 STRING_ESCAPES = ["\\", "'"] 658 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 659 RAW_STRINGS = ["$$"] 660 COMMENTS = ["--", "//", ("/*", "*/")] 661 662 KEYWORDS = { 663 **tokens.Tokenizer.KEYWORDS, 664 "BYTEINT": TokenType.INT, 665 "CHAR VARYING": TokenType.VARCHAR, 666 "CHARACTER VARYING": TokenType.VARCHAR, 667 "EXCLUDE": TokenType.EXCEPT, 668 "ILIKE ANY": TokenType.ILIKE_ANY, 669 "LIKE ANY": TokenType.LIKE_ANY, 670 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 671 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 672 "MINUS": TokenType.EXCEPT, 673 "NCHAR VARYING": TokenType.VARCHAR, 674 "PUT": TokenType.COMMAND, 675 "REMOVE": TokenType.COMMAND, 676 "RM": TokenType.COMMAND, 677 "SAMPLE": TokenType.TABLE_SAMPLE, 678 "SQL_DOUBLE": TokenType.DOUBLE, 679 "SQL_VARCHAR": TokenType.VARCHAR, 680 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 681 "TAG": TokenType.TAG, 682 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 683 "TOP": TokenType.TOP, 684 "WAREHOUSE": TokenType.WAREHOUSE, 685 "STREAMLIT": TokenType.STREAMLIT, 686 } 687 688 SINGLE_TOKENS = { 689 **tokens.Tokenizer.SINGLE_TOKENS, 690 "$": TokenType.PARAMETER, 691 } 692 693 VAR_SINGLE_TOKENS = {"$"} 694 695 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 696 697 class Generator(generator.Generator): 698 PARAMETER_TOKEN = "$" 699 MATCHED_BY_SOURCE = False 700 SINGLE_STRING_INTERVAL = True 701 JOIN_HINTS = False 702 TABLE_HINTS = False 703 QUERY_HINTS = False 704 AGGREGATE_FILTER_SUPPORTED = False 705 SUPPORTS_TABLE_COPY = False 706 COLLATE_IS_FUNC = True 707 LIMIT_ONLY_LITERALS = True 708 JSON_KEY_VALUE_PAIR_SEP = "," 709 INSERT_OVERWRITE = " OVERWRITE INTO" 710 STRUCT_DELIMITER = ("(", ")") 711 COPY_PARAMS_ARE_WRAPPED = False 712 COPY_PARAMS_EQ_REQUIRED = True 713 STAR_EXCEPT = "EXCLUDE" 714 715 TRANSFORMS = { 716 **generator.Generator.TRANSFORMS, 717 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 718 exp.ArgMax: rename_func("MAX_BY"), 719 exp.ArgMin: rename_func("MIN_BY"), 720 exp.Array: inline_array_sql, 721 exp.ArrayConcat: rename_func("ARRAY_CAT"), 722 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 723 exp.AtTimeZone: lambda self, e: self.func( 724 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 725 ), 726 exp.BitwiseXor: rename_func("BITXOR"), 727 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 728 exp.DateAdd: date_delta_sql("DATEADD"), 729 exp.DateDiff: date_delta_sql("DATEDIFF"), 730 exp.DateStrToDate: datestrtodate_sql, 731 exp.DayOfMonth: rename_func("DAYOFMONTH"), 732 exp.DayOfWeek: rename_func("DAYOFWEEK"), 733 exp.DayOfYear: rename_func("DAYOFYEAR"), 734 exp.Explode: rename_func("FLATTEN"), 735 exp.Extract: rename_func("DATE_PART"), 736 exp.FromTimeZone: lambda self, e: self.func( 737 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 738 ), 739 exp.GenerateSeries: lambda self, e: self.func( 740 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 741 ), 742 exp.GroupConcat: rename_func("LISTAGG"), 743 exp.If: if_sql(name="IFF", false_value="NULL"), 744 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 745 exp.JSONExtractScalar: lambda self, e: self.func( 746 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 747 ), 748 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 749 exp.JSONPathRoot: lambda *_: "", 750 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 751 exp.LogicalOr: rename_func("BOOLOR_AGG"), 752 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 753 exp.Max: max_or_greatest, 754 exp.Min: min_or_least, 755 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 756 exp.PercentileCont: transforms.preprocess( 757 [transforms.add_within_group_for_percentiles] 758 ), 759 exp.PercentileDisc: transforms.preprocess( 760 [transforms.add_within_group_for_percentiles] 761 ), 762 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 763 exp.RegexpILike: _regexpilike_sql, 764 exp.Rand: rename_func("RANDOM"), 765 exp.Select: transforms.preprocess( 766 [ 767 transforms.eliminate_distinct_on, 768 transforms.explode_to_unnest(), 769 transforms.eliminate_semi_and_anti_joins, 770 ] 771 ), 772 exp.SHA: rename_func("SHA1"), 773 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 774 exp.StartsWith: rename_func("STARTSWITH"), 775 exp.StrPosition: lambda self, e: self.func( 776 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 777 ), 778 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 779 exp.Stuff: rename_func("INSERT"), 780 exp.TimeAdd: date_delta_sql("TIMEADD"), 781 exp.TimestampDiff: lambda self, e: self.func( 782 "TIMESTAMPDIFF", e.unit, e.expression, e.this 783 ), 784 exp.TimestampTrunc: timestamptrunc_sql(), 785 exp.TimeStrToTime: timestrtotime_sql, 786 exp.TimeToStr: lambda self, e: self.func( 787 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 788 ), 789 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 790 exp.ToArray: rename_func("TO_ARRAY"), 791 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 792 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 793 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 794 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 795 exp.TsOrDsToDate: lambda self, e: self.func( 796 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 797 ), 798 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 799 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 800 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 801 exp.Xor: rename_func("BOOLXOR"), 802 } 803 804 SUPPORTED_JSON_PATH_PARTS = { 805 exp.JSONPathKey, 806 exp.JSONPathRoot, 807 exp.JSONPathSubscript, 808 } 809 810 TYPE_MAPPING = { 811 **generator.Generator.TYPE_MAPPING, 812 exp.DataType.Type.NESTED: "OBJECT", 813 exp.DataType.Type.STRUCT: "OBJECT", 814 } 815 816 PROPERTIES_LOCATION = { 817 **generator.Generator.PROPERTIES_LOCATION, 818 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 819 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 820 } 821 822 UNSUPPORTED_VALUES_EXPRESSIONS = { 823 exp.Map, 824 exp.StarMap, 825 exp.Struct, 826 exp.VarMap, 827 } 828 829 def with_properties(self, properties: exp.Properties) -> str: 830 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 831 832 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 833 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 834 values_as_table = False 835 836 return super().values_sql(expression, values_as_table=values_as_table) 837 838 def datatype_sql(self, expression: exp.DataType) -> str: 839 expressions = expression.expressions 840 if ( 841 expressions 842 and expression.is_type(*exp.DataType.STRUCT_TYPES) 843 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 844 ): 845 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 846 return "OBJECT" 847 848 return super().datatype_sql(expression) 849 850 def tonumber_sql(self, expression: exp.ToNumber) -> str: 851 return self.func( 852 "TO_NUMBER", 853 expression.this, 854 expression.args.get("format"), 855 expression.args.get("precision"), 856 expression.args.get("scale"), 857 ) 858 859 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 860 milli = expression.args.get("milli") 861 if milli is not None: 862 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 863 expression.set("nano", milli_to_nano) 864 865 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 866 867 def trycast_sql(self, expression: exp.TryCast) -> str: 868 value = expression.this 869 870 if value.type is None: 871 from sqlglot.optimizer.annotate_types import annotate_types 872 873 value = annotate_types(value) 874 875 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 876 return super().trycast_sql(expression) 877 878 # TRY_CAST only works for string values in Snowflake 879 return self.cast_sql(expression) 880 881 def log_sql(self, expression: exp.Log) -> str: 882 if not expression.expression: 883 return self.func("LN", expression.this) 884 885 return super().log_sql(expression) 886 887 def unnest_sql(self, expression: exp.Unnest) -> str: 888 unnest_alias = expression.args.get("alias") 889 offset = expression.args.get("offset") 890 891 columns = [ 892 exp.to_identifier("seq"), 893 exp.to_identifier("key"), 894 exp.to_identifier("path"), 895 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 896 seq_get(unnest_alias.columns if unnest_alias else [], 0) 897 or exp.to_identifier("value"), 898 exp.to_identifier("this"), 899 ] 900 901 if unnest_alias: 902 unnest_alias.set("columns", columns) 903 else: 904 unnest_alias = exp.TableAlias(this="_u", columns=columns) 905 906 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 907 alias = self.sql(unnest_alias) 908 alias = f" AS {alias}" if alias else "" 909 return f"{explode}{alias}" 910 911 def show_sql(self, expression: exp.Show) -> str: 912 terse = "TERSE " if expression.args.get("terse") else "" 913 history = " HISTORY" if expression.args.get("history") else "" 914 like = self.sql(expression, "like") 915 like = f" LIKE {like}" if like else "" 916 917 scope = self.sql(expression, "scope") 918 scope = f" {scope}" if scope else "" 919 920 scope_kind = self.sql(expression, "scope_kind") 921 if scope_kind: 922 scope_kind = f" IN {scope_kind}" 923 924 starts_with = self.sql(expression, "starts_with") 925 if starts_with: 926 starts_with = f" STARTS WITH {starts_with}" 927 928 limit = self.sql(expression, "limit") 929 930 from_ = self.sql(expression, "from") 931 if from_: 932 from_ = f" FROM {from_}" 933 934 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 935 936 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 937 # Other dialects don't support all of the following parameters, so we need to 938 # generate default values as necessary to ensure the transpilation is correct 939 group = expression.args.get("group") 940 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 941 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 942 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 943 944 return self.func( 945 "REGEXP_SUBSTR", 946 expression.this, 947 expression.expression, 948 position, 949 occurrence, 950 parameters, 951 group, 952 ) 953 954 def except_op(self, expression: exp.Except) -> str: 955 if not expression.args.get("distinct"): 956 self.unsupported("EXCEPT with All is not supported in Snowflake") 957 return super().except_op(expression) 958 959 def intersect_op(self, expression: exp.Intersect) -> str: 960 if not expression.args.get("distinct"): 961 self.unsupported("INTERSECT with All is not supported in Snowflake") 962 return super().intersect_op(expression) 963 964 def describe_sql(self, expression: exp.Describe) -> str: 965 # Default to table if kind is unknown 966 kind_value = expression.args.get("kind") or "TABLE" 967 kind = f" {kind_value}" if kind_value else "" 968 this = f" {self.sql(expression, 'this')}" 969 expressions = self.expressions(expression, flat=True) 970 expressions = f" {expressions}" if expressions else "" 971 return f"DESCRIBE{kind}{this}{expressions}" 972 973 def generatedasidentitycolumnconstraint_sql( 974 self, expression: exp.GeneratedAsIdentityColumnConstraint 975 ) -> str: 976 start = expression.args.get("start") 977 start = f" START {start}" if start else "" 978 increment = expression.args.get("increment") 979 increment = f" INCREMENT {increment}" if increment else "" 980 return f"AUTOINCREMENT{start}{increment}" 981 982 def swaptable_sql(self, expression: exp.SwapTable) -> str: 983 this = self.sql(expression, "this") 984 return f"SWAP WITH {this}" 985 986 def cluster_sql(self, expression: exp.Cluster) -> str: 987 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 988 989 def struct_sql(self, expression: exp.Struct) -> str: 990 keys = [] 991 values = [] 992 993 for i, e in enumerate(expression.expressions): 994 if isinstance(e, exp.PropertyEQ): 995 keys.append( 996 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 997 ) 998 values.append(e.expression) 999 else: 1000 keys.append(exp.Literal.string(f"_{i}")) 1001 values.append(e) 1002 1003 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1004 1005 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1006 if expression.args.get("weight") or expression.args.get("accuracy"): 1007 self.unsupported( 1008 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1009 ) 1010 1011 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1012 1013 def alterset_sql(self, expression: exp.AlterSet) -> str: 1014 exprs = self.expressions(expression, flat=True) 1015 exprs = f" {exprs}" if exprs else "" 1016 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1017 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1018 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1019 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1020 tag = self.expressions(expression, key="tag", flat=True) 1021 tag = f" TAG {tag}" if tag else "" 1022 1023 return f"SET{exprs}{file_format}{copy_options}{tag}"
190class Snowflake(Dialect): 191 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 192 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 193 NULL_ORDERING = "nulls_are_large" 194 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 195 SUPPORTS_USER_DEFINED_TYPES = False 196 SUPPORTS_SEMI_ANTI_JOIN = False 197 PREFER_CTE_ALIAS_COLUMN = True 198 TABLESAMPLE_SIZE_IS_PERCENT = True 199 COPY_PARAMS_ARE_CSV = False 200 201 TIME_MAPPING = { 202 "YYYY": "%Y", 203 "yyyy": "%Y", 204 "YY": "%y", 205 "yy": "%y", 206 "MMMM": "%B", 207 "mmmm": "%B", 208 "MON": "%b", 209 "mon": "%b", 210 "MM": "%m", 211 "mm": "%m", 212 "DD": "%d", 213 "dd": "%-d", 214 "DY": "%a", 215 "dy": "%w", 216 "HH24": "%H", 217 "hh24": "%H", 218 "HH12": "%I", 219 "hh12": "%I", 220 "MI": "%M", 221 "mi": "%M", 222 "SS": "%S", 223 "ss": "%S", 224 "FF": "%f", 225 "ff": "%f", 226 "FF6": "%f", 227 "ff6": "%f", 228 } 229 230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify) 241 242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LISTAGG": exp.GroupConcat.from_arg_list, 287 "MEDIAN": lambda args: exp.PercentileCont( 288 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 289 ), 290 "NULLIFZERO": _build_if_from_nullifzero, 291 "OBJECT_CONSTRUCT": _build_object_construct, 292 "REGEXP_REPLACE": _build_regexp_replace, 293 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 294 "RLIKE": exp.RegexpLike.from_arg_list, 295 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 296 "TIMEADD": _build_date_time_add(exp.TimeAdd), 297 "TIMEDIFF": _build_datediff, 298 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 299 "TIMESTAMPDIFF": _build_datediff, 300 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 301 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 302 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 303 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 304 "TO_NUMBER": lambda args: exp.ToNumber( 305 this=seq_get(args, 0), 306 format=seq_get(args, 1), 307 precision=seq_get(args, 2), 308 scale=seq_get(args, 3), 309 ), 310 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 311 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 312 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 313 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 314 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 315 "TO_VARCHAR": exp.ToChar.from_arg_list, 316 "ZEROIFNULL": _build_if_from_zeroifnull, 317 } 318 319 FUNCTION_PARSERS = { 320 **parser.Parser.FUNCTION_PARSERS, 321 "DATE_PART": lambda self: self._parse_date_part(), 322 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 323 } 324 FUNCTION_PARSERS.pop("TRIM") 325 326 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 327 328 RANGE_PARSERS = { 329 **parser.Parser.RANGE_PARSERS, 330 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 331 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 332 } 333 334 ALTER_PARSERS = { 335 **parser.Parser.ALTER_PARSERS, 336 "UNSET": lambda self: self.expression( 337 exp.Set, 338 tag=self._match_text_seq("TAG"), 339 expressions=self._parse_csv(self._parse_id_var), 340 unset=True, 341 ), 342 "SWAP": lambda self: self._parse_alter_table_swap(), 343 } 344 345 STATEMENT_PARSERS = { 346 **parser.Parser.STATEMENT_PARSERS, 347 TokenType.SHOW: lambda self: self._parse_show(), 348 } 349 350 PROPERTY_PARSERS = { 351 **parser.Parser.PROPERTY_PARSERS, 352 "LOCATION": lambda self: self._parse_location_property(), 353 } 354 355 TYPE_CONVERTERS = { 356 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 357 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 358 } 359 360 SHOW_PARSERS = { 361 "SCHEMAS": _show_parser("SCHEMAS"), 362 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 363 "OBJECTS": _show_parser("OBJECTS"), 364 "TERSE OBJECTS": _show_parser("OBJECTS"), 365 "TABLES": _show_parser("TABLES"), 366 "TERSE TABLES": _show_parser("TABLES"), 367 "VIEWS": _show_parser("VIEWS"), 368 "TERSE VIEWS": _show_parser("VIEWS"), 369 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 370 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 371 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 372 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 373 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 374 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 375 "SEQUENCES": _show_parser("SEQUENCES"), 376 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 377 "COLUMNS": _show_parser("COLUMNS"), 378 "USERS": _show_parser("USERS"), 379 "TERSE USERS": _show_parser("USERS"), 380 } 381 382 CONSTRAINT_PARSERS = { 383 **parser.Parser.CONSTRAINT_PARSERS, 384 "WITH": lambda self: self._parse_with_constraint(), 385 "MASKING": lambda self: self._parse_with_constraint(), 386 "PROJECTION": lambda self: self._parse_with_constraint(), 387 "TAG": lambda self: self._parse_with_constraint(), 388 } 389 390 STAGED_FILE_SINGLE_TOKENS = { 391 TokenType.DOT, 392 TokenType.MOD, 393 TokenType.SLASH, 394 } 395 396 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 397 398 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 399 400 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 401 402 LAMBDAS = { 403 **parser.Parser.LAMBDAS, 404 TokenType.ARROW: lambda self, expressions: self.expression( 405 exp.Lambda, 406 this=self._replace_lambda( 407 self._parse_assignment(), 408 expressions, 409 ), 410 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 411 ), 412 } 413 414 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 415 if self._prev.token_type != TokenType.WITH: 416 self._retreat(self._index - 1) 417 418 if self._match_text_seq("MASKING", "POLICY"): 419 policy = self._parse_column() 420 return self.expression( 421 exp.MaskingPolicyColumnConstraint, 422 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 423 expressions=self._match(TokenType.USING) 424 and self._parse_wrapped_csv(self._parse_id_var), 425 ) 426 if self._match_text_seq("PROJECTION", "POLICY"): 427 policy = self._parse_column() 428 return self.expression( 429 exp.ProjectionPolicyColumnConstraint, 430 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 431 ) 432 if self._match(TokenType.TAG): 433 return self.expression( 434 exp.TagColumnConstraint, 435 expressions=self._parse_wrapped_csv(self._parse_property), 436 ) 437 438 return None 439 440 def _parse_create(self) -> exp.Create | exp.Command: 441 expression = super()._parse_create() 442 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 443 # Replace the Table node with the enclosed Identifier 444 expression.this.replace(expression.this.this) 445 446 return expression 447 448 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 449 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 450 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 451 this = self._parse_var() or self._parse_type() 452 453 if not this: 454 return None 455 456 self._match(TokenType.COMMA) 457 expression = self._parse_bitwise() 458 this = map_date_part(this) 459 name = this.name.upper() 460 461 if name.startswith("EPOCH"): 462 if name == "EPOCH_MILLISECOND": 463 scale = 10**3 464 elif name == "EPOCH_MICROSECOND": 465 scale = 10**6 466 elif name == "EPOCH_NANOSECOND": 467 scale = 10**9 468 else: 469 scale = None 470 471 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 472 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 473 474 if scale: 475 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 476 477 return to_unix 478 479 return self.expression(exp.Extract, this=this, expression=expression) 480 481 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 482 if is_map: 483 # Keys are strings in Snowflake's objects, see also: 484 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 485 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 486 return self._parse_slice(self._parse_string()) 487 488 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 489 490 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 491 lateral = super()._parse_lateral() 492 if not lateral: 493 return lateral 494 495 if isinstance(lateral.this, exp.Explode): 496 table_alias = lateral.args.get("alias") 497 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 498 if table_alias and not table_alias.args.get("columns"): 499 table_alias.set("columns", columns) 500 elif not table_alias: 501 exp.alias_(lateral, "_flattened", table=columns, copy=False) 502 503 return lateral 504 505 def _parse_at_before(self, table: exp.Table) -> exp.Table: 506 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 507 index = self._index 508 if self._match_texts(("AT", "BEFORE")): 509 this = self._prev.text.upper() 510 kind = ( 511 self._match(TokenType.L_PAREN) 512 and self._match_texts(self.HISTORICAL_DATA_KIND) 513 and self._prev.text.upper() 514 ) 515 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 516 517 if expression: 518 self._match_r_paren() 519 when = self.expression( 520 exp.HistoricalData, this=this, kind=kind, expression=expression 521 ) 522 table.set("when", when) 523 else: 524 self._retreat(index) 525 526 return table 527 528 def _parse_table_parts( 529 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 530 ) -> exp.Table: 531 # https://docs.snowflake.com/en/user-guide/querying-stage 532 if self._match(TokenType.STRING, advance=False): 533 table = self._parse_string() 534 elif self._match_text_seq("@", advance=False): 535 table = self._parse_location_path() 536 else: 537 table = None 538 539 if table: 540 file_format = None 541 pattern = None 542 543 wrapped = self._match(TokenType.L_PAREN) 544 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 545 if self._match_text_seq("FILE_FORMAT", "=>"): 546 file_format = self._parse_string() or super()._parse_table_parts( 547 is_db_reference=is_db_reference 548 ) 549 elif self._match_text_seq("PATTERN", "=>"): 550 pattern = self._parse_string() 551 else: 552 break 553 554 self._match(TokenType.COMMA) 555 556 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 557 else: 558 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 559 560 return self._parse_at_before(table) 561 562 def _parse_id_var( 563 self, 564 any_token: bool = True, 565 tokens: t.Optional[t.Collection[TokenType]] = None, 566 ) -> t.Optional[exp.Expression]: 567 if self._match_text_seq("IDENTIFIER", "("): 568 identifier = ( 569 super()._parse_id_var(any_token=any_token, tokens=tokens) 570 or self._parse_string() 571 ) 572 self._match_r_paren() 573 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 574 575 return super()._parse_id_var(any_token=any_token, tokens=tokens) 576 577 def _parse_show_snowflake(self, this: str) -> exp.Show: 578 scope = None 579 scope_kind = None 580 581 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 582 # which is syntactically valid but has no effect on the output 583 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 584 585 history = self._match_text_seq("HISTORY") 586 587 like = self._parse_string() if self._match(TokenType.LIKE) else None 588 589 if self._match(TokenType.IN): 590 if self._match_text_seq("ACCOUNT"): 591 scope_kind = "ACCOUNT" 592 elif self._match_set(self.DB_CREATABLES): 593 scope_kind = self._prev.text.upper() 594 if self._curr: 595 scope = self._parse_table_parts() 596 elif self._curr: 597 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 598 scope = self._parse_table_parts() 599 600 return self.expression( 601 exp.Show, 602 **{ 603 "terse": terse, 604 "this": this, 605 "history": history, 606 "like": like, 607 "scope": scope, 608 "scope_kind": scope_kind, 609 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 610 "limit": self._parse_limit(), 611 "from": self._parse_string() if self._match(TokenType.FROM) else None, 612 }, 613 ) 614 615 def _parse_alter_table_swap(self) -> exp.SwapTable: 616 self._match_text_seq("WITH") 617 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 618 619 def _parse_location_property(self) -> exp.LocationProperty: 620 self._match(TokenType.EQ) 621 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 622 623 def _parse_file_location(self) -> t.Optional[exp.Expression]: 624 # Parse either a subquery or a staged file 625 return ( 626 self._parse_select(table=True, parse_subquery_alias=False) 627 if self._match(TokenType.L_PAREN, advance=False) 628 else self._parse_table_parts() 629 ) 630 631 def _parse_location_path(self) -> exp.Var: 632 parts = [self._advance_any(ignore_reserved=True)] 633 634 # We avoid consuming a comma token because external tables like @foo and @bar 635 # can be joined in a query with a comma separator, as well as closing paren 636 # in case of subqueries 637 while self._is_connected() and not self._match_set( 638 (TokenType.COMMA, TokenType.R_PAREN), advance=False 639 ): 640 parts.append(self._advance_any(ignore_reserved=True)) 641 642 return exp.var("".join(part.text for part in parts if part)) 643 644 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 645 this = super()._parse_lambda_arg() 646 647 if not this: 648 return this 649 650 typ = self._parse_types() 651 652 if typ: 653 return self.expression(exp.Cast, this=this, to=typ) 654 655 return this 656 657 class Tokenizer(tokens.Tokenizer): 658 STRING_ESCAPES = ["\\", "'"] 659 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 660 RAW_STRINGS = ["$$"] 661 COMMENTS = ["--", "//", ("/*", "*/")] 662 663 KEYWORDS = { 664 **tokens.Tokenizer.KEYWORDS, 665 "BYTEINT": TokenType.INT, 666 "CHAR VARYING": TokenType.VARCHAR, 667 "CHARACTER VARYING": TokenType.VARCHAR, 668 "EXCLUDE": TokenType.EXCEPT, 669 "ILIKE ANY": TokenType.ILIKE_ANY, 670 "LIKE ANY": TokenType.LIKE_ANY, 671 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 672 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 673 "MINUS": TokenType.EXCEPT, 674 "NCHAR VARYING": TokenType.VARCHAR, 675 "PUT": TokenType.COMMAND, 676 "REMOVE": TokenType.COMMAND, 677 "RM": TokenType.COMMAND, 678 "SAMPLE": TokenType.TABLE_SAMPLE, 679 "SQL_DOUBLE": TokenType.DOUBLE, 680 "SQL_VARCHAR": TokenType.VARCHAR, 681 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 682 "TAG": TokenType.TAG, 683 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 684 "TOP": TokenType.TOP, 685 "WAREHOUSE": TokenType.WAREHOUSE, 686 "STREAMLIT": TokenType.STREAMLIT, 687 } 688 689 SINGLE_TOKENS = { 690 **tokens.Tokenizer.SINGLE_TOKENS, 691 "$": TokenType.PARAMETER, 692 } 693 694 VAR_SINGLE_TOKENS = {"$"} 695 696 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 697 698 class Generator(generator.Generator): 699 PARAMETER_TOKEN = "$" 700 MATCHED_BY_SOURCE = False 701 SINGLE_STRING_INTERVAL = True 702 JOIN_HINTS = False 703 TABLE_HINTS = False 704 QUERY_HINTS = False 705 AGGREGATE_FILTER_SUPPORTED = False 706 SUPPORTS_TABLE_COPY = False 707 COLLATE_IS_FUNC = True 708 LIMIT_ONLY_LITERALS = True 709 JSON_KEY_VALUE_PAIR_SEP = "," 710 INSERT_OVERWRITE = " OVERWRITE INTO" 711 STRUCT_DELIMITER = ("(", ")") 712 COPY_PARAMS_ARE_WRAPPED = False 713 COPY_PARAMS_EQ_REQUIRED = True 714 STAR_EXCEPT = "EXCLUDE" 715 716 TRANSFORMS = { 717 **generator.Generator.TRANSFORMS, 718 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 719 exp.ArgMax: rename_func("MAX_BY"), 720 exp.ArgMin: rename_func("MIN_BY"), 721 exp.Array: inline_array_sql, 722 exp.ArrayConcat: rename_func("ARRAY_CAT"), 723 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 724 exp.AtTimeZone: lambda self, e: self.func( 725 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 726 ), 727 exp.BitwiseXor: rename_func("BITXOR"), 728 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 729 exp.DateAdd: date_delta_sql("DATEADD"), 730 exp.DateDiff: date_delta_sql("DATEDIFF"), 731 exp.DateStrToDate: datestrtodate_sql, 732 exp.DayOfMonth: rename_func("DAYOFMONTH"), 733 exp.DayOfWeek: rename_func("DAYOFWEEK"), 734 exp.DayOfYear: rename_func("DAYOFYEAR"), 735 exp.Explode: rename_func("FLATTEN"), 736 exp.Extract: rename_func("DATE_PART"), 737 exp.FromTimeZone: lambda self, e: self.func( 738 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 739 ), 740 exp.GenerateSeries: lambda self, e: self.func( 741 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 742 ), 743 exp.GroupConcat: rename_func("LISTAGG"), 744 exp.If: if_sql(name="IFF", false_value="NULL"), 745 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 746 exp.JSONExtractScalar: lambda self, e: self.func( 747 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 748 ), 749 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 750 exp.JSONPathRoot: lambda *_: "", 751 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 752 exp.LogicalOr: rename_func("BOOLOR_AGG"), 753 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 754 exp.Max: max_or_greatest, 755 exp.Min: min_or_least, 756 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 757 exp.PercentileCont: transforms.preprocess( 758 [transforms.add_within_group_for_percentiles] 759 ), 760 exp.PercentileDisc: transforms.preprocess( 761 [transforms.add_within_group_for_percentiles] 762 ), 763 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 764 exp.RegexpILike: _regexpilike_sql, 765 exp.Rand: rename_func("RANDOM"), 766 exp.Select: transforms.preprocess( 767 [ 768 transforms.eliminate_distinct_on, 769 transforms.explode_to_unnest(), 770 transforms.eliminate_semi_and_anti_joins, 771 ] 772 ), 773 exp.SHA: rename_func("SHA1"), 774 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 775 exp.StartsWith: rename_func("STARTSWITH"), 776 exp.StrPosition: lambda self, e: self.func( 777 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 778 ), 779 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 780 exp.Stuff: rename_func("INSERT"), 781 exp.TimeAdd: date_delta_sql("TIMEADD"), 782 exp.TimestampDiff: lambda self, e: self.func( 783 "TIMESTAMPDIFF", e.unit, e.expression, e.this 784 ), 785 exp.TimestampTrunc: timestamptrunc_sql(), 786 exp.TimeStrToTime: timestrtotime_sql, 787 exp.TimeToStr: lambda self, e: self.func( 788 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 789 ), 790 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 791 exp.ToArray: rename_func("TO_ARRAY"), 792 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 793 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 794 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 795 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 796 exp.TsOrDsToDate: lambda self, e: self.func( 797 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 798 ), 799 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 800 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 801 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 802 exp.Xor: rename_func("BOOLXOR"), 803 } 804 805 SUPPORTED_JSON_PATH_PARTS = { 806 exp.JSONPathKey, 807 exp.JSONPathRoot, 808 exp.JSONPathSubscript, 809 } 810 811 TYPE_MAPPING = { 812 **generator.Generator.TYPE_MAPPING, 813 exp.DataType.Type.NESTED: "OBJECT", 814 exp.DataType.Type.STRUCT: "OBJECT", 815 } 816 817 PROPERTIES_LOCATION = { 818 **generator.Generator.PROPERTIES_LOCATION, 819 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 820 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 821 } 822 823 UNSUPPORTED_VALUES_EXPRESSIONS = { 824 exp.Map, 825 exp.StarMap, 826 exp.Struct, 827 exp.VarMap, 828 } 829 830 def with_properties(self, properties: exp.Properties) -> str: 831 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 832 833 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 834 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 835 values_as_table = False 836 837 return super().values_sql(expression, values_as_table=values_as_table) 838 839 def datatype_sql(self, expression: exp.DataType) -> str: 840 expressions = expression.expressions 841 if ( 842 expressions 843 and expression.is_type(*exp.DataType.STRUCT_TYPES) 844 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 845 ): 846 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 847 return "OBJECT" 848 849 return super().datatype_sql(expression) 850 851 def tonumber_sql(self, expression: exp.ToNumber) -> str: 852 return self.func( 853 "TO_NUMBER", 854 expression.this, 855 expression.args.get("format"), 856 expression.args.get("precision"), 857 expression.args.get("scale"), 858 ) 859 860 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 861 milli = expression.args.get("milli") 862 if milli is not None: 863 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 864 expression.set("nano", milli_to_nano) 865 866 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 867 868 def trycast_sql(self, expression: exp.TryCast) -> str: 869 value = expression.this 870 871 if value.type is None: 872 from sqlglot.optimizer.annotate_types import annotate_types 873 874 value = annotate_types(value) 875 876 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 877 return super().trycast_sql(expression) 878 879 # TRY_CAST only works for string values in Snowflake 880 return self.cast_sql(expression) 881 882 def log_sql(self, expression: exp.Log) -> str: 883 if not expression.expression: 884 return self.func("LN", expression.this) 885 886 return super().log_sql(expression) 887 888 def unnest_sql(self, expression: exp.Unnest) -> str: 889 unnest_alias = expression.args.get("alias") 890 offset = expression.args.get("offset") 891 892 columns = [ 893 exp.to_identifier("seq"), 894 exp.to_identifier("key"), 895 exp.to_identifier("path"), 896 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 897 seq_get(unnest_alias.columns if unnest_alias else [], 0) 898 or exp.to_identifier("value"), 899 exp.to_identifier("this"), 900 ] 901 902 if unnest_alias: 903 unnest_alias.set("columns", columns) 904 else: 905 unnest_alias = exp.TableAlias(this="_u", columns=columns) 906 907 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 908 alias = self.sql(unnest_alias) 909 alias = f" AS {alias}" if alias else "" 910 return f"{explode}{alias}" 911 912 def show_sql(self, expression: exp.Show) -> str: 913 terse = "TERSE " if expression.args.get("terse") else "" 914 history = " HISTORY" if expression.args.get("history") else "" 915 like = self.sql(expression, "like") 916 like = f" LIKE {like}" if like else "" 917 918 scope = self.sql(expression, "scope") 919 scope = f" {scope}" if scope else "" 920 921 scope_kind = self.sql(expression, "scope_kind") 922 if scope_kind: 923 scope_kind = f" IN {scope_kind}" 924 925 starts_with = self.sql(expression, "starts_with") 926 if starts_with: 927 starts_with = f" STARTS WITH {starts_with}" 928 929 limit = self.sql(expression, "limit") 930 931 from_ = self.sql(expression, "from") 932 if from_: 933 from_ = f" FROM {from_}" 934 935 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 936 937 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 938 # Other dialects don't support all of the following parameters, so we need to 939 # generate default values as necessary to ensure the transpilation is correct 940 group = expression.args.get("group") 941 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 942 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 943 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 944 945 return self.func( 946 "REGEXP_SUBSTR", 947 expression.this, 948 expression.expression, 949 position, 950 occurrence, 951 parameters, 952 group, 953 ) 954 955 def except_op(self, expression: exp.Except) -> str: 956 if not expression.args.get("distinct"): 957 self.unsupported("EXCEPT with All is not supported in Snowflake") 958 return super().except_op(expression) 959 960 def intersect_op(self, expression: exp.Intersect) -> str: 961 if not expression.args.get("distinct"): 962 self.unsupported("INTERSECT with All is not supported in Snowflake") 963 return super().intersect_op(expression) 964 965 def describe_sql(self, expression: exp.Describe) -> str: 966 # Default to table if kind is unknown 967 kind_value = expression.args.get("kind") or "TABLE" 968 kind = f" {kind_value}" if kind_value else "" 969 this = f" {self.sql(expression, 'this')}" 970 expressions = self.expressions(expression, flat=True) 971 expressions = f" {expressions}" if expressions else "" 972 return f"DESCRIBE{kind}{this}{expressions}" 973 974 def generatedasidentitycolumnconstraint_sql( 975 self, expression: exp.GeneratedAsIdentityColumnConstraint 976 ) -> str: 977 start = expression.args.get("start") 978 start = f" START {start}" if start else "" 979 increment = expression.args.get("increment") 980 increment = f" INCREMENT {increment}" if increment else "" 981 return f"AUTOINCREMENT{start}{increment}" 982 983 def swaptable_sql(self, expression: exp.SwapTable) -> str: 984 this = self.sql(expression, "this") 985 return f"SWAP WITH {this}" 986 987 def cluster_sql(self, expression: exp.Cluster) -> str: 988 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 989 990 def struct_sql(self, expression: exp.Struct) -> str: 991 keys = [] 992 values = [] 993 994 for i, e in enumerate(expression.expressions): 995 if isinstance(e, exp.PropertyEQ): 996 keys.append( 997 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 998 ) 999 values.append(e.expression) 1000 else: 1001 keys.append(exp.Literal.string(f"_{i}")) 1002 values.append(e) 1003 1004 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1005 1006 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1007 if expression.args.get("weight") or expression.args.get("accuracy"): 1008 self.unsupported( 1009 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1010 ) 1011 1012 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1013 1014 def alterset_sql(self, expression: exp.AlterSet) -> str: 1015 exprs = self.expressions(expression, flat=True) 1016 exprs = f" {exprs}" if exprs else "" 1017 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1018 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1019 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1020 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1021 tag = self.expressions(expression, key="tag", flat=True) 1022 tag = f" TAG {tag}" if tag else "" 1023 1024 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LISTAGG": exp.GroupConcat.from_arg_list, 287 "MEDIAN": lambda args: exp.PercentileCont( 288 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 289 ), 290 "NULLIFZERO": _build_if_from_nullifzero, 291 "OBJECT_CONSTRUCT": _build_object_construct, 292 "REGEXP_REPLACE": _build_regexp_replace, 293 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 294 "RLIKE": exp.RegexpLike.from_arg_list, 295 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 296 "TIMEADD": _build_date_time_add(exp.TimeAdd), 297 "TIMEDIFF": _build_datediff, 298 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 299 "TIMESTAMPDIFF": _build_datediff, 300 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 301 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 302 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 303 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 304 "TO_NUMBER": lambda args: exp.ToNumber( 305 this=seq_get(args, 0), 306 format=seq_get(args, 1), 307 precision=seq_get(args, 2), 308 scale=seq_get(args, 3), 309 ), 310 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 311 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 312 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 313 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 314 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 315 "TO_VARCHAR": exp.ToChar.from_arg_list, 316 "ZEROIFNULL": _build_if_from_zeroifnull, 317 } 318 319 FUNCTION_PARSERS = { 320 **parser.Parser.FUNCTION_PARSERS, 321 "DATE_PART": lambda self: self._parse_date_part(), 322 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 323 } 324 FUNCTION_PARSERS.pop("TRIM") 325 326 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 327 328 RANGE_PARSERS = { 329 **parser.Parser.RANGE_PARSERS, 330 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 331 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 332 } 333 334 ALTER_PARSERS = { 335 **parser.Parser.ALTER_PARSERS, 336 "UNSET": lambda self: self.expression( 337 exp.Set, 338 tag=self._match_text_seq("TAG"), 339 expressions=self._parse_csv(self._parse_id_var), 340 unset=True, 341 ), 342 "SWAP": lambda self: self._parse_alter_table_swap(), 343 } 344 345 STATEMENT_PARSERS = { 346 **parser.Parser.STATEMENT_PARSERS, 347 TokenType.SHOW: lambda self: self._parse_show(), 348 } 349 350 PROPERTY_PARSERS = { 351 **parser.Parser.PROPERTY_PARSERS, 352 "LOCATION": lambda self: self._parse_location_property(), 353 } 354 355 TYPE_CONVERTERS = { 356 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 357 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 358 } 359 360 SHOW_PARSERS = { 361 "SCHEMAS": _show_parser("SCHEMAS"), 362 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 363 "OBJECTS": _show_parser("OBJECTS"), 364 "TERSE OBJECTS": _show_parser("OBJECTS"), 365 "TABLES": _show_parser("TABLES"), 366 "TERSE TABLES": _show_parser("TABLES"), 367 "VIEWS": _show_parser("VIEWS"), 368 "TERSE VIEWS": _show_parser("VIEWS"), 369 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 370 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 371 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 372 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 373 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 374 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 375 "SEQUENCES": _show_parser("SEQUENCES"), 376 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 377 "COLUMNS": _show_parser("COLUMNS"), 378 "USERS": _show_parser("USERS"), 379 "TERSE USERS": _show_parser("USERS"), 380 } 381 382 CONSTRAINT_PARSERS = { 383 **parser.Parser.CONSTRAINT_PARSERS, 384 "WITH": lambda self: self._parse_with_constraint(), 385 "MASKING": lambda self: self._parse_with_constraint(), 386 "PROJECTION": lambda self: self._parse_with_constraint(), 387 "TAG": lambda self: self._parse_with_constraint(), 388 } 389 390 STAGED_FILE_SINGLE_TOKENS = { 391 TokenType.DOT, 392 TokenType.MOD, 393 TokenType.SLASH, 394 } 395 396 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 397 398 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 399 400 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 401 402 LAMBDAS = { 403 **parser.Parser.LAMBDAS, 404 TokenType.ARROW: lambda self, expressions: self.expression( 405 exp.Lambda, 406 this=self._replace_lambda( 407 self._parse_assignment(), 408 expressions, 409 ), 410 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 411 ), 412 } 413 414 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 415 if self._prev.token_type != TokenType.WITH: 416 self._retreat(self._index - 1) 417 418 if self._match_text_seq("MASKING", "POLICY"): 419 policy = self._parse_column() 420 return self.expression( 421 exp.MaskingPolicyColumnConstraint, 422 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 423 expressions=self._match(TokenType.USING) 424 and self._parse_wrapped_csv(self._parse_id_var), 425 ) 426 if self._match_text_seq("PROJECTION", "POLICY"): 427 policy = self._parse_column() 428 return self.expression( 429 exp.ProjectionPolicyColumnConstraint, 430 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 431 ) 432 if self._match(TokenType.TAG): 433 return self.expression( 434 exp.TagColumnConstraint, 435 expressions=self._parse_wrapped_csv(self._parse_property), 436 ) 437 438 return None 439 440 def _parse_create(self) -> exp.Create | exp.Command: 441 expression = super()._parse_create() 442 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 443 # Replace the Table node with the enclosed Identifier 444 expression.this.replace(expression.this.this) 445 446 return expression 447 448 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 449 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 450 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 451 this = self._parse_var() or self._parse_type() 452 453 if not this: 454 return None 455 456 self._match(TokenType.COMMA) 457 expression = self._parse_bitwise() 458 this = map_date_part(this) 459 name = this.name.upper() 460 461 if name.startswith("EPOCH"): 462 if name == "EPOCH_MILLISECOND": 463 scale = 10**3 464 elif name == "EPOCH_MICROSECOND": 465 scale = 10**6 466 elif name == "EPOCH_NANOSECOND": 467 scale = 10**9 468 else: 469 scale = None 470 471 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 472 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 473 474 if scale: 475 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 476 477 return to_unix 478 479 return self.expression(exp.Extract, this=this, expression=expression) 480 481 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 482 if is_map: 483 # Keys are strings in Snowflake's objects, see also: 484 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 485 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 486 return self._parse_slice(self._parse_string()) 487 488 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 489 490 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 491 lateral = super()._parse_lateral() 492 if not lateral: 493 return lateral 494 495 if isinstance(lateral.this, exp.Explode): 496 table_alias = lateral.args.get("alias") 497 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 498 if table_alias and not table_alias.args.get("columns"): 499 table_alias.set("columns", columns) 500 elif not table_alias: 501 exp.alias_(lateral, "_flattened", table=columns, copy=False) 502 503 return lateral 504 505 def _parse_at_before(self, table: exp.Table) -> exp.Table: 506 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 507 index = self._index 508 if self._match_texts(("AT", "BEFORE")): 509 this = self._prev.text.upper() 510 kind = ( 511 self._match(TokenType.L_PAREN) 512 and self._match_texts(self.HISTORICAL_DATA_KIND) 513 and self._prev.text.upper() 514 ) 515 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 516 517 if expression: 518 self._match_r_paren() 519 when = self.expression( 520 exp.HistoricalData, this=this, kind=kind, expression=expression 521 ) 522 table.set("when", when) 523 else: 524 self._retreat(index) 525 526 return table 527 528 def _parse_table_parts( 529 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 530 ) -> exp.Table: 531 # https://docs.snowflake.com/en/user-guide/querying-stage 532 if self._match(TokenType.STRING, advance=False): 533 table = self._parse_string() 534 elif self._match_text_seq("@", advance=False): 535 table = self._parse_location_path() 536 else: 537 table = None 538 539 if table: 540 file_format = None 541 pattern = None 542 543 wrapped = self._match(TokenType.L_PAREN) 544 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 545 if self._match_text_seq("FILE_FORMAT", "=>"): 546 file_format = self._parse_string() or super()._parse_table_parts( 547 is_db_reference=is_db_reference 548 ) 549 elif self._match_text_seq("PATTERN", "=>"): 550 pattern = self._parse_string() 551 else: 552 break 553 554 self._match(TokenType.COMMA) 555 556 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 557 else: 558 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 559 560 return self._parse_at_before(table) 561 562 def _parse_id_var( 563 self, 564 any_token: bool = True, 565 tokens: t.Optional[t.Collection[TokenType]] = None, 566 ) -> t.Optional[exp.Expression]: 567 if self._match_text_seq("IDENTIFIER", "("): 568 identifier = ( 569 super()._parse_id_var(any_token=any_token, tokens=tokens) 570 or self._parse_string() 571 ) 572 self._match_r_paren() 573 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 574 575 return super()._parse_id_var(any_token=any_token, tokens=tokens) 576 577 def _parse_show_snowflake(self, this: str) -> exp.Show: 578 scope = None 579 scope_kind = None 580 581 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 582 # which is syntactically valid but has no effect on the output 583 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 584 585 history = self._match_text_seq("HISTORY") 586 587 like = self._parse_string() if self._match(TokenType.LIKE) else None 588 589 if self._match(TokenType.IN): 590 if self._match_text_seq("ACCOUNT"): 591 scope_kind = "ACCOUNT" 592 elif self._match_set(self.DB_CREATABLES): 593 scope_kind = self._prev.text.upper() 594 if self._curr: 595 scope = self._parse_table_parts() 596 elif self._curr: 597 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 598 scope = self._parse_table_parts() 599 600 return self.expression( 601 exp.Show, 602 **{ 603 "terse": terse, 604 "this": this, 605 "history": history, 606 "like": like, 607 "scope": scope, 608 "scope_kind": scope_kind, 609 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 610 "limit": self._parse_limit(), 611 "from": self._parse_string() if self._match(TokenType.FROM) else None, 612 }, 613 ) 614 615 def _parse_alter_table_swap(self) -> exp.SwapTable: 616 self._match_text_seq("WITH") 617 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 618 619 def _parse_location_property(self) -> exp.LocationProperty: 620 self._match(TokenType.EQ) 621 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 622 623 def _parse_file_location(self) -> t.Optional[exp.Expression]: 624 # Parse either a subquery or a staged file 625 return ( 626 self._parse_select(table=True, parse_subquery_alias=False) 627 if self._match(TokenType.L_PAREN, advance=False) 628 else self._parse_table_parts() 629 ) 630 631 def _parse_location_path(self) -> exp.Var: 632 parts = [self._advance_any(ignore_reserved=True)] 633 634 # We avoid consuming a comma token because external tables like @foo and @bar 635 # can be joined in a query with a comma separator, as well as closing paren 636 # in case of subqueries 637 while self._is_connected() and not self._match_set( 638 (TokenType.COMMA, TokenType.R_PAREN), advance=False 639 ): 640 parts.append(self._advance_any(ignore_reserved=True)) 641 642 return exp.var("".join(part.text for part in parts if part)) 643 644 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 645 this = super()._parse_lambda_arg() 646 647 if not this: 648 return this 649 650 typ = self._parse_types() 651 652 if typ: 653 return self.expression(exp.Cast, this=this, to=typ) 654 655 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
657 class Tokenizer(tokens.Tokenizer): 658 STRING_ESCAPES = ["\\", "'"] 659 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 660 RAW_STRINGS = ["$$"] 661 COMMENTS = ["--", "//", ("/*", "*/")] 662 663 KEYWORDS = { 664 **tokens.Tokenizer.KEYWORDS, 665 "BYTEINT": TokenType.INT, 666 "CHAR VARYING": TokenType.VARCHAR, 667 "CHARACTER VARYING": TokenType.VARCHAR, 668 "EXCLUDE": TokenType.EXCEPT, 669 "ILIKE ANY": TokenType.ILIKE_ANY, 670 "LIKE ANY": TokenType.LIKE_ANY, 671 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 672 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 673 "MINUS": TokenType.EXCEPT, 674 "NCHAR VARYING": TokenType.VARCHAR, 675 "PUT": TokenType.COMMAND, 676 "REMOVE": TokenType.COMMAND, 677 "RM": TokenType.COMMAND, 678 "SAMPLE": TokenType.TABLE_SAMPLE, 679 "SQL_DOUBLE": TokenType.DOUBLE, 680 "SQL_VARCHAR": TokenType.VARCHAR, 681 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 682 "TAG": TokenType.TAG, 683 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 684 "TOP": TokenType.TOP, 685 "WAREHOUSE": TokenType.WAREHOUSE, 686 "STREAMLIT": TokenType.STREAMLIT, 687 } 688 689 SINGLE_TOKENS = { 690 **tokens.Tokenizer.SINGLE_TOKENS, 691 "$": TokenType.PARAMETER, 692 } 693 694 VAR_SINGLE_TOKENS = {"$"} 695 696 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
698 class Generator(generator.Generator): 699 PARAMETER_TOKEN = "$" 700 MATCHED_BY_SOURCE = False 701 SINGLE_STRING_INTERVAL = True 702 JOIN_HINTS = False 703 TABLE_HINTS = False 704 QUERY_HINTS = False 705 AGGREGATE_FILTER_SUPPORTED = False 706 SUPPORTS_TABLE_COPY = False 707 COLLATE_IS_FUNC = True 708 LIMIT_ONLY_LITERALS = True 709 JSON_KEY_VALUE_PAIR_SEP = "," 710 INSERT_OVERWRITE = " OVERWRITE INTO" 711 STRUCT_DELIMITER = ("(", ")") 712 COPY_PARAMS_ARE_WRAPPED = False 713 COPY_PARAMS_EQ_REQUIRED = True 714 STAR_EXCEPT = "EXCLUDE" 715 716 TRANSFORMS = { 717 **generator.Generator.TRANSFORMS, 718 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 719 exp.ArgMax: rename_func("MAX_BY"), 720 exp.ArgMin: rename_func("MIN_BY"), 721 exp.Array: inline_array_sql, 722 exp.ArrayConcat: rename_func("ARRAY_CAT"), 723 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 724 exp.AtTimeZone: lambda self, e: self.func( 725 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 726 ), 727 exp.BitwiseXor: rename_func("BITXOR"), 728 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 729 exp.DateAdd: date_delta_sql("DATEADD"), 730 exp.DateDiff: date_delta_sql("DATEDIFF"), 731 exp.DateStrToDate: datestrtodate_sql, 732 exp.DayOfMonth: rename_func("DAYOFMONTH"), 733 exp.DayOfWeek: rename_func("DAYOFWEEK"), 734 exp.DayOfYear: rename_func("DAYOFYEAR"), 735 exp.Explode: rename_func("FLATTEN"), 736 exp.Extract: rename_func("DATE_PART"), 737 exp.FromTimeZone: lambda self, e: self.func( 738 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 739 ), 740 exp.GenerateSeries: lambda self, e: self.func( 741 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 742 ), 743 exp.GroupConcat: rename_func("LISTAGG"), 744 exp.If: if_sql(name="IFF", false_value="NULL"), 745 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 746 exp.JSONExtractScalar: lambda self, e: self.func( 747 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 748 ), 749 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 750 exp.JSONPathRoot: lambda *_: "", 751 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 752 exp.LogicalOr: rename_func("BOOLOR_AGG"), 753 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 754 exp.Max: max_or_greatest, 755 exp.Min: min_or_least, 756 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 757 exp.PercentileCont: transforms.preprocess( 758 [transforms.add_within_group_for_percentiles] 759 ), 760 exp.PercentileDisc: transforms.preprocess( 761 [transforms.add_within_group_for_percentiles] 762 ), 763 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 764 exp.RegexpILike: _regexpilike_sql, 765 exp.Rand: rename_func("RANDOM"), 766 exp.Select: transforms.preprocess( 767 [ 768 transforms.eliminate_distinct_on, 769 transforms.explode_to_unnest(), 770 transforms.eliminate_semi_and_anti_joins, 771 ] 772 ), 773 exp.SHA: rename_func("SHA1"), 774 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 775 exp.StartsWith: rename_func("STARTSWITH"), 776 exp.StrPosition: lambda self, e: self.func( 777 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 778 ), 779 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 780 exp.Stuff: rename_func("INSERT"), 781 exp.TimeAdd: date_delta_sql("TIMEADD"), 782 exp.TimestampDiff: lambda self, e: self.func( 783 "TIMESTAMPDIFF", e.unit, e.expression, e.this 784 ), 785 exp.TimestampTrunc: timestamptrunc_sql(), 786 exp.TimeStrToTime: timestrtotime_sql, 787 exp.TimeToStr: lambda self, e: self.func( 788 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 789 ), 790 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 791 exp.ToArray: rename_func("TO_ARRAY"), 792 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 793 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 794 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 795 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 796 exp.TsOrDsToDate: lambda self, e: self.func( 797 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 798 ), 799 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 800 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 801 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 802 exp.Xor: rename_func("BOOLXOR"), 803 } 804 805 SUPPORTED_JSON_PATH_PARTS = { 806 exp.JSONPathKey, 807 exp.JSONPathRoot, 808 exp.JSONPathSubscript, 809 } 810 811 TYPE_MAPPING = { 812 **generator.Generator.TYPE_MAPPING, 813 exp.DataType.Type.NESTED: "OBJECT", 814 exp.DataType.Type.STRUCT: "OBJECT", 815 } 816 817 PROPERTIES_LOCATION = { 818 **generator.Generator.PROPERTIES_LOCATION, 819 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 820 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 821 } 822 823 UNSUPPORTED_VALUES_EXPRESSIONS = { 824 exp.Map, 825 exp.StarMap, 826 exp.Struct, 827 exp.VarMap, 828 } 829 830 def with_properties(self, properties: exp.Properties) -> str: 831 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 832 833 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 834 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 835 values_as_table = False 836 837 return super().values_sql(expression, values_as_table=values_as_table) 838 839 def datatype_sql(self, expression: exp.DataType) -> str: 840 expressions = expression.expressions 841 if ( 842 expressions 843 and expression.is_type(*exp.DataType.STRUCT_TYPES) 844 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 845 ): 846 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 847 return "OBJECT" 848 849 return super().datatype_sql(expression) 850 851 def tonumber_sql(self, expression: exp.ToNumber) -> str: 852 return self.func( 853 "TO_NUMBER", 854 expression.this, 855 expression.args.get("format"), 856 expression.args.get("precision"), 857 expression.args.get("scale"), 858 ) 859 860 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 861 milli = expression.args.get("milli") 862 if milli is not None: 863 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 864 expression.set("nano", milli_to_nano) 865 866 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 867 868 def trycast_sql(self, expression: exp.TryCast) -> str: 869 value = expression.this 870 871 if value.type is None: 872 from sqlglot.optimizer.annotate_types import annotate_types 873 874 value = annotate_types(value) 875 876 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 877 return super().trycast_sql(expression) 878 879 # TRY_CAST only works for string values in Snowflake 880 return self.cast_sql(expression) 881 882 def log_sql(self, expression: exp.Log) -> str: 883 if not expression.expression: 884 return self.func("LN", expression.this) 885 886 return super().log_sql(expression) 887 888 def unnest_sql(self, expression: exp.Unnest) -> str: 889 unnest_alias = expression.args.get("alias") 890 offset = expression.args.get("offset") 891 892 columns = [ 893 exp.to_identifier("seq"), 894 exp.to_identifier("key"), 895 exp.to_identifier("path"), 896 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 897 seq_get(unnest_alias.columns if unnest_alias else [], 0) 898 or exp.to_identifier("value"), 899 exp.to_identifier("this"), 900 ] 901 902 if unnest_alias: 903 unnest_alias.set("columns", columns) 904 else: 905 unnest_alias = exp.TableAlias(this="_u", columns=columns) 906 907 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 908 alias = self.sql(unnest_alias) 909 alias = f" AS {alias}" if alias else "" 910 return f"{explode}{alias}" 911 912 def show_sql(self, expression: exp.Show) -> str: 913 terse = "TERSE " if expression.args.get("terse") else "" 914 history = " HISTORY" if expression.args.get("history") else "" 915 like = self.sql(expression, "like") 916 like = f" LIKE {like}" if like else "" 917 918 scope = self.sql(expression, "scope") 919 scope = f" {scope}" if scope else "" 920 921 scope_kind = self.sql(expression, "scope_kind") 922 if scope_kind: 923 scope_kind = f" IN {scope_kind}" 924 925 starts_with = self.sql(expression, "starts_with") 926 if starts_with: 927 starts_with = f" STARTS WITH {starts_with}" 928 929 limit = self.sql(expression, "limit") 930 931 from_ = self.sql(expression, "from") 932 if from_: 933 from_ = f" FROM {from_}" 934 935 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 936 937 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 938 # Other dialects don't support all of the following parameters, so we need to 939 # generate default values as necessary to ensure the transpilation is correct 940 group = expression.args.get("group") 941 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 942 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 943 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 944 945 return self.func( 946 "REGEXP_SUBSTR", 947 expression.this, 948 expression.expression, 949 position, 950 occurrence, 951 parameters, 952 group, 953 ) 954 955 def except_op(self, expression: exp.Except) -> str: 956 if not expression.args.get("distinct"): 957 self.unsupported("EXCEPT with All is not supported in Snowflake") 958 return super().except_op(expression) 959 960 def intersect_op(self, expression: exp.Intersect) -> str: 961 if not expression.args.get("distinct"): 962 self.unsupported("INTERSECT with All is not supported in Snowflake") 963 return super().intersect_op(expression) 964 965 def describe_sql(self, expression: exp.Describe) -> str: 966 # Default to table if kind is unknown 967 kind_value = expression.args.get("kind") or "TABLE" 968 kind = f" {kind_value}" if kind_value else "" 969 this = f" {self.sql(expression, 'this')}" 970 expressions = self.expressions(expression, flat=True) 971 expressions = f" {expressions}" if expressions else "" 972 return f"DESCRIBE{kind}{this}{expressions}" 973 974 def generatedasidentitycolumnconstraint_sql( 975 self, expression: exp.GeneratedAsIdentityColumnConstraint 976 ) -> str: 977 start = expression.args.get("start") 978 start = f" START {start}" if start else "" 979 increment = expression.args.get("increment") 980 increment = f" INCREMENT {increment}" if increment else "" 981 return f"AUTOINCREMENT{start}{increment}" 982 983 def swaptable_sql(self, expression: exp.SwapTable) -> str: 984 this = self.sql(expression, "this") 985 return f"SWAP WITH {this}" 986 987 def cluster_sql(self, expression: exp.Cluster) -> str: 988 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 989 990 def struct_sql(self, expression: exp.Struct) -> str: 991 keys = [] 992 values = [] 993 994 for i, e in enumerate(expression.expressions): 995 if isinstance(e, exp.PropertyEQ): 996 keys.append( 997 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 998 ) 999 values.append(e.expression) 1000 else: 1001 keys.append(exp.Literal.string(f"_{i}")) 1002 values.append(e) 1003 1004 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1005 1006 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1007 if expression.args.get("weight") or expression.args.get("accuracy"): 1008 self.unsupported( 1009 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1010 ) 1011 1012 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1013 1014 def alterset_sql(self, expression: exp.AlterSet) -> str: 1015 exprs = self.expressions(expression, flat=True) 1016 exprs = f" {exprs}" if exprs else "" 1017 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1018 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1019 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1020 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1021 tag = self.expressions(expression, key="tag", flat=True) 1022 tag = f" TAG {tag}" if tag else "" 1023 1024 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
839 def datatype_sql(self, expression: exp.DataType) -> str: 840 expressions = expression.expressions 841 if ( 842 expressions 843 and expression.is_type(*exp.DataType.STRUCT_TYPES) 844 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 845 ): 846 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 847 return "OBJECT" 848 849 return super().datatype_sql(expression)
860 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 861 milli = expression.args.get("milli") 862 if milli is not None: 863 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 864 expression.set("nano", milli_to_nano) 865 866 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
868 def trycast_sql(self, expression: exp.TryCast) -> str: 869 value = expression.this 870 871 if value.type is None: 872 from sqlglot.optimizer.annotate_types import annotate_types 873 874 value = annotate_types(value) 875 876 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 877 return super().trycast_sql(expression) 878 879 # TRY_CAST only works for string values in Snowflake 880 return self.cast_sql(expression)
888 def unnest_sql(self, expression: exp.Unnest) -> str: 889 unnest_alias = expression.args.get("alias") 890 offset = expression.args.get("offset") 891 892 columns = [ 893 exp.to_identifier("seq"), 894 exp.to_identifier("key"), 895 exp.to_identifier("path"), 896 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 897 seq_get(unnest_alias.columns if unnest_alias else [], 0) 898 or exp.to_identifier("value"), 899 exp.to_identifier("this"), 900 ] 901 902 if unnest_alias: 903 unnest_alias.set("columns", columns) 904 else: 905 unnest_alias = exp.TableAlias(this="_u", columns=columns) 906 907 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 908 alias = self.sql(unnest_alias) 909 alias = f" AS {alias}" if alias else "" 910 return f"{explode}{alias}"
912 def show_sql(self, expression: exp.Show) -> str: 913 terse = "TERSE " if expression.args.get("terse") else "" 914 history = " HISTORY" if expression.args.get("history") else "" 915 like = self.sql(expression, "like") 916 like = f" LIKE {like}" if like else "" 917 918 scope = self.sql(expression, "scope") 919 scope = f" {scope}" if scope else "" 920 921 scope_kind = self.sql(expression, "scope_kind") 922 if scope_kind: 923 scope_kind = f" IN {scope_kind}" 924 925 starts_with = self.sql(expression, "starts_with") 926 if starts_with: 927 starts_with = f" STARTS WITH {starts_with}" 928 929 limit = self.sql(expression, "limit") 930 931 from_ = self.sql(expression, "from") 932 if from_: 933 from_ = f" FROM {from_}" 934 935 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
937 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 938 # Other dialects don't support all of the following parameters, so we need to 939 # generate default values as necessary to ensure the transpilation is correct 940 group = expression.args.get("group") 941 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 942 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 943 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 944 945 return self.func( 946 "REGEXP_SUBSTR", 947 expression.this, 948 expression.expression, 949 position, 950 occurrence, 951 parameters, 952 group, 953 )
965 def describe_sql(self, expression: exp.Describe) -> str: 966 # Default to table if kind is unknown 967 kind_value = expression.args.get("kind") or "TABLE" 968 kind = f" {kind_value}" if kind_value else "" 969 this = f" {self.sql(expression, 'this')}" 970 expressions = self.expressions(expression, flat=True) 971 expressions = f" {expressions}" if expressions else "" 972 return f"DESCRIBE{kind}{this}{expressions}"
974 def generatedasidentitycolumnconstraint_sql( 975 self, expression: exp.GeneratedAsIdentityColumnConstraint 976 ) -> str: 977 start = expression.args.get("start") 978 start = f" START {start}" if start else "" 979 increment = expression.args.get("increment") 980 increment = f" INCREMENT {increment}" if increment else "" 981 return f"AUTOINCREMENT{start}{increment}"
990 def struct_sql(self, expression: exp.Struct) -> str: 991 keys = [] 992 values = [] 993 994 for i, e in enumerate(expression.expressions): 995 if isinstance(e, exp.PropertyEQ): 996 keys.append( 997 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 998 ) 999 values.append(e.expression) 1000 else: 1001 keys.append(exp.Literal.string(f"_{i}")) 1002 values.append(e) 1003 1004 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1006 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1007 if expression.args.get("weight") or expression.args.get("accuracy"): 1008 self.unsupported( 1009 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1010 ) 1011 1012 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1014 def alterset_sql(self, expression: exp.AlterSet) -> str: 1015 exprs = self.expressions(expression, flat=True) 1016 exprs = f" {exprs}" if exprs else "" 1017 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1018 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1019 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1020 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1021 tag = self.expressions(expression, key="tag", flat=True) 1022 tag = f" TAG {tag}" if tag else "" 1023 1024 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql