sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
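

# Illustrative sketch (not part of the upstream module): the three builders
# above rewrite Snowflake-only conditionals into portable IF expressions, so
# they survive transpilation to engines without DIV0/ZEROIFNULL/NULLIFZERO.
# The exact output can vary across sqlglot versions; this is indicative only:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0]
#   # roughly: 'SELECT CASE WHEN b = 0 THEN 0 ELSE a / b END'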


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
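
    # Illustrative sketch (not part of the upstream module): TIME_MAPPING is
    # what build_formatted_time and format_time consult, so a Snowflake format
    # literal such as 'YYYY-MM-DD' is stored on the AST in strftime form and
    # re-rendered with each target dialect's own tokens. Indicative only:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("SELECT TO_TIMESTAMP(c, 'YYYY-MM-DD')", read="snowflake")
    #   >>> ast.find(sqlglot.exp.StrToTime).args["format"]  # roughly: a '%Y-%m-%d' literal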

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
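
        # Hedged examples (not part of the upstream module) of what the
        # FUNCTIONS overrides above produce; reprs are indicative and may
        # differ across sqlglot versions:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT TO_TIMESTAMP(1659981729)", read="snowflake")
        #   # the integer argument routes _build_datetime to exp.UnixToTime
        #   >>> sqlglot.parse_one("SELECT IFF(c, 1, 2)", read="snowflake")
        #   # IFF parses to exp.If, which most other dialects render as CASE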

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }
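
        # Illustrative sketch (not part of the upstream module): SHOW_PARSERS
        # above turns Snowflake's SHOW variants into structured exp.Show nodes
        # instead of opaque commands. Indicative only:
        #
        #   >>> import sqlglot
        #   >>> show = sqlglot.parse_one(
        #   ...     "SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1", read="snowflake"
        #   ... )
        #   >>> show.args.get("scope_kind")  # roughly: 'SCHEMA'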

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral
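
        # Hedged sketch of _parse_date_part's EPOCH_* handling (output is
        # indicative): the part is rewritten to a TimeToUnix over a TIMESTAMP
        # cast, scaled for milli/micro/nanoseconds, so engines without
        # Snowflake's EPOCH_* parts can still evaluate it.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT DATE_PART(epoch_second, c)", read="snowflake", write="duckdb"
        #   ... )[0]
        #   # roughly: 'SELECT EPOCH(CAST(c AS TIMESTAMP))'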

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )
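
        # Illustrative sketch (not part of the upstream module):
        # _parse_table_parts above lets staged-file queries parse into a Table
        # node carrying the format and pattern options instead of failing on
        # the '@' reference. Indicative only:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one(
        #   ...     "SELECT * FROM @mystage/data (FILE_FORMAT => 'my_csv', PATTERN => '.*[.]csv')",
        #   ...     read="snowflake",
        #   ... )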

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
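
    # Hedged notes (not part of the upstream module) on the Tokenizer
    # overrides: "$$" delimits raw strings and "//" starts a line comment in
    # Snowflake, so both of these parse cleanly (indicative):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT $$a 'quoted' string$$", read="snowflake")
    #   >>> sqlglot.parse_one("SELECT 1 // trailing comment", read="snowflake")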

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }
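
        # Hedged example (not part of the upstream module) of TRANSFORMS in
        # action when writing canonical nodes back out as Snowflake SQL; the
        # output is indicative:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT IF(c, 1, 2)", read="duckdb", write="snowflake")[0]
        #   # roughly: 'SELECT IFF(c, 1, 2)'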

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
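
        # Illustrative sketch of trycast_sql above (output indicative): since
        # Snowflake's TRY_CAST only accepts string inputs, non-text operands
        # degrade to a plain CAST.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT TRY_CAST(1 AS TEXT)", write="snowflake")[0]
        #   # roughly: 'SELECT CAST(1 AS TEXT)'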

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments is not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
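
        # Hedged sketch of struct_sql above (output indicative): struct
        # literals become OBJECT_CONSTRUCT calls with alternating key/value
        # arguments.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake"
        #   ... )[0]
        #   # roughly: "SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)"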

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
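

# Hedged end-to-end sketch (not part of the upstream module): regexpextract_sql
# pads the optional REGEXP_SUBSTR arguments (position, occurrence, parameters)
# so that an explicit capture group survives transpilation. Output indicative:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "SELECT REGEXP_EXTRACT(c, 'a(b)', 1)", read="duckdb", write="snowflake"
#   ... )[0]
#   # roughly: "SELECT REGEXP_SUBSTR(c, 'a(b)', 1, 1, 'c', 1)"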
exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 978 ) 979 values.append(e.expression) 980 else: 981 keys.append(exp.Literal.string(f"_{i}")) 982 values.append(e) 983 984 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 985 986 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 987 if expression.args.get("weight") or expression.args.get("accuracy"): 988 self.unsupported( 989 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 990 ) 991 992 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 993 994 def alterset_sql(self, expression: exp.AlterSet) -> str: 995 exprs = self.expressions(expression, flat=True) 996 exprs = f" {exprs}" if exprs else "" 997 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 998 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 999 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1000 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1001 tag = self.expressions(expression, key="tag", flat=True) 1002 tag = f" TAG {tag}" if tag else "" 1003 1004 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
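A minimal sketch of this rewrite in code, assuming the flag is consulted by sqlglot's qualify optimizer step (the exact output below is indicative, not verbatim):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = (
        "WITH y(c) AS ("
        "  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0"
        ") SELECT c FROM y"
    )
    # PREFER_CTE_ALIAS_COLUMN makes the CTE column alias `c` override any
    # projection alias inside the CTE, so the inner projection gains `AS c`.
    expression = qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake")
    print(expression.sql(dialect="snowflake"))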
Associates this dialect's time formats with their equivalent Python strftime formats.
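For instance, transpiling a Snowflake format string to a strftime-based dialect goes through this mapping (a sketch; the exact output may vary by sqlglot version):

    import sqlglot

    # Snowflake's 'yyyy-mm-dd' is mapped to strftime's '%Y-%m-%d'
    print(sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-01', 'yyyy-mm-dd')",
        read="snowflake",
        write="duckdb",
    )[0])
    # e.g. SELECT STRPTIME('2024-01-01', '%Y-%m-%d')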
224 def quote_identifier(self, expression: E, identify: bool = True) -> E: 225 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 226 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 227 if ( 228 isinstance(expression, exp.Identifier) 229 and isinstance(expression.parent, exp.Table) 230 and expression.name.lower() == "dual" 231 ): 232 return expression # type: ignore 233 234 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
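A small usage sketch of the DUAL carve-out. Since the method is a no-op for non-Identifier nodes, it can be mapped over a whole parsed tree with Expression.transform (the printed output is indicative):

    from sqlglot import parse_one
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    tree = parse_one("SELECT a FROM DUAL", read="snowflake")
    # DUAL stays bare because Snowflake treats the unquoted keyword specially,
    # while ordinary identifiers are quoted when identify=True.
    quoted = tree.transform(lambda node: dialect.quote_identifier(node, identify=True))
    print(quoted.sql(dialect="snowflake"))  # e.g. SELECT "a" FROM DUAL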
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
236 class Parser(parser.Parser): 237 IDENTIFY_PIVOT_STRINGS = True 238 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 239 COLON_IS_VARIANT_EXTRACT = True 240 241 ID_VAR_TOKENS = { 242 *parser.Parser.ID_VAR_TOKENS, 243 TokenType.MATCH_CONDITION, 244 } 245 246 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 247 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 248 249 FUNCTIONS = { 250 **parser.Parser.FUNCTIONS, 251 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 252 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 253 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 254 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 255 this=seq_get(args, 1), expression=seq_get(args, 0) 256 ), 257 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 258 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 259 start=seq_get(args, 0), 260 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 261 step=seq_get(args, 2), 262 ), 263 "BITXOR": binary_from_function(exp.BitwiseXor), 264 "BIT_XOR": binary_from_function(exp.BitwiseXor), 265 "BOOLXOR": binary_from_function(exp.Xor), 266 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 267 "DATE_TRUNC": _date_trunc_to_time, 268 "DATEADD": _build_date_time_add(exp.DateAdd), 269 "DATEDIFF": _build_datediff, 270 "DIV0": _build_if_from_div0, 271 "FLATTEN": exp.Explode.from_arg_list, 272 "GET_PATH": lambda args, dialect: exp.JSONExtract( 273 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 274 ), 275 "IFF": exp.If.from_arg_list, 276 "LAST_DAY": lambda args: exp.LastDay( 277 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 278 ), 279 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 280 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 281 "LISTAGG": exp.GroupConcat.from_arg_list, 282 "MEDIAN": lambda args: exp.PercentileCont( 283 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 284 ), 285 "NULLIFZERO": _build_if_from_nullifzero, 286 "OBJECT_CONSTRUCT": _build_object_construct, 287 "REGEXP_REPLACE": _build_regexp_replace, 288 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 289 "RLIKE": exp.RegexpLike.from_arg_list, 290 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 291 "TIMEADD": _build_date_time_add(exp.TimeAdd), 292 "TIMEDIFF": _build_datediff, 293 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 294 "TIMESTAMPDIFF": _build_datediff, 295 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 296 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 297 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 298 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 299 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 300 "TO_NUMBER": lambda args: exp.ToNumber( 301 this=seq_get(args, 0), 302 format=seq_get(args, 1), 303 precision=seq_get(args, 2), 304 scale=seq_get(args, 3), 305 ), 306 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 307 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 308 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 309 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 310 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 311 "TO_VARCHAR": exp.ToChar.from_arg_list, 312 "ZEROIFNULL": _build_if_from_zeroifnull, 313 } 314 315 
FUNCTION_PARSERS = { 316 **parser.Parser.FUNCTION_PARSERS, 317 "DATE_PART": lambda self: self._parse_date_part(), 318 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 319 } 320 FUNCTION_PARSERS.pop("TRIM") 321 322 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 323 324 RANGE_PARSERS = { 325 **parser.Parser.RANGE_PARSERS, 326 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 327 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 328 } 329 330 ALTER_PARSERS = { 331 **parser.Parser.ALTER_PARSERS, 332 "UNSET": lambda self: self.expression( 333 exp.Set, 334 tag=self._match_text_seq("TAG"), 335 expressions=self._parse_csv(self._parse_id_var), 336 unset=True, 337 ), 338 "SWAP": lambda self: self._parse_alter_table_swap(), 339 } 340 341 STATEMENT_PARSERS = { 342 **parser.Parser.STATEMENT_PARSERS, 343 TokenType.SHOW: lambda self: self._parse_show(), 344 } 345 346 PROPERTY_PARSERS = { 347 **parser.Parser.PROPERTY_PARSERS, 348 "LOCATION": lambda self: self._parse_location_property(), 349 } 350 351 TYPE_CONVERTERS = { 352 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 353 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 354 } 355 356 SHOW_PARSERS = { 357 "SCHEMAS": _show_parser("SCHEMAS"), 358 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 359 "OBJECTS": _show_parser("OBJECTS"), 360 "TERSE OBJECTS": _show_parser("OBJECTS"), 361 "TABLES": _show_parser("TABLES"), 362 "TERSE TABLES": _show_parser("TABLES"), 363 "VIEWS": _show_parser("VIEWS"), 364 "TERSE VIEWS": _show_parser("VIEWS"), 365 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 366 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 367 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 368 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 369 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 370 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 371 "SEQUENCES": _show_parser("SEQUENCES"), 372 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 373 "COLUMNS": _show_parser("COLUMNS"), 374 "USERS": _show_parser("USERS"), 375 "TERSE USERS": _show_parser("USERS"), 376 } 377 378 CONSTRAINT_PARSERS = { 379 **parser.Parser.CONSTRAINT_PARSERS, 380 "WITH": lambda self: self._parse_with_constraint(), 381 "MASKING": lambda self: self._parse_with_constraint(), 382 "PROJECTION": lambda self: self._parse_with_constraint(), 383 "TAG": lambda self: self._parse_with_constraint(), 384 } 385 386 STAGED_FILE_SINGLE_TOKENS = { 387 TokenType.DOT, 388 TokenType.MOD, 389 TokenType.SLASH, 390 } 391 392 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 393 394 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 395 396 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 397 398 LAMBDAS = { 399 **parser.Parser.LAMBDAS, 400 TokenType.ARROW: lambda self, expressions: self.expression( 401 exp.Lambda, 402 this=self._replace_lambda( 403 self._parse_assignment(), 404 expressions, 405 ), 406 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 407 ), 408 } 409 410 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 411 if self._prev.token_type != TokenType.WITH: 412 self._retreat(self._index - 1) 413 414 if self._match_text_seq("MASKING", "POLICY"): 415 policy = self._parse_column() 416 return self.expression( 417 exp.MaskingPolicyColumnConstraint, 418 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 419 expressions=self._match(TokenType.USING) 420 and 
self._parse_wrapped_csv(self._parse_id_var), 421 ) 422 if self._match_text_seq("PROJECTION", "POLICY"): 423 policy = self._parse_column() 424 return self.expression( 425 exp.ProjectionPolicyColumnConstraint, 426 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 427 ) 428 if self._match(TokenType.TAG): 429 return self.expression( 430 exp.TagColumnConstraint, 431 expressions=self._parse_wrapped_csv(self._parse_property), 432 ) 433 434 return None 435 436 def _parse_create(self) -> exp.Create | exp.Command: 437 expression = super()._parse_create() 438 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 439 # Replace the Table node with the enclosed Identifier 440 expression.this.replace(expression.this.this) 441 442 return expression 443 444 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 445 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 446 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 447 this = self._parse_var() or self._parse_type() 448 449 if not this: 450 return None 451 452 self._match(TokenType.COMMA) 453 expression = self._parse_bitwise() 454 this = map_date_part(this) 455 name = this.name.upper() 456 457 if name.startswith("EPOCH"): 458 if name == "EPOCH_MILLISECOND": 459 scale = 10**3 460 elif name == "EPOCH_MICROSECOND": 461 scale = 10**6 462 elif name == "EPOCH_NANOSECOND": 463 scale = 10**9 464 else: 465 scale = None 466 467 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 468 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 469 470 if scale: 471 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 472 473 return to_unix 474 475 return self.expression(exp.Extract, this=this, expression=expression) 476 477 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 478 if is_map: 479 # Keys are strings in Snowflake's objects, see also: 480 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 481 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 482 return self._parse_slice(self._parse_string()) 483 484 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 485 486 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 487 lateral = super()._parse_lateral() 488 if not lateral: 489 return lateral 490 491 if isinstance(lateral.this, exp.Explode): 492 table_alias = lateral.args.get("alias") 493 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 494 if table_alias and not table_alias.args.get("columns"): 495 table_alias.set("columns", columns) 496 elif not table_alias: 497 exp.alias_(lateral, "_flattened", table=columns, copy=False) 498 499 return lateral 500 501 def _parse_table_parts( 502 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 503 ) -> exp.Table: 504 # https://docs.snowflake.com/en/user-guide/querying-stage 505 if self._match(TokenType.STRING, advance=False): 506 table = self._parse_string() 507 elif self._match_text_seq("@", advance=False): 508 table = self._parse_location_path() 509 else: 510 table = None 511 512 if table: 513 file_format = None 514 pattern = None 515 516 wrapped = self._match(TokenType.L_PAREN) 517 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 518 if self._match_text_seq("FILE_FORMAT", "=>"): 519 file_format = self._parse_string() or 
super()._parse_table_parts( 520 is_db_reference=is_db_reference 521 ) 522 elif self._match_text_seq("PATTERN", "=>"): 523 pattern = self._parse_string() 524 else: 525 break 526 527 self._match(TokenType.COMMA) 528 529 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 530 else: 531 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 532 533 return table 534 535 def _parse_id_var( 536 self, 537 any_token: bool = True, 538 tokens: t.Optional[t.Collection[TokenType]] = None, 539 ) -> t.Optional[exp.Expression]: 540 if self._match_text_seq("IDENTIFIER", "("): 541 identifier = ( 542 super()._parse_id_var(any_token=any_token, tokens=tokens) 543 or self._parse_string() 544 ) 545 self._match_r_paren() 546 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 547 548 return super()._parse_id_var(any_token=any_token, tokens=tokens) 549 550 def _parse_show_snowflake(self, this: str) -> exp.Show: 551 scope = None 552 scope_kind = None 553 554 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 555 # which is syntactically valid but has no effect on the output 556 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 557 558 history = self._match_text_seq("HISTORY") 559 560 like = self._parse_string() if self._match(TokenType.LIKE) else None 561 562 if self._match(TokenType.IN): 563 if self._match_text_seq("ACCOUNT"): 564 scope_kind = "ACCOUNT" 565 elif self._match_set(self.DB_CREATABLES): 566 scope_kind = self._prev.text.upper() 567 if self._curr: 568 scope = self._parse_table_parts() 569 elif self._curr: 570 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 571 scope = self._parse_table_parts() 572 573 return self.expression( 574 exp.Show, 575 **{ 576 "terse": terse, 577 "this": this, 578 "history": history, 579 "like": like, 580 "scope": scope, 581 "scope_kind": scope_kind, 582 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 583 "limit": self._parse_limit(), 584 "from": self._parse_string() if self._match(TokenType.FROM) else None, 585 }, 586 ) 587 588 def _parse_alter_table_swap(self) -> exp.SwapTable: 589 self._match_text_seq("WITH") 590 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 591 592 def _parse_location_property(self) -> exp.LocationProperty: 593 self._match(TokenType.EQ) 594 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 595 596 def _parse_file_location(self) -> t.Optional[exp.Expression]: 597 # Parse either a subquery or a staged file 598 return ( 599 self._parse_select(table=True, parse_subquery_alias=False) 600 if self._match(TokenType.L_PAREN, advance=False) 601 else self._parse_table_parts() 602 ) 603 604 def _parse_location_path(self) -> exp.Var: 605 parts = [self._advance_any(ignore_reserved=True)] 606 607 # We avoid consuming a comma token because external tables like @foo and @bar 608 # can be joined in a query with a comma separator, as well as closing paren 609 # in case of subqueries 610 while self._is_connected() and not self._match_set( 611 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 612 ): 613 parts.append(self._advance_any(ignore_reserved=True)) 614 615 return exp.var("".join(part.text for part in parts if part)) 616 617 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 618 this = super()._parse_lambda_arg() 619 620 if not this: 621 return this 622 623 typ = self._parse_types() 624 625 if typ: 626 return 
self.expression(exp.Cast, this=this, to=typ) 627 628 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
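A short parsing sketch with this dialect, showing Snowflake-only functions being normalized into portable expressions (the target output is indicative):

    import sqlglot

    # DIV0 and ZEROIFNULL are parsed into IF expressions, so they can be
    # rendered in dialects that lack these functions.
    expression = sqlglot.parse_one("SELECT DIV0(a, b), ZEROIFNULL(c)", read="snowflake")
    print(expression.sql(dialect="duckdb"))
    # e.g. SELECT CASE WHEN b = 0 THEN 0 ELSE a / b END,
    #             CASE WHEN c IS NULL THEN 0 ELSE c END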
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
630 class Tokenizer(tokens.Tokenizer): 631 STRING_ESCAPES = ["\\", "'"] 632 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 633 RAW_STRINGS = ["$$"] 634 COMMENTS = ["--", "//", ("/*", "*/")] 635 636 KEYWORDS = { 637 **tokens.Tokenizer.KEYWORDS, 638 "BYTEINT": TokenType.INT, 639 "CHAR VARYING": TokenType.VARCHAR, 640 "CHARACTER VARYING": TokenType.VARCHAR, 641 "EXCLUDE": TokenType.EXCEPT, 642 "ILIKE ANY": TokenType.ILIKE_ANY, 643 "LIKE ANY": TokenType.LIKE_ANY, 644 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 645 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 646 "MINUS": TokenType.EXCEPT, 647 "NCHAR VARYING": TokenType.VARCHAR, 648 "PUT": TokenType.COMMAND, 649 "REMOVE": TokenType.COMMAND, 650 "RM": TokenType.COMMAND, 651 "SAMPLE": TokenType.TABLE_SAMPLE, 652 "SQL_DOUBLE": TokenType.DOUBLE, 653 "SQL_VARCHAR": TokenType.VARCHAR, 654 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 655 "TAG": TokenType.TAG, 656 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 657 "TOP": TokenType.TOP, 658 "WAREHOUSE": TokenType.WAREHOUSE, 659 "STREAMLIT": TokenType.STREAMLIT, 660 } 661 KEYWORDS.pop("/*+") 662 663 SINGLE_TOKENS = { 664 **tokens.Tokenizer.SINGLE_TOKENS, 665 "$": TokenType.PARAMETER, 666 } 667 668 VAR_SINGLE_TOKENS = {"$"} 669 670 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
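A quick tokenization sketch exercising two of the rules above: $$...$$ raw strings and // line comments.

    from sqlglot.dialects.snowflake import Snowflake

    # '$$...$$' is a raw string (no escape processing) and '//' opens a comment
    for token in Snowflake().tokenize("SELECT $$C:\\new\\dir$$ // trailing comment"):
        print(token.token_type, repr(token.text))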
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
672 class Generator(generator.Generator): 673 PARAMETER_TOKEN = "$" 674 MATCHED_BY_SOURCE = False 675 SINGLE_STRING_INTERVAL = True 676 JOIN_HINTS = False 677 TABLE_HINTS = False 678 QUERY_HINTS = False 679 AGGREGATE_FILTER_SUPPORTED = False 680 SUPPORTS_TABLE_COPY = False 681 COLLATE_IS_FUNC = True 682 LIMIT_ONLY_LITERALS = True 683 JSON_KEY_VALUE_PAIR_SEP = "," 684 INSERT_OVERWRITE = " OVERWRITE INTO" 685 STRUCT_DELIMITER = ("(", ")") 686 COPY_PARAMS_ARE_WRAPPED = False 687 COPY_PARAMS_EQ_REQUIRED = True 688 STAR_EXCEPT = "EXCLUDE" 689 SUPPORTS_EXPLODING_PROJECTIONS = False 690 ARRAY_CONCAT_IS_VAR_LEN = False 691 SUPPORTS_CONVERT_TIMEZONE = True 692 693 TRANSFORMS = { 694 **generator.Generator.TRANSFORMS, 695 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 696 exp.ArgMax: rename_func("MAX_BY"), 697 exp.ArgMin: rename_func("MIN_BY"), 698 exp.Array: inline_array_sql, 699 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 700 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 701 exp.AtTimeZone: lambda self, e: self.func( 702 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 703 ), 704 exp.BitwiseXor: rename_func("BITXOR"), 705 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 706 exp.DateAdd: date_delta_sql("DATEADD"), 707 exp.DateDiff: date_delta_sql("DATEDIFF"), 708 exp.DateStrToDate: datestrtodate_sql, 709 exp.DayOfMonth: rename_func("DAYOFMONTH"), 710 exp.DayOfWeek: rename_func("DAYOFWEEK"), 711 exp.DayOfYear: rename_func("DAYOFYEAR"), 712 exp.Explode: rename_func("FLATTEN"), 713 exp.Extract: rename_func("DATE_PART"), 714 exp.FromTimeZone: lambda self, e: self.func( 715 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 716 ), 717 exp.GenerateSeries: lambda self, e: self.func( 718 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 719 ), 720 exp.GroupConcat: rename_func("LISTAGG"), 721 exp.If: if_sql(name="IFF", false_value="NULL"), 722 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 723 exp.JSONExtractScalar: lambda self, e: self.func( 724 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 725 ), 726 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 727 exp.JSONPathRoot: lambda *_: "", 728 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 729 exp.LogicalOr: rename_func("BOOLOR_AGG"), 730 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 731 exp.Max: max_or_greatest, 732 exp.Min: min_or_least, 733 exp.ParseJSON: lambda self, e: self.func( 734 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 735 ), 736 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 737 exp.PercentileCont: transforms.preprocess( 738 [transforms.add_within_group_for_percentiles] 739 ), 740 exp.PercentileDisc: transforms.preprocess( 741 [transforms.add_within_group_for_percentiles] 742 ), 743 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 744 exp.RegexpILike: _regexpilike_sql, 745 exp.Rand: rename_func("RANDOM"), 746 exp.Select: transforms.preprocess( 747 [ 748 transforms.eliminate_distinct_on, 749 transforms.explode_to_unnest(), 750 transforms.eliminate_semi_and_anti_joins, 751 ] 752 ), 753 exp.SHA: rename_func("SHA1"), 754 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 755 exp.StartsWith: rename_func("STARTSWITH"), 756 exp.StrPosition: lambda self, e: self.func( 757 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 758 ), 759 
exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 760 exp.Stuff: rename_func("INSERT"), 761 exp.TimeAdd: date_delta_sql("TIMEADD"), 762 exp.TimestampDiff: lambda self, e: self.func( 763 "TIMESTAMPDIFF", e.unit, e.expression, e.this 764 ), 765 exp.TimestampTrunc: timestamptrunc_sql(), 766 exp.TimeStrToTime: timestrtotime_sql, 767 exp.TimeToStr: lambda self, e: self.func( 768 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 769 ), 770 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 771 exp.ToArray: rename_func("TO_ARRAY"), 772 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 773 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 774 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 775 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 776 exp.TsOrDsToDate: lambda self, e: self.func( 777 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 778 ), 779 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 780 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 781 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 782 exp.Xor: rename_func("BOOLXOR"), 783 } 784 785 SUPPORTED_JSON_PATH_PARTS = { 786 exp.JSONPathKey, 787 exp.JSONPathRoot, 788 exp.JSONPathSubscript, 789 } 790 791 TYPE_MAPPING = { 792 **generator.Generator.TYPE_MAPPING, 793 exp.DataType.Type.NESTED: "OBJECT", 794 exp.DataType.Type.STRUCT: "OBJECT", 795 } 796 797 PROPERTIES_LOCATION = { 798 **generator.Generator.PROPERTIES_LOCATION, 799 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 800 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 801 } 802 803 UNSUPPORTED_VALUES_EXPRESSIONS = { 804 exp.Map, 805 exp.StarMap, 806 exp.Struct, 807 exp.VarMap, 808 } 809 810 def with_properties(self, properties: exp.Properties) -> str: 811 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 812 813 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 814 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 815 values_as_table = False 816 817 return super().values_sql(expression, values_as_table=values_as_table) 818 819 def datatype_sql(self, expression: exp.DataType) -> str: 820 expressions = expression.expressions 821 if ( 822 expressions 823 and expression.is_type(*exp.DataType.STRUCT_TYPES) 824 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 825 ): 826 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 827 return "OBJECT" 828 829 return super().datatype_sql(expression) 830 831 def tonumber_sql(self, expression: exp.ToNumber) -> str: 832 return self.func( 833 "TO_NUMBER", 834 expression.this, 835 expression.args.get("format"), 836 expression.args.get("precision"), 837 expression.args.get("scale"), 838 ) 839 840 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 841 milli = expression.args.get("milli") 842 if milli is not None: 843 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 844 expression.set("nano", milli_to_nano) 845 846 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 847 848 def trycast_sql(self, expression: exp.TryCast) -> str: 849 value = expression.this 850 851 if value.type is None: 852 from sqlglot.optimizer.annotate_types import annotate_types 853 854 value = annotate_types(value) 855 856 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 857 return super().trycast_sql(expression) 858 859 
# TRY_CAST only works for string values in Snowflake 860 return self.cast_sql(expression) 861 862 def log_sql(self, expression: exp.Log) -> str: 863 if not expression.expression: 864 return self.func("LN", expression.this) 865 866 return super().log_sql(expression) 867 868 def unnest_sql(self, expression: exp.Unnest) -> str: 869 unnest_alias = expression.args.get("alias") 870 offset = expression.args.get("offset") 871 872 columns = [ 873 exp.to_identifier("seq"), 874 exp.to_identifier("key"), 875 exp.to_identifier("path"), 876 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 877 seq_get(unnest_alias.columns if unnest_alias else [], 0) 878 or exp.to_identifier("value"), 879 exp.to_identifier("this"), 880 ] 881 882 if unnest_alias: 883 unnest_alias.set("columns", columns) 884 else: 885 unnest_alias = exp.TableAlias(this="_u", columns=columns) 886 887 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 888 alias = self.sql(unnest_alias) 889 alias = f" AS {alias}" if alias else "" 890 return f"{explode}{alias}" 891 892 def show_sql(self, expression: exp.Show) -> str: 893 terse = "TERSE " if expression.args.get("terse") else "" 894 history = " HISTORY" if expression.args.get("history") else "" 895 like = self.sql(expression, "like") 896 like = f" LIKE {like}" if like else "" 897 898 scope = self.sql(expression, "scope") 899 scope = f" {scope}" if scope else "" 900 901 scope_kind = self.sql(expression, "scope_kind") 902 if scope_kind: 903 scope_kind = f" IN {scope_kind}" 904 905 starts_with = self.sql(expression, "starts_with") 906 if starts_with: 907 starts_with = f" STARTS WITH {starts_with}" 908 909 limit = self.sql(expression, "limit") 910 911 from_ = self.sql(expression, "from") 912 if from_: 913 from_ = f" FROM {from_}" 914 915 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 916 917 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 918 # Other dialects don't support all of the following parameters, so we need to 919 # generate default values as necessary to ensure the transpilation is correct 920 group = expression.args.get("group") 921 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 922 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 923 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 924 925 return self.func( 926 "REGEXP_SUBSTR", 927 expression.this, 928 expression.expression, 929 position, 930 occurrence, 931 parameters, 932 group, 933 ) 934 935 def except_op(self, expression: exp.Except) -> str: 936 if not expression.args.get("distinct"): 937 self.unsupported("EXCEPT with All is not supported in Snowflake") 938 return super().except_op(expression) 939 940 def intersect_op(self, expression: exp.Intersect) -> str: 941 if not expression.args.get("distinct"): 942 self.unsupported("INTERSECT with All is not supported in Snowflake") 943 return super().intersect_op(expression) 944 945 def describe_sql(self, expression: exp.Describe) -> str: 946 # Default to table if kind is unknown 947 kind_value = expression.args.get("kind") or "TABLE" 948 kind = f" {kind_value}" if kind_value else "" 949 this = f" {self.sql(expression, 'this')}" 950 expressions = self.expressions(expression, flat=True) 951 expressions = f" {expressions}" if expressions else "" 952 return f"DESCRIBE{kind}{this}{expressions}" 953 954 def generatedasidentitycolumnconstraint_sql( 955 
self, expression: exp.GeneratedAsIdentityColumnConstraint 956 ) -> str: 957 start = expression.args.get("start") 958 start = f" START {start}" if start else "" 959 increment = expression.args.get("increment") 960 increment = f" INCREMENT {increment}" if increment else "" 961 return f"AUTOINCREMENT{start}{increment}" 962 963 def swaptable_sql(self, expression: exp.SwapTable) -> str: 964 this = self.sql(expression, "this") 965 return f"SWAP WITH {this}" 966 967 def cluster_sql(self, expression: exp.Cluster) -> str: 968 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 969 970 def struct_sql(self, expression: exp.Struct) -> str: 971 keys = [] 972 values = [] 973 974 for i, e in enumerate(expression.expressions): 975 if isinstance(e, exp.PropertyEQ): 976 keys.append( 977 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 978 ) 979 values.append(e.expression) 980 else: 981 keys.append(exp.Literal.string(f"_{i}")) 982 values.append(e) 983 984 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 985 986 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 987 if expression.args.get("weight") or expression.args.get("accuracy"): 988 self.unsupported( 989 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 990 ) 991 992 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 993 994 def alterset_sql(self, expression: exp.AlterSet) -> str: 995 exprs = self.expressions(expression, flat=True) 996 exprs = f" {exprs}" if exprs else "" 997 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 998 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 999 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1000 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1001 tag = self.expressions(expression, key="tag", flat=True) 1002 tag = f" TAG {tag}" if tag else "" 1003 1004 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
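These options are plain keyword arguments on the top-level helpers; for example, pretty-printing through the Snowflake generator:

    import sqlglot

    print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="snowflake", pretty=True)[0])
    # SELECT
    #   a,
    #   b
    # FROM t
    # WHERE
    #   a > 1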
819 def datatype_sql(self, expression: exp.DataType) -> str: 820 expressions = expression.expressions 821 if ( 822 expressions 823 and expression.is_type(*exp.DataType.STRUCT_TYPES) 824 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 825 ): 826 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 827 return "OBJECT" 828 829 return super().datatype_sql(expression)
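For example, a typed struct column from another dialect collapses to a bare OBJECT (a sketch; output is indicative):

    import sqlglot

    # Snowflake's typed OBJECT(...) syntax is only kept for Iceberg tables,
    # so a typed STRUCT falls back to plain OBJECT here.
    print(sqlglot.transpile("CREATE TABLE t (col STRUCT(a INT))", read="duckdb", write="snowflake")[0])
    # e.g. CREATE TABLE t (col OBJECT)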
840 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 841 milli = expression.args.get("milli") 842 if milli is not None: 843 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 844 expression.set("nano", milli_to_nano) 845 846 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
848 def trycast_sql(self, expression: exp.TryCast) -> str: 849 value = expression.this 850 851 if value.type is None: 852 from sqlglot.optimizer.annotate_types import annotate_types 853 854 value = annotate_types(value) 855 856 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 857 return super().trycast_sql(expression) 858 859 # TRY_CAST only works for string values in Snowflake 860 return self.cast_sql(expression)
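Illustrative behavior (a sketch; exact type names may differ):

    import sqlglot

    # An operand of unknown type may still be a string, so TRY_CAST is kept;
    # a numeric literal clearly is not, so it degrades to a plain CAST.
    print(sqlglot.transpile(
        "SELECT TRY_CAST(a AS INT), TRY_CAST(1 AS TEXT)",
        read="duckdb",
        write="snowflake",
    )[0])
    # e.g. SELECT TRY_CAST(a AS INT), CAST(1 AS TEXT)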
868 def unnest_sql(self, expression: exp.Unnest) -> str: 869 unnest_alias = expression.args.get("alias") 870 offset = expression.args.get("offset") 871 872 columns = [ 873 exp.to_identifier("seq"), 874 exp.to_identifier("key"), 875 exp.to_identifier("path"), 876 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 877 seq_get(unnest_alias.columns if unnest_alias else [], 0) 878 or exp.to_identifier("value"), 879 exp.to_identifier("this"), 880 ] 881 882 if unnest_alias: 883 unnest_alias.set("columns", columns) 884 else: 885 unnest_alias = exp.TableAlias(this="_u", columns=columns) 886 887 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 888 alias = self.sql(unnest_alias) 889 alias = f" AS {alias}" if alias else "" 890 return f"{explode}{alias}"
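For example, an UNNEST becomes a FLATTEN table function with Snowflake's fixed output columns (a sketch; output is indicative):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT col FROM UNNEST([1, 2, 3]) AS t(col)",
        read="duckdb",
        write="snowflake",
    )[0])
    # e.g. SELECT col FROM TABLE(FLATTEN(INPUT => [1, 2, 3]))
    #      AS t(seq, key, path, index, col, this)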
892 def show_sql(self, expression: exp.Show) -> str: 893 terse = "TERSE " if expression.args.get("terse") else "" 894 history = " HISTORY" if expression.args.get("history") else "" 895 like = self.sql(expression, "like") 896 like = f" LIKE {like}" if like else "" 897 898 scope = self.sql(expression, "scope") 899 scope = f" {scope}" if scope else "" 900 901 scope_kind = self.sql(expression, "scope_kind") 902 if scope_kind: 903 scope_kind = f" IN {scope_kind}" 904 905 starts_with = self.sql(expression, "starts_with") 906 if starts_with: 907 starts_with = f" STARTS WITH {starts_with}" 908 909 limit = self.sql(expression, "limit") 910 911 from_ = self.sql(expression, "from") 912 if from_: 913 from_ = f" FROM {from_}" 914 915 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
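Because SHOW is parsed into a structured exp.Show node rather than an opaque command, it round-trips through the generator (a sketch):

    import sqlglot

    sql = "SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.schema1"
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
    # e.g. SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.schema1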
917 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 918 # Other dialects don't support all of the following parameters, so we need to 919 # generate default values as necessary to ensure the transpilation is correct 920 group = expression.args.get("group") 921 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 922 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 923 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 924 925 return self.func( 926 "REGEXP_SUBSTR", 927 expression.this, 928 expression.expression, 929 position, 930 occurrence, 931 parameters, 932 group, 933 )
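For example, passing only a group forces the earlier defaults to be emitted so the group lands in the correct REGEXP_SUBSTR argument slot (output is indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 1)", read="duckdb", write="snowflake")[0])
    # e.g. SELECT REGEXP_SUBSTR(s, 'a(b)', 1, 1, 'c', 1)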
945 def describe_sql(self, expression: exp.Describe) -> str: 946 # Default to table if kind is unknown 947 kind_value = expression.args.get("kind") or "TABLE" 948 kind = f" {kind_value}" if kind_value else "" 949 this = f" {self.sql(expression, 'this')}" 950 expressions = self.expressions(expression, flat=True) 951 expressions = f" {expressions}" if expressions else "" 952 return f"DESCRIBE{kind}{this}{expressions}"
954 def generatedasidentitycolumnconstraint_sql( 955 self, expression: exp.GeneratedAsIdentityColumnConstraint 956 ) -> str: 957 start = expression.args.get("start") 958 start = f" START {start}" if start else "" 959 increment = expression.args.get("increment") 960 increment = f" INCREMENT {increment}" if increment else "" 961 return f"AUTOINCREMENT{start}{increment}"
970 def struct_sql(self, expression: exp.Struct) -> str: 971 keys = [] 972 values = [] 973 974 for i, e in enumerate(expression.expressions): 975 if isinstance(e, exp.PropertyEQ): 976 keys.append( 977 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 978 ) 979 values.append(e.expression) 980 else: 981 keys.append(exp.Literal.string(f"_{i}")) 982 values.append(e) 983 984 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
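For example (a sketch; output is indicative):

    import sqlglot

    # Named fields keep their keys; positional fields get keys like '_0'
    print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])
    # e.g. SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)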
986 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 987 if expression.args.get("weight") or expression.args.get("accuracy"): 988 self.unsupported( 989 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 990 ) 991 992 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
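For example (a sketch; output is indicative):

    import sqlglot

    # A weight or accuracy argument would trigger an unsupported warning instead
    print(sqlglot.transpile("SELECT APPROX_QUANTILE(x, 0.5)", read="duckdb", write="snowflake")[0])
    # e.g. SELECT APPROX_PERCENTILE(x, 0.5)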
994 def alterset_sql(self, expression: exp.AlterSet) -> str: 995 exprs = self.expressions(expression, flat=True) 996 exprs = f" {exprs}" if exprs else "" 997 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 998 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 999 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1000 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1001 tag = self.expressions(expression, key="tag", flat=True) 1002 tag = f" TAG {tag}" if tag else "" 1003 1004 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql