sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_to_date_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType


def _check_int(s: str) -> bool:
    if s[0] in ("-", "+"):
        return s[1:].isdigit()
    return s.isdigit()


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)

        # case: <numeric_expr> [ , <scale> ]
        if second_arg.name not in ["0", "3", "9"]:
            raise ValueError(
                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
            )

        if second_arg.name == "0":
            timescale = exp.UnixToTime.SECONDS
        elif second_arg.name == "3":
            timescale = exp.UnixToTime.MILLIS
        elif second_arg.name == "9":
            timescale = exp.UnixToTime.NANOS

        return exp.UnixToTime(this=first_arg, scale=timescale)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if _check_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.parse_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _parse_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _unix_to_time_sql(self: Snowflake.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TO_TIMESTAMP({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TO_TIMESTAMP({timestamp}, 3)"
    if scale == exp.UnixToTime.MICROS:
        return f"TO_TIMESTAMP({timestamp} / 1000, 3)"
    if scale == exp.UnixToTime.NANOS:
        return f"TO_TIMESTAMP({timestamp}, 9)"

    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
    return ""


# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()

    name = this.name.upper()
    if name.startswith("EPOCH"):
        if name.startswith("EPOCH_MILLISECOND"):
            scale = 10**3
        elif name.startswith("EPOCH_MICROSECOND"):
            scale = 10**6
        elif name.startswith("EPOCH_NANOSECOND"):
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _parse_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _parse_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return t.cast(E, expression)

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATEDIFF": _parse_datediff,
            "DIV0": _div0_to_if,
            "FLATTEN": exp.Explode.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "REGEXP_REPLACE": _parse_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _parse_datediff,
            "TIMESTAMPDIFF": _parse_datediff,
            "TO_TIMESTAMP": _parse_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        FUNCTION_PARSERS.pop("TRIM")

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket, this=this, expressions=[path]
            ),
        }

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts()
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text
                    if self._curr:
                        scope = self._parse_table()
            elif self._curr:
                scope_kind = "TABLE"
                scope = self._parse_table()

            return self.expression(exp.Show, this=this, scope=scope, scope_kind=scope_kind)

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            return f"SHOW {expression.name}{scope_kind}{scope}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")
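As a usage sketch, the dialect is normally driven through sqlglot's top-level API. The function names below are public sqlglot API; the generated SQL shown in comments is approximate and can vary across sqlglot versions:

import sqlglot
from sqlglot import exp

# Snowflake-only functions are normalized into dialect-agnostic AST nodes at
# parse time; DIV0(a, b) becomes an IF expression via _div0_to_if above.
ast = sqlglot.parse_one("SELECT DIV0(a, b) FROM t", read="snowflake")
print(ast.find(exp.If) is not None)  # True

# Generating for another dialect renders that AST with the target's
# conventions; the output below is approximate and version-dependent.
print(sqlglot.transpile("SELECT IFF(a > 0, 1, 2)", read="snowflake", write="duckdb")[0])
# e.g. SELECT CASE WHEN a > 0 THEN 1 ELSE 2 END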
Dialect settings overridden by class Snowflake(Dialect):

NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

Specifies the strategy according to which identifiers should be normalized. Snowflake resolves unquoted identifiers as uppercase.
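A minimal sketch of the effect, using sqlglot's public Dialect.normalize_identifier helper (the identifier names are illustrative):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# Unquoted identifiers normalize to uppercase under this strategy...
print(dialect.normalize_identifier(exp.to_identifier("col")).name)  # COL

# ...while quoted identifiers keep their original casing.
print(dialect.normalize_identifier(exp.to_identifier("col", quoted=True)).name)  # col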
NULL_ORDERING = "nulls_are_large"

Indicates the default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
SUPPORTS_USER_DEFINED_TYPES = False

Determines whether or not user-defined data types are supported.
PREFER_CTE_ALIAS_COLUMN = True

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag causes the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
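A hedged sketch of how this flag is typically exercised. It assumes the optimizer's qualify step is where PREFER_CTE_ALIAS_COLUMN is consumed (column qualification), and the exact output may vary by sqlglot version:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y
"""

# With the Snowflake dialect, the CTE alias column `c` should win over any
# projection aliases inside the subquery, per the rewrite shown above.
expression = sqlglot.parse_one(sql, read="snowflake")
print(qualify(expression, dialect="snowflake").sql("snowflake"))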
TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
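TIME_MAPPING is what lets a Snowflake format string survive transpilation. A small sketch, where the target dialect and the exact output are illustrative and version-dependent:

import sqlglot

# Snowflake format tokens are mapped through TIME_MAPPING to strftime
# equivalents, then into the target dialect's own tokens.
print(sqlglot.transpile(
    "SELECT TO_TIMESTAMP('2024-01-02', 'YYYY-MM-DD')",
    read="snowflake",
    write="duckdb",
)[0])
# e.g. SELECT STRPTIME('2024-01-02', '%Y-%m-%d')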
def quote_identifier(self, expression: E, identify: bool = True) -> E

Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
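A short sketch of the DUAL carve-out implemented by this override (the table names are illustrative):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# A regular table identifier gets quoted on request...
tbl = exp.to_table("my_table")
print(dialect.quote_identifier(tbl.this, identify=True).sql(dialect="snowflake"))  # "my_table"

# ...but DUAL inside a Table node is left untouched, so it keeps its special
# meaning in SELECT ... FROM DUAL.
dual_tbl = exp.to_table("dual")
print(dialect.quote_identifier(dual_tbl.this, identify=True).sql(dialect="snowflake"))  # dual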
class Parser(parser.Parser)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
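A brief sketch of two Snowflake-specific parses handled by this class (the queries are illustrative):

import sqlglot
from sqlglot import exp

# The COLON column operator maps `a:b` to a Bracket (subscript) access,
# per COLUMN_OPERATORS above.
ast = sqlglot.parse_one("SELECT a:b FROM t", read="snowflake")
print(ast.find(exp.Bracket) is not None)  # True

# SHOW is a real statement here (the Tokenizer removes TokenType.SHOW from
# COMMANDS), so SHOW PRIMARY KEYS parses into an exp.Show node.
show = sqlglot.parse_one("SHOW PRIMARY KEYS IN ACCOUNT", read="snowflake")
print(type(show).__name__)  # Show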
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DataType: _datatype_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.Struct: lambda self, e: self.func(
            "OBJECT_CONSTRUCT",
            *(arg for expression in e.expressions for arg in expression.flatten()),
        ),
        exp.Stuff: rename_func("INSERT"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
    }

    STAR_MAPPING = {
        "except": "EXCLUDE",
        "replace": "RENAME",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        return f"SHOW {expression.name}{scope_kind}{scope}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")
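The TRANSFORMS table is easiest to read alongside a concrete transpilation. A minimal sketch of generating Snowflake SQL; the printed outputs are indicative for this version of the dialect:

import sqlglot

# exp.If renders through if_sql(name="IFF", false_value="NULL").
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', NULL) FROM t", write="snowflake")[0])
# SELECT IFF(x > 0, 'pos', NULL) FROM t

# exp.GroupConcat is renamed to LISTAGG and exp.LogicalOr to BOOLOR_AGG.
print(sqlglot.transpile("SELECT GROUP_CONCAT(x), LOGICAL_OR(y) FROM t", write="snowflake")[0])
# SELECT LISTAGG(x), BOOLOR_AGG(y) FROM t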
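The overridden methods can likewise be exercised end to end. A hedged sketch, assuming DuckDB as the source dialect for the UNNEST and REGEXP_EXTRACT cases; the exact output strings are indicative:

import sqlglot

# trycast_sql: TRY_CAST is kept only when the operand is (or may be) a string,
# because Snowflake's TRY_CAST accepts string inputs only; otherwise plain CAST.
print(sqlglot.transpile("SELECT TRY_CAST('10' AS INT), TRY_CAST(10 AS VARCHAR)", write="snowflake")[0])
# SELECT TRY_CAST('10' AS INT), CAST(10 AS VARCHAR)

# unnest_sql: UNNEST is rewritten to TABLE(FLATTEN(INPUT => ...)) with the
# canonical seq/key/path/index/value/this column alias list.
print(sqlglot.transpile("SELECT x FROM UNNEST([1, 2]) AS t(x)", read="duckdb", write="snowflake")[0])

# regexpextract_sql: omitted REGEXP_EXTRACT arguments are filled with defaults
# so the generated REGEXP_SUBSTR call keeps the same semantics.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(a, 'b', 1)", read="duckdb", write="snowflake")[0])
# SELECT REGEXP_SUBSTR(a, 'b', 1, 1, 'c', 1)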
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): only quote when the dialect makes it mandatory; True or 'always': always quote; 'safe': only quote identifiers that are case-insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: Whether to normalize all function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. Only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions; only relevant when generating in pretty mode. Default: False (trailing).
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment, not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
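These options are forwarded by sqlglot.transpile, so they are quick to sketch; the pretty-printed output below is indicative:

import sqlglot

sql = "select a, b from t where a > 5"

# Pretty-print with every identifier quoted; both kwargs flow through to the generator.
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True)[0])
# SELECT
#   "a",
#   "b"
# FROM "t"
# WHERE
#   "a" > 5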
Inherited Members
- sqlglot.generator.Generator: all remaining settings and *_sql methods are inherited unchanged.