sqlglot.dialects.bigquery
```python
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_SET_OP = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
```
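To illustrate one of the transforms above, _derived_table_values_to_unnest rewrites a VALUES derived table into an UNNEST of STRUCTs, since BigQuery does not accept VALUES in the FROM position. A sketch, assuming a recent sqlglot release (the exact output string may vary by version):

```python
import sqlglot

# VALUES as a derived table is rewritten into UNNEST([STRUCT(...), ...])
# by _derived_table_values_to_unnest when targeting BigQuery.
sql = sqlglot.transpile(
    "SELECT x, y FROM (VALUES (1, 'a'), (2, 'b')) AS t(x, y)",
    read="duckdb",
    write="bigquery",
)[0]
print(sql)
# Expected, roughly:
# SELECT x, y FROM UNNEST([STRUCT(1 AS x, 'a' AS y), STRUCT(2 AS x, 'b' AS y)]) AS t
```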
class BigQuery(Dialect):
First day of the week in `DATE_TRUNC(week)`. Defaults to 0 (Monday); -1 would be Sunday.
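A minimal sketch of reading this setting off two dialects. The attribute name `WEEK_OFFSET` and the BigQuery value of -1 (weeks starting on Sunday) are assumptions based on the sqlglot source, not guaranteed by this page:

from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.postgres import Postgres

# BigQuery weeks start on Sunday (-1); the base default is 0 (Monday).
print(BigQuery.WEEK_OFFSET)   # assumed: -1
print(Postgres.WEEK_OFFSET)   # assumed: 0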
Whether the base comes first in the `LOG` function. Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
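As a hedged illustration, transpiling a two-argument LOG between BigQuery (base second) and Postgres (base first) should swap the arguments; the output shown is indicative:

import sqlglot

# BigQuery: LOG(value, base); Postgres: LOG(base, value). The flag above
# tells sqlglot which order each dialect expects, so the arguments flip.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="postgres")[0])
# expected: SELECT LOG(10, 100)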
Whether alias reference expansion (`_expand_alias_refs()`) should run before column qualification (`_qualify_columns()`).
For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects "my_id" would refer to "data.my_id" (which is what `_qualify_columns()` resolves it to) across the whole query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
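A sketch of this behavior through the optimizer's `qualify` pass; the printed SQL is indicative of the BigQuery resolution described above:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

# With the BigQuery rules, the alias `my_id` is expanded to `id` in the
# GROUP BY and HAVING clauses before columns get qualified.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))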
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python `strftime` formats.
Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
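To see the mapping in action, one can transpile a BigQuery format string into another dialect's tokens (the exact output is indicative):

import sqlglot

# PARSE_DATE's format is translated through the dialect's TIME_MAPPING when
# the expression is rewritten for the target dialect.
print(sqlglot.transpile("SELECT PARSE_DATE('%Y%m%d', '20230101')", read="bigquery", write="duckdb")[0])
# expected something like: SELECT CAST(STRPTIME('20230101', '%Y%m%d') AS DATE)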
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from `SELECT *` queries.
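In BigQuery's case these are the partition pseudocolumns; the exact set below is an assumption based on the sqlglot source:

from sqlglot.dialects.bigquery import BigQuery

# Engine-generated columns that, e.g., SELECT * expansion can skip.
print(BigQuery.PSEUDOCOLUMNS)  # assumed: {'_PARTITIONTIME', '_PARTITIONDATE'}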
278 def normalize_identifier(self, expression: E) -> E: 279 if ( 280 isinstance(expression, exp.Identifier) 281 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 282 ): 283 parent = expression.parent 284 while isinstance(parent, exp.Dot): 285 parent = parent.parent 286 287 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 288 # by default. The following check uses a heuristic to detect tables based on whether 289 # they are qualified. This should generally be correct, because tables in BigQuery 290 # must be qualified with at least a dataset, unless @@dataset_id is set. 291 case_sensitive = ( 292 isinstance(parent, exp.UserDefinedFunction) 293 or ( 294 isinstance(parent, exp.Table) 295 and parent.db 296 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 297 ) 298 or expression.meta.get("is_table") 299 ) 300 if not case_sensitive: 301 expression.set("this", expression.this.lower()) 302 303 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
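A small sketch of the BigQuery heuristic from the source above: bare identifiers are treated as case-insensitive, while parts of a dataset-qualified table keep their casing (behavior shown is indicative):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# A bare identifier could be a CTE or column reference -> lowercased.
print(dialect.normalize_identifier(exp.to_identifier("FoO")).name)  # foo

# Parts of a dataset-qualified table are assumed to name a real table
# and are therefore left untouched.
table = exp.to_table("MyDataset.MyTable")
print([dialect.normalize_identifier(part).name for part in table.parts])
# expected: ['MyDataset', 'MyTable']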
Mapping of an escaped sequence (e.g. `\n` as two characters) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
305 class Tokenizer(tokens.Tokenizer): 306 QUOTES = ["'", '"', '"""', "'''"] 307 COMMENTS = ["--", "#", ("/*", "*/")] 308 IDENTIFIERS = ["`"] 309 STRING_ESCAPES = ["\\"] 310 311 HEX_STRINGS = [("0x", ""), ("0X", "")] 312 313 BYTE_STRINGS = [ 314 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 315 ] 316 317 RAW_STRINGS = [ 318 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 319 ] 320 321 KEYWORDS = { 322 **tokens.Tokenizer.KEYWORDS, 323 "ANY TYPE": TokenType.VARIANT, 324 "BEGIN": TokenType.COMMAND, 325 "BEGIN TRANSACTION": TokenType.BEGIN, 326 "BYTEINT": TokenType.INT, 327 "BYTES": TokenType.BINARY, 328 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 329 "DATETIME": TokenType.TIMESTAMP, 330 "DECLARE": TokenType.COMMAND, 331 "ELSEIF": TokenType.COMMAND, 332 "EXCEPTION": TokenType.COMMAND, 333 "FLOAT64": TokenType.DOUBLE, 334 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 335 "MODEL": TokenType.MODEL, 336 "NOT DETERMINISTIC": TokenType.VOLATILE, 337 "RECORD": TokenType.STRUCT, 338 "TIMESTAMP": TokenType.TIMESTAMPTZ, 339 } 340 KEYWORDS.pop("DIV") 341 KEYWORDS.pop("VALUES") 342 KEYWORDS.pop("/*+")
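A hedged example exercising a few of these settings (backtick identifiers, raw strings); the tokenize helper and Token attributes are part of sqlglot's public API:

import sqlglot

sql = "SELECT r'\\d+' AS pattern FROM `project.dataset.table`"
for token in sqlglot.tokenize(sql, read="bigquery"):
    print(token.token_type, repr(token.text))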
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
344 class Parser(parser.Parser): 345 PREFIXED_PIVOT_COLUMNS = True 346 LOG_DEFAULTS_TO_LN = True 347 SUPPORTS_IMPLICIT_UNNEST = True 348 349 FUNCTIONS = { 350 **parser.Parser.FUNCTIONS, 351 "DATE": _build_date, 352 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 353 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 354 "DATE_TRUNC": lambda args: exp.DateTrunc( 355 unit=exp.Literal.string(str(seq_get(args, 1))), 356 this=seq_get(args, 0), 357 ), 358 "DATETIME": _build_datetime, 359 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 360 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 361 "DIV": binary_from_function(exp.IntDiv), 362 "FORMAT_DATE": lambda args: exp.TimeToStr( 363 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 364 ), 365 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 366 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 367 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 368 ), 369 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 370 "MD5": exp.MD5Digest.from_arg_list, 371 "TO_HEX": _build_to_hex, 372 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 373 [seq_get(args, 1), seq_get(args, 0)] 374 ), 375 "PARSE_TIMESTAMP": _build_parse_timestamp, 376 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 377 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 378 this=seq_get(args, 0), 379 expression=seq_get(args, 1), 380 position=seq_get(args, 2), 381 occurrence=seq_get(args, 3), 382 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 383 ), 384 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 385 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 386 "SPLIT": lambda args: exp.Split( 387 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 388 this=seq_get(args, 0), 389 expression=seq_get(args, 1) or exp.Literal.string(","), 390 ), 391 "TIME": _build_time, 392 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 393 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 394 "TIMESTAMP": _build_timestamp, 395 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 396 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 397 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 398 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 399 ), 400 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 401 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 402 ), 403 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 404 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 405 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 406 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 407 ), 408 } 409 410 FUNCTION_PARSERS = { 411 **parser.Parser.FUNCTION_PARSERS, 412 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 413 } 414 FUNCTION_PARSERS.pop("TRIM") 415 416 NO_PAREN_FUNCTIONS = { 417 **parser.Parser.NO_PAREN_FUNCTIONS, 418 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 419 } 420 421 NESTED_TYPE_TOKENS = { 422 *parser.Parser.NESTED_TYPE_TOKENS, 423 TokenType.TABLE, 424 } 425 426 PROPERTY_PARSERS = { 427 **parser.Parser.PROPERTY_PARSERS, 428 "NOT DETERMINISTIC": lambda self: self.expression( 429 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 430 ), 431 "OPTIONS": lambda self: 
self._parse_with_property(), 432 } 433 434 CONSTRAINT_PARSERS = { 435 **parser.Parser.CONSTRAINT_PARSERS, 436 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 437 } 438 439 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 440 RANGE_PARSERS.pop(TokenType.OVERLAPS) 441 442 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 443 444 STATEMENT_PARSERS = { 445 **parser.Parser.STATEMENT_PARSERS, 446 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 447 TokenType.END: lambda self: self._parse_as_command(self._prev), 448 TokenType.FOR: lambda self: self._parse_for_in(), 449 } 450 451 BRACKET_OFFSETS = { 452 "OFFSET": (0, False), 453 "ORDINAL": (1, False), 454 "SAFE_OFFSET": (0, True), 455 "SAFE_ORDINAL": (1, True), 456 } 457 458 def _parse_for_in(self) -> exp.ForIn: 459 this = self._parse_range() 460 self._match_text_seq("DO") 461 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 462 463 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 464 this = super()._parse_table_part(schema=schema) or self._parse_number() 465 466 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 467 if isinstance(this, exp.Identifier): 468 table_name = this.name 469 while self._match(TokenType.DASH, advance=False) and self._next: 470 text = "" 471 while self._curr and self._curr.token_type != TokenType.DOT: 472 self._advance() 473 text += self._prev.text 474 table_name += text 475 476 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 477 elif isinstance(this, exp.Literal): 478 table_name = this.name 479 480 if self._is_connected() and self._parse_var(any_token=True): 481 table_name += self._prev.text 482 483 this = exp.Identifier(this=table_name, quoted=True) 484 485 return this 486 487 def _parse_table_parts( 488 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 489 ) -> exp.Table: 490 table = super()._parse_table_parts( 491 schema=schema, is_db_reference=is_db_reference, wildcard=True 492 ) 493 494 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 495 if not table.catalog: 496 if table.db: 497 parts = table.db.split(".") 498 if len(parts) == 2 and not table.args["db"].quoted: 499 table.set("catalog", exp.Identifier(this=parts[0])) 500 table.set("db", exp.Identifier(this=parts[1])) 501 else: 502 parts = table.name.split(".") 503 if len(parts) == 2 and not table.this.quoted: 504 table.set("db", exp.Identifier(this=parts[0])) 505 table.set("this", exp.Identifier(this=parts[1])) 506 507 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 508 catalog, db, this, *rest = ( 509 exp.to_identifier(p, quoted=True) 510 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 511 ) 512 513 if rest and this: 514 this = exp.Dot.build([this, *rest]) # type: ignore 515 516 table = exp.Table( 517 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 518 ) 519 table.meta["quoted_table"] = True 520 521 return table 522 523 def _parse_column(self) -> t.Optional[exp.Expression]: 524 column = super()._parse_column() 525 if isinstance(column, exp.Column): 526 parts = column.parts 527 if any("." 
in p.name for p in parts): 528 catalog, db, table, this, *rest = ( 529 exp.to_identifier(p, quoted=True) 530 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 531 ) 532 533 if rest and this: 534 this = exp.Dot.build([this, *rest]) # type: ignore 535 536 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 537 column.meta["quoted_column"] = True 538 539 return column 540 541 @t.overload 542 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 543 544 @t.overload 545 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 546 547 def _parse_json_object(self, agg=False): 548 json_object = super()._parse_json_object() 549 array_kv_pair = seq_get(json_object.expressions, 0) 550 551 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 552 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 553 if ( 554 array_kv_pair 555 and isinstance(array_kv_pair.this, exp.Array) 556 and isinstance(array_kv_pair.expression, exp.Array) 557 ): 558 keys = array_kv_pair.this.expressions 559 values = array_kv_pair.expression.expressions 560 561 json_object.set( 562 "expressions", 563 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 564 ) 565 566 return json_object 567 568 def _parse_bracket( 569 self, this: t.Optional[exp.Expression] = None 570 ) -> t.Optional[exp.Expression]: 571 bracket = super()._parse_bracket(this) 572 573 if this is bracket: 574 return bracket 575 576 if isinstance(bracket, exp.Bracket): 577 for expression in bracket.expressions: 578 name = expression.name.upper() 579 580 if name not in self.BRACKET_OFFSETS: 581 break 582 583 offset, safe = self.BRACKET_OFFSETS[name] 584 bracket.set("offset", offset) 585 bracket.set("safe", safe) 586 expression.replace(expression.expressions[0]) 587 588 return bracket
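Two of the BigQuery-specific parses above, illustrated (outputs indicative):

from sqlglot import exp, parse_one

# Dash-separated names such as my-project are glued back into one identifier.
table = parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery").find(exp.Table)
print(table.sql("bigquery"))

# SAFE_OFFSET(...) subscripts become a Bracket with offset/safe metadata.
bracket = parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery").find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # expected: 0 True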
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
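These options flow through the top-level API; a minimal sketch:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

# error_level and max_errors are forwarded to the Parser by parse_one.
try:
    sqlglot.parse_one("SELECT FROM WHERE", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)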
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
590 class Generator(generator.Generator): 591 EXPLICIT_SET_OP = True 592 INTERVAL_ALLOWS_PLURAL_FORM = False 593 JOIN_HINTS = False 594 QUERY_HINTS = False 595 TABLE_HINTS = False 596 LIMIT_FETCH = "LIMIT" 597 RENAME_TABLE_WITH_DB = False 598 NVL2_SUPPORTED = False 599 UNNEST_WITH_ORDINALITY = False 600 COLLATE_IS_FUNC = True 601 LIMIT_ONLY_LITERALS = True 602 SUPPORTS_TABLE_ALIAS_COLUMNS = False 603 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 604 JSON_KEY_VALUE_PAIR_SEP = "," 605 NULL_ORDERING_SUPPORTED = False 606 IGNORE_NULLS_IN_FUNC = True 607 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 608 CAN_IMPLEMENT_ARRAY_ANY = True 609 SUPPORTS_TO_NUMBER = False 610 NAMED_PLACEHOLDER_TOKEN = "@" 611 HEX_FUNC = "TO_HEX" 612 WITH_PROPERTIES_PREFIX = "OPTIONS" 613 SUPPORTS_EXPLODING_PROJECTIONS = False 614 615 TRANSFORMS = { 616 **generator.Generator.TRANSFORMS, 617 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 618 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 619 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 620 exp.Array: inline_array_unless_query, 621 exp.ArrayContains: _array_contains_sql, 622 exp.ArrayFilter: filter_array_using_unnest, 623 exp.ArraySize: rename_func("ARRAY_LENGTH"), 624 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 625 exp.CollateProperty: lambda self, e: ( 626 f"DEFAULT COLLATE {self.sql(e, 'this')}" 627 if e.args.get("default") 628 else f"COLLATE {self.sql(e, 'this')}" 629 ), 630 exp.Commit: lambda *_: "COMMIT TRANSACTION", 631 exp.CountIf: rename_func("COUNTIF"), 632 exp.Create: _create_sql, 633 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 634 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 635 exp.DateDiff: lambda self, e: self.func( 636 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 637 ), 638 exp.DateFromParts: rename_func("DATE"), 639 exp.DateStrToDate: datestrtodate_sql, 640 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 641 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 642 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 643 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 644 exp.FromTimeZone: lambda self, e: self.func( 645 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 646 ), 647 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 648 exp.GroupConcat: rename_func("STRING_AGG"), 649 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 650 exp.If: if_sql(false_value="NULL"), 651 exp.ILike: no_ilike_sql, 652 exp.IntDiv: rename_func("DIV"), 653 exp.JSONFormat: rename_func("TO_JSON_STRING"), 654 exp.Max: max_or_greatest, 655 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 656 exp.MD5Digest: rename_func("MD5"), 657 exp.Min: min_or_least, 658 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 659 exp.RegexpExtract: lambda self, e: self.func( 660 "REGEXP_EXTRACT", 661 e.this, 662 e.expression, 663 e.args.get("position"), 664 e.args.get("occurrence"), 665 ), 666 exp.RegexpReplace: regexp_replace_sql, 667 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 668 exp.ReturnsProperty: _returnsproperty_sql, 669 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 670 exp.Select: transforms.preprocess( 671 [ 672 transforms.explode_to_unnest(), 673 transforms.unqualify_unnest, 674 transforms.eliminate_distinct_on, 675 _alias_ordered_group, 676 transforms.eliminate_semi_and_anti_joins, 677 ] 678 ), 679 exp.SHA: rename_func("SHA1"), 680 exp.SHA2: sha256_sql, 681 
exp.StabilityProperty: lambda self, e: ( 682 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 683 ), 684 exp.StrToDate: _str_to_datetime_sql, 685 exp.StrToTime: _str_to_datetime_sql, 686 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 687 exp.TimeFromParts: rename_func("TIME"), 688 exp.TimestampFromParts: rename_func("DATETIME"), 689 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 690 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 691 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 692 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 693 exp.TimeStrToTime: timestrtotime_sql, 694 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 695 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 696 exp.TsOrDsAdd: _ts_or_ds_add_sql, 697 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 698 exp.TsOrDsToTime: rename_func("TIME"), 699 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 700 exp.Unhex: rename_func("FROM_HEX"), 701 exp.UnixDate: rename_func("UNIX_DATE"), 702 exp.UnixToTime: _unix_to_time_sql, 703 exp.Values: _derived_table_values_to_unnest, 704 exp.VariancePop: rename_func("VAR_POP"), 705 } 706 707 SUPPORTED_JSON_PATH_PARTS = { 708 exp.JSONPathKey, 709 exp.JSONPathRoot, 710 exp.JSONPathSubscript, 711 } 712 713 TYPE_MAPPING = { 714 **generator.Generator.TYPE_MAPPING, 715 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 716 exp.DataType.Type.BIGINT: "INT64", 717 exp.DataType.Type.BINARY: "BYTES", 718 exp.DataType.Type.BOOLEAN: "BOOL", 719 exp.DataType.Type.CHAR: "STRING", 720 exp.DataType.Type.DECIMAL: "NUMERIC", 721 exp.DataType.Type.DOUBLE: "FLOAT64", 722 exp.DataType.Type.FLOAT: "FLOAT64", 723 exp.DataType.Type.INT: "INT64", 724 exp.DataType.Type.NCHAR: "STRING", 725 exp.DataType.Type.NVARCHAR: "STRING", 726 exp.DataType.Type.SMALLINT: "INT64", 727 exp.DataType.Type.TEXT: "STRING", 728 exp.DataType.Type.TIMESTAMP: "DATETIME", 729 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 730 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 731 exp.DataType.Type.TINYINT: "INT64", 732 exp.DataType.Type.VARBINARY: "BYTES", 733 exp.DataType.Type.ROWVERSION: "BYTES", 734 exp.DataType.Type.VARCHAR: "STRING", 735 exp.DataType.Type.VARIANT: "ANY TYPE", 736 } 737 738 PROPERTIES_LOCATION = { 739 **generator.Generator.PROPERTIES_LOCATION, 740 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 741 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 742 } 743 744 # WINDOW comes after QUALIFY 745 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 746 AFTER_HAVING_MODIFIER_TRANSFORMS = { 747 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 748 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 749 } 750 751 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 752 RESERVED_KEYWORDS = { 753 "all", 754 "and", 755 "any", 756 "array", 757 "as", 758 "asc", 759 "assert_rows_modified", 760 "at", 761 "between", 762 "by", 763 "case", 764 "cast", 765 "collate", 766 "contains", 767 "create", 768 "cross", 769 "cube", 770 "current", 771 "default", 772 "define", 773 "desc", 774 "distinct", 775 "else", 776 "end", 777 "enum", 778 "escape", 779 "except", 780 "exclude", 781 "exists", 782 "extract", 783 "false", 784 "fetch", 785 "following", 786 "for", 787 "from", 788 "full", 789 "group", 790 "grouping", 791 "groups", 792 "hash", 793 "having", 794 "if", 795 "ignore", 796 "in", 797 "inner", 798 "intersect", 799 "interval", 800 "into", 801 "is", 802 
"join", 803 "lateral", 804 "left", 805 "like", 806 "limit", 807 "lookup", 808 "merge", 809 "natural", 810 "new", 811 "no", 812 "not", 813 "null", 814 "nulls", 815 "of", 816 "on", 817 "or", 818 "order", 819 "outer", 820 "over", 821 "partition", 822 "preceding", 823 "proto", 824 "qualify", 825 "range", 826 "recursive", 827 "respect", 828 "right", 829 "rollup", 830 "rows", 831 "select", 832 "set", 833 "some", 834 "struct", 835 "tablesample", 836 "then", 837 "to", 838 "treat", 839 "true", 840 "unbounded", 841 "union", 842 "unnest", 843 "using", 844 "when", 845 "where", 846 "window", 847 "with", 848 "within", 849 } 850 851 def mod_sql(self, expression: exp.Mod) -> str: 852 this = expression.this 853 expr = expression.expression 854 return self.func( 855 "MOD", 856 this.unnest() if isinstance(this, exp.Paren) else this, 857 expr.unnest() if isinstance(expr, exp.Paren) else expr, 858 ) 859 860 def column_parts(self, expression: exp.Column) -> str: 861 if expression.meta.get("quoted_column"): 862 # If a column reference is of the form `dataset.table`.name, we need 863 # to preserve the quoted table path, otherwise the reference breaks 864 table_parts = ".".join(p.name for p in expression.parts[:-1]) 865 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 866 return f"{table_path}.{self.sql(expression, 'this')}" 867 868 return super().column_parts(expression) 869 870 def table_parts(self, expression: exp.Table) -> str: 871 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 872 # we need to make sure the correct quoting is used in each case. 873 # 874 # For example, if there is a CTE x that clashes with a schema name, then the former will 875 # return the table y in that schema, whereas the latter will return the CTE's y column: 876 # 877 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 878 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 879 if expression.meta.get("quoted_table"): 880 table_parts = ".".join(p.name for p in expression.parts) 881 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 882 883 return super().table_parts(expression) 884 885 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 886 if isinstance(expression.this, exp.TsOrDsToTimestamp): 887 func_name = "FORMAT_DATETIME" 888 else: 889 func_name = "FORMAT_DATE" 890 this = ( 891 expression.this 892 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 893 else expression 894 ) 895 return self.func(func_name, self.format_time(expression), this.this) 896 897 def eq_sql(self, expression: exp.EQ) -> str: 898 # Operands of = cannot be NULL in BigQuery 899 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 900 if not isinstance(expression.parent, exp.Update): 901 return "NULL" 902 903 return self.binary(expression, "=") 904 905 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 906 parent = expression.parent 907 908 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 909 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
910 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 911 return self.func( 912 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 913 ) 914 915 return super().attimezone_sql(expression) 916 917 def trycast_sql(self, expression: exp.TryCast) -> str: 918 return self.cast_sql(expression, safe_prefix="SAFE_") 919 920 def bracket_sql(self, expression: exp.Bracket) -> str: 921 this = expression.this 922 expressions = expression.expressions 923 924 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 925 arg = expressions[0] 926 if arg.type is None: 927 from sqlglot.optimizer.annotate_types import annotate_types 928 929 arg = annotate_types(arg) 930 931 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 932 # BQ doesn't support bracket syntax with string values for structs 933 return f"{self.sql(this)}.{arg.name}" 934 935 expressions_sql = self.expressions(expression, flat=True) 936 offset = expression.args.get("offset") 937 938 if offset == 0: 939 expressions_sql = f"OFFSET({expressions_sql})" 940 elif offset == 1: 941 expressions_sql = f"ORDINAL({expressions_sql})" 942 elif offset is not None: 943 self.unsupported(f"Unsupported array offset: {offset}") 944 945 if expression.args.get("safe"): 946 expressions_sql = f"SAFE_{expressions_sql}" 947 948 return f"{self.sql(this)}[{expressions_sql}]" 949 950 def in_unnest_op(self, expression: exp.Unnest) -> str: 951 return self.sql(expression) 952 953 def except_op(self, expression: exp.Except) -> str: 954 if not expression.args.get("distinct"): 955 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 956 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 957 958 def intersect_op(self, expression: exp.Intersect) -> str: 959 if not expression.args.get("distinct"): 960 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 961 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 962 963 def version_sql(self, expression: exp.Version) -> str: 964 if expression.name == "TIMESTAMP": 965 expression.set("this", "SYSTEM_TIME") 966 return super().version_sql(expression)
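A couple of the generator behaviors above, end to end (outputs indicative):

import sqlglot

# EXPLICIT_SET_OP: a bare EXCEPT is rendered with an explicit DISTINCT.
print(sqlglot.transpile("SELECT 1 EXCEPT SELECT 2", write="bigquery")[0])
# expected: SELECT 1 EXCEPT DISTINCT SELECT 2

# trycast_sql: TRY_CAST becomes a SAFE_-prefixed cast, with INT -> INT64.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])
# expected: SELECT SAFE_CAST(x AS INT64)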
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a `WHERE` clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
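These arguments are likewise forwarded by the top-level helpers; for example, pretty-printing while transpiling to BigQuery (a sketch; formatting shown is indicative):

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="bigquery", pretty=True)[0])
# SELECT
#   a,
#   b
# FROM t
# WHERE
#   a > 1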
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql