sqlglot.dialects.bigquery
```python
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
```
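A quick way to exercise the dialect is through sqlglot's top-level API. The sketch below (the table and column names are made up) reads BigQuery SQL, including a dash-separated project name, and re-renders it for DuckDB, then rewrites an ILIKE predicate when targeting BigQuery:

```python
import sqlglot

# BigQuery-specific input: a dash-separated project name and TIMESTAMP_MILLIS.
print(
    sqlglot.transpile(
        "SELECT TIMESTAMP_MILLIS(ts) FROM my-project.my_dataset.events",
        read="bigquery",
        write="duckdb",
    )[0]
)

# Targeting BigQuery: ILIKE is unsupported there, so the generator rewrites it
# into an equivalent LIKE expression (see exp.ILike: no_ilike_sql in TRANSFORMS).
print(sqlglot.transpile("SELECT * FROM t WHERE col ILIKE '%x%'", write="bigquery")[0])
```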
```python
class BigQuery(Dialect):
```
`WEEK_OFFSET = -1`

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
`LOG_BASE_FIRST = False`

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
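For instance (a minimal sketch), BigQuery reads LOG(value, base) with the base second, so transpiling to a dialect whose LOG takes the base first should swap the arguments:

```python
import sqlglot

# BigQuery treats LOG(100, 10) as log of 100 in base 10; MySQL expects the
# base first, so the transpiled call should come out as LOG(10, 100).
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="mysql")[0])
```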
`NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE`

Specifies the strategy according to which identifiers should be normalized.
`NORMALIZE_FUNCTIONS = False`

Determines how function names are going to be normalized. Possible values:

- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
`TIME_MAPPING`

Associates this dialect's time formats with their equivalent Python strftime formats.
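For example, PARSE_TIMESTAMP's format string goes through this mapping on the way in, so BigQuery-only format elements such as %E6S are translated before another dialect renders them back out (a sketch; the column name is made up):

```python
import sqlglot

# %E6S maps to the internal %S.%f, which DuckDB's STRPTIME understands.
print(
    sqlglot.transpile(
        "SELECT PARSE_TIMESTAMP('%Y-%m-%d %E6S', s)",
        read="bigquery",
        write="duckdb",
    )[0]
)
```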
`ESCAPE_SEQUENCES`

Mapping of an unescaped escape sequence to the corresponding character.
`FORMAT_MAPPING`

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
`PSEUDOCOLUMNS`

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
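This matters when the optimizer expands star projections. The sketch below (the schema and names are made up) assumes the qualify rule consults PSEUDOCOLUMNS and therefore leaves _PARTITIONTIME out of the expansion:

```python
import sqlglot
from sqlglot.optimizer.qualify import qualify

expr = qualify(
    sqlglot.parse_one("SELECT * FROM events", read="bigquery"),
    schema={"events": {"id": "INT64", "_PARTITIONTIME": "TIMESTAMP"}},
    dialect="bigquery",
)
# _PARTITIONTIME should not appear among the expanded projections.
print(expr.sql("bigquery"))
```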
```python
def normalize_identifier(self, expression: E) -> E:
```
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set `enable_case_sensitive_identifier`.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
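A short sketch of the heuristic above in action (the names are made up): the unquoted column is lowercased, while the fully qualified table keeps its casing because table names are case-sensitive by default:

```python
import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = sqlglot.parse_one("SELECT FoO FROM Project.Dataset.TbL", read="bigquery")
# Expected: the column becomes foo; Project.Dataset.TbL is left untouched.
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
```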
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
```python
class Tokenizer(tokens.Tokenizer):
```
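The quoting rules declared above can be observed directly by tokenizing (a sketch; the input is made up):

```python
from sqlglot.dialects.bigquery import BigQuery

# Backticks delimit identifiers, `#` starts a comment, and r'...' is a raw
# string, so the backslash below is not treated as an escape.
for token in BigQuery().tokenize("SELECT r'\\d+' FROM `dataset.tbl`  # trailing comment"):
    print(token.token_type, repr(token.text))
```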
341 class Parser(parser.Parser): 342 PREFIXED_PIVOT_COLUMNS = True 343 LOG_DEFAULTS_TO_LN = True 344 SUPPORTS_IMPLICIT_UNNEST = True 345 346 FUNCTIONS = { 347 **parser.Parser.FUNCTIONS, 348 "DATE": _build_date, 349 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 350 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 351 "DATE_TRUNC": lambda args: exp.DateTrunc( 352 unit=exp.Literal.string(str(seq_get(args, 1))), 353 this=seq_get(args, 0), 354 ), 355 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 356 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 357 "DIV": binary_from_function(exp.IntDiv), 358 "FORMAT_DATE": lambda args: exp.TimeToStr( 359 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 360 ), 361 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 362 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 363 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 364 ), 365 "MD5": exp.MD5Digest.from_arg_list, 366 "TO_HEX": _build_to_hex, 367 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 368 [seq_get(args, 1), seq_get(args, 0)] 369 ), 370 "PARSE_TIMESTAMP": _build_parse_timestamp, 371 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 372 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 373 this=seq_get(args, 0), 374 expression=seq_get(args, 1), 375 position=seq_get(args, 2), 376 occurrence=seq_get(args, 3), 377 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 378 ), 379 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 380 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 381 "SPLIT": lambda args: exp.Split( 382 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 383 this=seq_get(args, 0), 384 expression=seq_get(args, 1) or exp.Literal.string(","), 385 ), 386 "TIME": _build_time, 387 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 388 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 389 "TIMESTAMP": _build_timestamp, 390 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 391 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 392 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 393 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 394 ), 395 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 396 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 397 ), 398 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 399 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 400 } 401 402 FUNCTION_PARSERS = { 403 **parser.Parser.FUNCTION_PARSERS, 404 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 405 } 406 FUNCTION_PARSERS.pop("TRIM") 407 408 NO_PAREN_FUNCTIONS = { 409 **parser.Parser.NO_PAREN_FUNCTIONS, 410 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 411 } 412 413 NESTED_TYPE_TOKENS = { 414 *parser.Parser.NESTED_TYPE_TOKENS, 415 TokenType.TABLE, 416 } 417 418 PROPERTY_PARSERS = { 419 **parser.Parser.PROPERTY_PARSERS, 420 "NOT DETERMINISTIC": lambda self: self.expression( 421 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 422 ), 423 "OPTIONS": lambda self: self._parse_with_property(), 424 } 425 426 CONSTRAINT_PARSERS = { 427 **parser.Parser.CONSTRAINT_PARSERS, 428 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 429 } 430 431 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 432 
RANGE_PARSERS.pop(TokenType.OVERLAPS) 433 434 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 435 436 STATEMENT_PARSERS = { 437 **parser.Parser.STATEMENT_PARSERS, 438 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 439 TokenType.END: lambda self: self._parse_as_command(self._prev), 440 TokenType.FOR: lambda self: self._parse_for_in(), 441 } 442 443 BRACKET_OFFSETS = { 444 "OFFSET": (0, False), 445 "ORDINAL": (1, False), 446 "SAFE_OFFSET": (0, True), 447 "SAFE_ORDINAL": (1, True), 448 } 449 450 def _parse_for_in(self) -> exp.ForIn: 451 this = self._parse_range() 452 self._match_text_seq("DO") 453 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 454 455 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 456 this = super()._parse_table_part(schema=schema) or self._parse_number() 457 458 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 459 if isinstance(this, exp.Identifier): 460 table_name = this.name 461 while self._match(TokenType.DASH, advance=False) and self._next: 462 text = "" 463 while self._curr and self._curr.token_type != TokenType.DOT: 464 self._advance() 465 text += self._prev.text 466 table_name += text 467 468 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 469 elif isinstance(this, exp.Literal): 470 table_name = this.name 471 472 if self._is_connected() and self._parse_var(any_token=True): 473 table_name += self._prev.text 474 475 this = exp.Identifier(this=table_name, quoted=True) 476 477 return this 478 479 def _parse_table_parts( 480 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 481 ) -> exp.Table: 482 table = super()._parse_table_parts( 483 schema=schema, is_db_reference=is_db_reference, wildcard=True 484 ) 485 486 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 487 if not table.catalog: 488 if table.db: 489 parts = table.db.split(".") 490 if len(parts) == 2 and not table.args["db"].quoted: 491 table.set("catalog", exp.Identifier(this=parts[0])) 492 table.set("db", exp.Identifier(this=parts[1])) 493 else: 494 parts = table.name.split(".") 495 if len(parts) == 2 and not table.this.quoted: 496 table.set("db", exp.Identifier(this=parts[0])) 497 table.set("this", exp.Identifier(this=parts[1])) 498 499 if any("." in p.name for p in table.parts): 500 catalog, db, this, *rest = ( 501 exp.to_identifier(p, quoted=True) 502 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 503 ) 504 505 if rest and this: 506 this = exp.Dot.build([this, *rest]) # type: ignore 507 508 table = exp.Table(this=this, db=db, catalog=catalog) 509 table.meta["quoted_table"] = True 510 511 return table 512 513 @t.overload 514 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 515 516 @t.overload 517 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
518 519 def _parse_json_object(self, agg=False): 520 json_object = super()._parse_json_object() 521 array_kv_pair = seq_get(json_object.expressions, 0) 522 523 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 524 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 525 if ( 526 array_kv_pair 527 and isinstance(array_kv_pair.this, exp.Array) 528 and isinstance(array_kv_pair.expression, exp.Array) 529 ): 530 keys = array_kv_pair.this.expressions 531 values = array_kv_pair.expression.expressions 532 533 json_object.set( 534 "expressions", 535 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 536 ) 537 538 return json_object 539 540 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 541 bracket = super()._parse_bracket(this) 542 543 if this is bracket: 544 return bracket 545 546 if isinstance(bracket, exp.Bracket): 547 for expression in bracket.expressions: 548 name = expression.name.upper() 549 550 if name not in self.BRACKET_OFFSETS: 551 break 552 553 offset, safe = self.BRACKET_OFFSETS[name] 554 bracket.set("offset", offset) 555 bracket.set("safe", safe) 556 expression.replace(expression.expressions[0]) 557 558 return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
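For example, the FUNCTIONS table above lets a BigQuery-specific call round-trip through sqlglot's canonical AST. A minimal sketch; the DuckDB rendering mentioned in the comment is indicative, not guaranteed:

import sqlglot
from sqlglot import exp

# PARSE_DATE(format, value) is parsed into the canonical exp.StrToDate node
# (note the swapped argument order in the FUNCTIONS entry above).
ast = sqlglot.parse_one("SELECT PARSE_DATE('%Y%m%d', col) FROM t", read="bigquery")
assert ast.find(exp.StrToDate) is not None

# The canonical node can then be generated for another dialect, e.g. DuckDB,
# which typically renders string-to-date conversion via STRPTIME.
print(ast.sql(dialect="duckdb"))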
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
560 class Generator(generator.Generator): 561 EXPLICIT_UNION = True 562 INTERVAL_ALLOWS_PLURAL_FORM = False 563 JOIN_HINTS = False 564 QUERY_HINTS = False 565 TABLE_HINTS = False 566 LIMIT_FETCH = "LIMIT" 567 RENAME_TABLE_WITH_DB = False 568 NVL2_SUPPORTED = False 569 UNNEST_WITH_ORDINALITY = False 570 COLLATE_IS_FUNC = True 571 LIMIT_ONLY_LITERALS = True 572 SUPPORTS_TABLE_ALIAS_COLUMNS = False 573 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 574 JSON_KEY_VALUE_PAIR_SEP = "," 575 NULL_ORDERING_SUPPORTED = False 576 IGNORE_NULLS_IN_FUNC = True 577 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 578 CAN_IMPLEMENT_ARRAY_ANY = True 579 SUPPORTS_TO_NUMBER = False 580 NAMED_PLACEHOLDER_TOKEN = "@" 581 582 TRANSFORMS = { 583 **generator.Generator.TRANSFORMS, 584 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 585 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 586 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 587 exp.ArrayContains: _array_contains_sql, 588 exp.ArrayFilter: filter_array_using_unnest, 589 exp.ArraySize: rename_func("ARRAY_LENGTH"), 590 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 591 exp.CollateProperty: lambda self, e: ( 592 f"DEFAULT COLLATE {self.sql(e, 'this')}" 593 if e.args.get("default") 594 else f"COLLATE {self.sql(e, 'this')}" 595 ), 596 exp.Commit: lambda *_: "COMMIT TRANSACTION", 597 exp.CountIf: rename_func("COUNTIF"), 598 exp.Create: _create_sql, 599 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 600 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 601 exp.DateDiff: lambda self, e: self.func( 602 "DATE_DIFF", e.this, e.expression, e.unit or "DAY" 603 ), 604 exp.DateFromParts: rename_func("DATE"), 605 exp.DateStrToDate: datestrtodate_sql, 606 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 607 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 608 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 609 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 610 exp.FromTimeZone: lambda self, e: self.func( 611 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 612 ), 613 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 614 exp.GroupConcat: rename_func("STRING_AGG"), 615 exp.Hex: rename_func("TO_HEX"), 616 exp.If: if_sql(false_value="NULL"), 617 exp.ILike: no_ilike_sql, 618 exp.IntDiv: rename_func("DIV"), 619 exp.JSONFormat: rename_func("TO_JSON_STRING"), 620 exp.Max: max_or_greatest, 621 exp.Mod: rename_func("MOD"), 622 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 623 exp.MD5Digest: rename_func("MD5"), 624 exp.Min: min_or_least, 625 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 626 exp.RegexpExtract: lambda self, e: self.func( 627 "REGEXP_EXTRACT", 628 e.this, 629 e.expression, 630 e.args.get("position"), 631 e.args.get("occurrence"), 632 ), 633 exp.RegexpReplace: regexp_replace_sql, 634 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 635 exp.ReturnsProperty: _returnsproperty_sql, 636 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 637 exp.Select: transforms.preprocess( 638 [ 639 transforms.explode_to_unnest(), 640 _unqualify_unnest, 641 transforms.eliminate_distinct_on, 642 _alias_ordered_group, 643 transforms.eliminate_semi_and_anti_joins, 644 ] 645 ), 646 exp.SHA2: lambda self, e: self.func( 647 "SHA256" if e.text("length") == "256" else "SHA512", e.this 648 ), 649 exp.StabilityProperty: lambda self, e: ( 650 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 651 ), 652 
exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 653 exp.StrToTime: lambda self, e: self.func( 654 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 655 ), 656 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 657 exp.TimeFromParts: rename_func("TIME"), 658 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 659 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 660 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 661 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 662 exp.TimeStrToTime: timestrtotime_sql, 663 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 664 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 665 exp.TsOrDsAdd: _ts_or_ds_add_sql, 666 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 667 exp.TsOrDsToTime: rename_func("TIME"), 668 exp.Unhex: rename_func("FROM_HEX"), 669 exp.UnixDate: rename_func("UNIX_DATE"), 670 exp.UnixToTime: _unix_to_time_sql, 671 exp.Values: _derived_table_values_to_unnest, 672 exp.VariancePop: rename_func("VAR_POP"), 673 } 674 675 SUPPORTED_JSON_PATH_PARTS = { 676 exp.JSONPathKey, 677 exp.JSONPathRoot, 678 exp.JSONPathSubscript, 679 } 680 681 TYPE_MAPPING = { 682 **generator.Generator.TYPE_MAPPING, 683 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 684 exp.DataType.Type.BIGINT: "INT64", 685 exp.DataType.Type.BINARY: "BYTES", 686 exp.DataType.Type.BOOLEAN: "BOOL", 687 exp.DataType.Type.CHAR: "STRING", 688 exp.DataType.Type.DECIMAL: "NUMERIC", 689 exp.DataType.Type.DOUBLE: "FLOAT64", 690 exp.DataType.Type.FLOAT: "FLOAT64", 691 exp.DataType.Type.INT: "INT64", 692 exp.DataType.Type.NCHAR: "STRING", 693 exp.DataType.Type.NVARCHAR: "STRING", 694 exp.DataType.Type.SMALLINT: "INT64", 695 exp.DataType.Type.TEXT: "STRING", 696 exp.DataType.Type.TIMESTAMP: "DATETIME", 697 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 698 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 699 exp.DataType.Type.TINYINT: "INT64", 700 exp.DataType.Type.VARBINARY: "BYTES", 701 exp.DataType.Type.VARCHAR: "STRING", 702 exp.DataType.Type.VARIANT: "ANY TYPE", 703 } 704 705 PROPERTIES_LOCATION = { 706 **generator.Generator.PROPERTIES_LOCATION, 707 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 708 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 709 } 710 711 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 712 RESERVED_KEYWORDS = { 713 *generator.Generator.RESERVED_KEYWORDS, 714 "all", 715 "and", 716 "any", 717 "array", 718 "as", 719 "asc", 720 "assert_rows_modified", 721 "at", 722 "between", 723 "by", 724 "case", 725 "cast", 726 "collate", 727 "contains", 728 "create", 729 "cross", 730 "cube", 731 "current", 732 "default", 733 "define", 734 "desc", 735 "distinct", 736 "else", 737 "end", 738 "enum", 739 "escape", 740 "except", 741 "exclude", 742 "exists", 743 "extract", 744 "false", 745 "fetch", 746 "following", 747 "for", 748 "from", 749 "full", 750 "group", 751 "grouping", 752 "groups", 753 "hash", 754 "having", 755 "if", 756 "ignore", 757 "in", 758 "inner", 759 "intersect", 760 "interval", 761 "into", 762 "is", 763 "join", 764 "lateral", 765 "left", 766 "like", 767 "limit", 768 "lookup", 769 "merge", 770 "natural", 771 "new", 772 "no", 773 "not", 774 "null", 775 "nulls", 776 "of", 777 "on", 778 "or", 779 "order", 780 "outer", 781 "over", 782 "partition", 783 "preceding", 784 "proto", 785 "qualify", 786 "range", 787 "recursive", 788 "respect", 789 "right", 790 "rollup", 791 "rows", 792 "select", 793 "set", 794 "some", 795 "struct", 796 
"tablesample", 797 "then", 798 "to", 799 "treat", 800 "true", 801 "unbounded", 802 "union", 803 "unnest", 804 "using", 805 "when", 806 "where", 807 "window", 808 "with", 809 "within", 810 } 811 812 def table_parts(self, expression: exp.Table) -> str: 813 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 814 # we need to make sure the correct quoting is used in each case. 815 # 816 # For example, if there is a CTE x that clashes with a schema name, then the former will 817 # return the table y in that schema, whereas the latter will return the CTE's y column: 818 # 819 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 820 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 821 if expression.meta.get("quoted_table"): 822 table_parts = ".".join(p.name for p in expression.parts) 823 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 824 825 return super().table_parts(expression) 826 827 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 828 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 829 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 830 831 def eq_sql(self, expression: exp.EQ) -> str: 832 # Operands of = cannot be NULL in BigQuery 833 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 834 if not isinstance(expression.parent, exp.Update): 835 return "NULL" 836 837 return self.binary(expression, "=") 838 839 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 840 parent = expression.parent 841 842 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 843 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
844 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 845 return self.func( 846 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 847 ) 848 849 return super().attimezone_sql(expression) 850 851 def trycast_sql(self, expression: exp.TryCast) -> str: 852 return self.cast_sql(expression, safe_prefix="SAFE_") 853 854 def array_sql(self, expression: exp.Array) -> str: 855 first_arg = seq_get(expression.expressions, 0) 856 if isinstance(first_arg, exp.Query): 857 return f"ARRAY{self.wrap(self.sql(first_arg))}" 858 859 return inline_array_sql(self, expression) 860 861 def bracket_sql(self, expression: exp.Bracket) -> str: 862 this = expression.this 863 expressions = expression.expressions 864 865 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 866 arg = expressions[0] 867 if arg.type is None: 868 from sqlglot.optimizer.annotate_types import annotate_types 869 870 arg = annotate_types(arg) 871 872 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 873 # BQ doesn't support bracket syntax with string values for structs 874 return f"{self.sql(this)}.{arg.name}" 875 876 expressions_sql = self.expressions(expression, flat=True) 877 offset = expression.args.get("offset") 878 879 if offset == 0: 880 expressions_sql = f"OFFSET({expressions_sql})" 881 elif offset == 1: 882 expressions_sql = f"ORDINAL({expressions_sql})" 883 elif offset is not None: 884 self.unsupported(f"Unsupported array offset: {offset}") 885 886 if expression.args.get("safe"): 887 expressions_sql = f"SAFE_{expressions_sql}" 888 889 return f"{self.sql(this)}[{expressions_sql}]" 890 891 def in_unnest_op(self, expression: exp.Unnest) -> str: 892 return self.sql(expression) 893 894 def except_op(self, expression: exp.Except) -> str: 895 if not expression.args.get("distinct"): 896 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 897 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 898 899 def intersect_op(self, expression: exp.Intersect) -> str: 900 if not expression.args.get("distinct"): 901 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 902 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 903 904 def with_properties(self, properties: exp.Properties) -> str: 905 return self.properties(properties, prefix=self.seg("OPTIONS")) 906 907 def version_sql(self, expression: exp.Version) -> str: 908 if expression.name == "TIMESTAMP": 909 expression.set("this", "SYSTEM_TIME") 910 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
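A short usage sketch for these settings, via the public transpile API (the exact formatted output is indicative):

import sqlglot

sql = "select col_a, count(*) from proj.dataset.tbl group by col_a"

# pretty=True formats using the pad/indent settings; identify=True forces quoting.
print(sqlglot.transpile(sql, read="bigquery", write="bigquery", pretty=True)[0])
print(sqlglot.transpile(sql, read="bigquery", write="bigquery", identify=True)[0])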
812 def table_parts(self, expression: exp.Table) -> str: 813 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 814 # we need to make sure the correct quoting is used in each case. 815 # 816 # For example, if there is a CTE x that clashes with a schema name, then the former will 817 # return the table y in that schema, whereas the latter will return the CTE's y column: 818 # 819 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 820 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 821 if expression.meta.get("quoted_table"): 822 table_parts = ".".join(p.name for p in expression.parts) 823 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 824 825 return super().table_parts(expression)
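A sketch of the distinction the comment describes, with the round-trip behavior assumed from the quoted_table handling above:

import sqlglot

# A single backtick-quoted multi-part name keeps its quoting as one identifier,
# while separately quoted parts stay separate -- the two can resolve differently.
print(sqlglot.parse_one("SELECT * FROM `proj.dataset.tbl`", read="bigquery").sql(dialect="bigquery"))
print(sqlglot.parse_one("SELECT * FROM `proj`.`dataset`.`tbl`", read="bigquery").sql(dialect="bigquery"))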
839 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 840 parent = expression.parent 841 842 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 843 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 844 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 845 return self.func( 846 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 847 ) 848 849 return super().attimezone_sql(expression)
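For instance, transpiling an AT TIME ZONE expression from a dialect that supports it directly (a sketch; the exact output may vary by version):

import sqlglot

# BigQuery has no standalone AT TIME ZONE operator outside of CAST ... FORMAT,
# so the expression is rewritten into nested DATETIME/TIMESTAMP calls as above.
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])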
861 def bracket_sql(self, expression: exp.Bracket) -> str: 862 this = expression.this 863 expressions = expression.expressions 864 865 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 866 arg = expressions[0] 867 if arg.type is None: 868 from sqlglot.optimizer.annotate_types import annotate_types 869 870 arg = annotate_types(arg) 871 872 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 873 # BQ doesn't support bracket syntax with string values for structs 874 return f"{self.sql(this)}.{arg.name}" 875 876 expressions_sql = self.expressions(expression, flat=True) 877 offset = expression.args.get("offset") 878 879 if offset == 0: 880 expressions_sql = f"OFFSET({expressions_sql})" 881 elif offset == 1: 882 expressions_sql = f"ORDINAL({expressions_sql})" 883 elif offset is not None: 884 self.unsupported(f"Unsupported array offset: {offset}") 885 886 if expression.args.get("safe"): 887 expressions_sql = f"SAFE_{expressions_sql}" 888 889 return f"{self.sql(this)}[{expressions_sql}]"
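Round-tripping BigQuery's array subscripts shows the Parser's BRACKET_OFFSETS and this bracket_sql cooperating. A sketch, with the expectation that the wrappers are preserved:

import sqlglot

# OFFSET is 0-based, ORDINAL is 1-based, and the SAFE_ variants return NULL
# instead of raising an error on out-of-bounds access.
sql = "SELECT arr[OFFSET(0)], arr[SAFE_ORDINAL(1)] FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])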
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql