sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases, since BigQuery doesn't allow them.

    These are added by the optimizer's qualify_columns step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER BY z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
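Before the per-member documentation below, a minimal usage sketch tying a few of the hooks above together. The printed SQL is indicative and may differ slightly across sqlglot versions:

import sqlglot

# VALUES as a derived table has no BigQuery equivalent, so _derived_table_values_to_unnest
# rewrites it into UNNEST over an array of STRUCTs
print(sqlglot.transpile("SELECT * FROM (VALUES (1, 'a')) AS t(x, y)", read="duckdb", write="bigquery")[0])
# e.g. SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)]) AS t

# TO_HEX(MD5(..)) is collapsed into a single MD5 node at parse time (_build_to_hex), so
# dialects whose MD5 already returns hex text don't need the wrapper
print(sqlglot.transpile("SELECT TO_HEX(MD5('abc'))", read="bigquery", write="duckdb")[0])
# e.g. SELECT MD5('abc')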
WEEK_OFFSET: First day of the week in DATE_TRUNC(WEEK). Defaults to 0 (Monday); BigQuery sets -1 (Sunday).
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS: Determines how function names are going to be normalized.

Possible values:

- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disables function name normalization. This is BigQuery's setting, since its UDFs are case-sensitive.
TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
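For instance, BigQuery's %E6S (seconds with six fractional digits) maps to Python's %S.%f, which lets format strings travel across dialects. A sketch; the exact output depends on the target dialect and sqlglot version:

import sqlglot

print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%H:%M:%E6S', s)", read="bigquery", write="duckdb")[0])
# e.g. SELECT STRPTIME(s, '%H:%M:%S.%f')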
ESCAPE_SEQUENCES: Mapping of an escape sequence as it appears in SQL text (e.g. \n) to the character it represents (e.g. an actual newline).
FORMAT_MAPPING: Helper used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
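One way to observe this, assuming a toy schema in which the pseudo-column is declared and that the optimizer's star expansion honors PSEUDOCOLUMNS, as the note above suggests:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

schema = {"tbl": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}  # hypothetical schema
expr = qualify(parse_one("SELECT * FROM tbl", read="bigquery"), schema=schema, dialect="bigquery")
print(expr.sql("bigquery"))  # the star expands to x only; _PARTITIONTIME is excluded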
def normalize_identifier(self, expression: E) -> E

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. Snowflake, on the other hand, uppercases unquoted identifiers, so it would resolve FoO as FOO. A quoted identifier needs to be treated as case-sensitive, so no normalization is applied to it, in order to avoid "breaking" it.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, as in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
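A minimal sketch of this method across a few dialects. BigQuery lowercases here too, unless the table/UDF heuristic shown in the source above flags the identifier as case-sensitive:

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.postgres import Postgres
from sqlglot.dialects.snowflake import Snowflake

print(Postgres().normalize_identifier(exp.to_identifier("FoO")).name)   # foo
print(Snowflake().normalize_identifier(exp.to_identifier("FoO")).name)  # FOO
print(BigQuery().normalize_identifier(exp.to_identifier("FoO")).name)   # foo

# Quoted identifiers are case-sensitive in Postgres and Snowflake, so they are left as-is
print(Postgres().normalize_identifier(exp.to_identifier("FoO", quoted=True)).name)  # FoO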
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text

                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
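Two of the overrides above are easy to observe through sqlglot's public API: the dash-joining logic in _parse_table_part/_parse_table_parts, and the OFFSET/ORDINAL subscript handling driven by BRACKET_OFFSETS. A minimal sketch (the project, dataset, and column names are made up):

import sqlglot
from sqlglot import exp

# Dash-separated table names are glued back into single identifiers.
ast = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
print(ast.find(exp.Table).catalog)  # my-project

# OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL subscripts are folded into
# exp.Bracket, recording the base index and whether access is "safe".
query = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")
bracket = query.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True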
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
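These options need not be set by instantiating the Parser directly: parse_one forwards extra keyword arguments through to the dialect's parser. A small sketch with deliberately malformed SQL:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level and max_errors flow through to BigQuery.Parser
    sqlglot.parse_one(
        "SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=3
    )
except ParseError as e:
    print(e.errors[0]["description"])  # description of the first recorded error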
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Query):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")

        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")

        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
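The effect of several TRANSFORMS entries can be seen by transpiling into BigQuery: for instance, exp.CountIf is renamed to COUNTIF, and exp.SHA2 is split into SHA256/SHA512 based on its length argument. A quick sketch, with the expected output shown approximately in the comments:

import sqlglot

for sql in ("SELECT COUNT_IF(x > 0) FROM t", "SELECT SHA2(x, 256)"):
    print(sqlglot.transpile(sql, write="bigquery")[0])

# Roughly:
#   SELECT COUNTIF(x > 0) FROM t
#   SELECT SHA256(x)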
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
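As with the parser options, these can be passed through the top-level API instead of constructing a Generator by hand. A brief sketch of pretty printing with forced identifier quoting (BigQuery quotes with backticks):

import sqlglot

sql = "select a, b from c.d where a <> 0"
print(sqlglot.transpile(sql, write="bigquery", pretty=True, identify=True)[0])

# Roughly:
#   SELECT
#     `a`,
#     `b`
#   FROM `c`.`d`
#   WHERE
#     `a` <> 0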
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql