sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
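To make the module above concrete, here is a small usage sketch (hypothetical table and column names; the printed SQL is indicative and may vary slightly across sqlglot versions). It exercises two of the rewrites defined here: _derived_table_values_to_unnest and _build_to_hex.

import sqlglot

# VALUES is not allowed as a derived table in BigQuery, so it is rewritten
# into UNNEST over an array of STRUCTs on the way in.
print(
    sqlglot.transpile(
        "SELECT * FROM (VALUES (1, 'a'), (2, 'b')) AS t(x, y)",
        read="duckdb",
        write="bigquery",
    )[0]
)
# e.g. SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y), STRUCT(2 AS x, 'b' AS y)]) AS t

# TO_HEX(MD5(..)) is parsed into exp.MD5 (_build_to_hex) and generated back out
# via the exp.MD5 transform, so the expression round-trips cleanly.
print(sqlglot.transpile("SELECT TO_HEX(MD5(col)) FROM t", read="bigquery", write="bigquery")[0])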
class BigQuery(Dialect):

WEEK_OFFSET = -1

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
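A quick sanity check, as a minimal sketch (the flag is a plain class attribute):

from sqlglot.dialects.bigquery import BigQuery

# -1: weeks are treated as starting on Sunday, matching BigQuery's calendar.
print(BigQuery.WEEK_OFFSET)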
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
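For BigQuery this resolves to case-insensitive normalization; a minimal check:

from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.dialect import NormalizationStrategy

print(BigQuery.NORMALIZATION_STRATEGY is NormalizationStrategy.CASE_INSENSITIVE)  # True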
NORMALIZE_FUNCTIONS = False

Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E*S": "%S.%f", "%E6S": "%S.%f"}

Associates this dialect's time formats with their equivalent Python strftime formats.
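For instance, %E6S (seconds with six fractional digits) maps to %S.%f, which lets format strings survive transpilation to strftime-based dialects. A sketch (duckdb as the hypothetical target; output indicative):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%E6S', s)",
        read="bigquery",
        write="duckdb",
    )[0]
)
# e.g. SELECT STRPTIME(s, '%Y-%m-%d %H:%M:%S.%f')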
ESCAPE_SEQUENCES = {"\\a": "\a", "\\b": "\b", "\\f": "\f", "\\n": "\n", "\\r": "\r", "\\t": "\t", "\\v": "\v"}

Mapping of an unescaped escape sequence to the corresponding character.
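A minimal check of one entry (the mapping's values are the literal control characters):

from sqlglot.dialects.bigquery import BigQuery

# The two-character SQL sequence \n unescapes to an actual newline.
print(BigQuery.ESCAPE_SEQUENCES["\\n"] == "\n")  # True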
FORMAT_MAPPING = {"DD": "%d", "MM": "%m", "MON": "%b", "MONTH": "%B", "YYYY": "%Y", "YY": "%y", "HH": "%I", "HH12": "%I", "HH24": "%H", "MI": "%M", "SS": "%S", "SSSSS": "%f", "TZH": "%z"}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
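A hedged sketch of the FORMAT syntax being translated (duckdb as a hypothetical target; the exact output shape depends on the sqlglot version):

import sqlglot

# YYYYMMDD is mapped through FORMAT_MAPPING to %Y%m%d during parsing.
print(
    sqlglot.transpile(
        "SELECT CAST('20240101' AS DATE FORMAT 'YYYYMMDD')",
        read="bigquery",
        write="duckdb",
    )[0]
)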
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
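A sketch of the downstream effect, assuming the optimizer's star expansion consults this set (tbl and col are hypothetical names):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# _PARTITIONTIME is present in the schema, but as a pseudo-column it should
# not be projected when * is expanded.
expression = parse_one("SELECT * FROM tbl", read="bigquery")
qualified = qualify(
    expression,
    schema={"tbl": {"col": "INT64", "_PARTITIONTIME": "TIMESTAMP"}},
    dialect="bigquery",
)
print(qualified.sql("bigquery"))  # expected to project only col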
def normalize_identifier(self, expression: E) -> E:

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
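A minimal sketch of BigQuery's behavior (hypothetical names): column identifiers are lowercased because they are case-insensitive, while the qualified table name is left intact by the heuristic above.

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expression = parse_one("SELECT FoO FROM dataset.MyTable", read="bigquery")
print(normalize_identifiers(expression, dialect="bigquery").sql("bigquery"))
# expected: SELECT foo FROM dataset.MyTable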
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
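
As a brief usage sketch (not part of the generated listing; the query below is an invented example), the dialect's parser is normally reached through sqlglot's top-level helpers, and the FUNCTIONS table above maps BigQuery-specific functions onto canonical expression nodes:

import sqlglot
from sqlglot import exp

# TIMESTAMP_MICROS is registered in FUNCTIONS above, so it parses into the
# canonical exp.UnixToTime node rather than an anonymous function call.
ast = sqlglot.parse_one("SELECT TIMESTAMP_MICROS(1640995200000000)", read="bigquery")
assert ast.find(exp.UnixToTime) is not None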
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Query):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
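
A minimal generation sketch (the table and column names are invented examples), showing the TYPE_MAPPING above in action when transpiling a CREATE TABLE into BigQuery:

import sqlglot

# BIGINT and VARCHAR are remapped per TYPE_MAPPING above (INT64, STRING).
print(sqlglot.transpile("CREATE TABLE t (a BIGINT, b VARCHAR)", write="bigquery")[0])
# Expected, given the mapping above: CREATE TABLE t (a INT64, b STRING)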
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
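
For illustration (the project/dataset/table path below is an invented example), a fully quoted table path should survive a BigQuery round trip as a single back-quoted identifier, since the parser marks it with meta["quoted_table"]:

import sqlglot

sql = "SELECT * FROM `my-project.my_dataset.my_table`"
# table_parts re-joins the name parts and quotes them as one identifier,
# so the output should match the input query.
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))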
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
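
A sketch of this conversion path (the column ts and the zone are invented examples): outside of the CAST ... FORMAT case, AT TIME ZONE is rewritten via nested DATETIME/TIMESTAMP calls:

import sqlglot

print(sqlglot.transpile(
    "SELECT ts AT TIME ZONE 'America/Los_Angeles'",
    read="postgres",
    write="bigquery",
)[0])
# Expected, per the branch above: SELECT TIMESTAMP(DATETIME(ts, 'America/Los_Angeles'))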
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = self.sql(expression, "this")
    expressions = expression.expressions

    if len(expressions) == 1:
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values
            return f"{this}.{arg.name}"

    expressions_sql = ", ".join(self.sql(e) for e in expressions)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{this}[{expressions_sql}]"
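
A round-trip sketch (arr and t are invented names): the parser's BRACKET_OFFSETS table records the (offset, safe) pair for each subscript wrapper, and bracket_sql restores it on generation:

import sqlglot

sql = "SELECT arr[SAFE_ORDINAL(2)] FROM t"
# offset=1 and safe=True are recorded at parse time, so generation
# should re-wrap the subscript as SAFE_ORDINAL(2).
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Expected: SELECT arr[SAFE_ORDINAL(2)] FROM t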
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql