sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`    -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`  -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
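For orientation, here is a minimal sketch of how this dialect is typically exercised through sqlglot's top-level API. The sample queries and the DuckDB source dialect are arbitrary choices for illustration, and the exact rendered output may vary across sqlglot versions:

import sqlglot

# Round-trip a BigQuery query: TIMESTAMP_MILLIS is parsed into exp.UnixToTime
# (see FUNCTIONS above) and regenerated via _unix_to_time_sql.
print(sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1672531200000)", read="bigquery", write="bigquery")[0])

# Writing BigQuery from another dialect applies the generator rules above;
# for example, ILIKE is rewritten using LOWER, since BigQuery has no ILIKE
# (see exp.ILike: no_ilike_sql in TRANSFORMS).
print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", read="duckdb", write="bigquery")[0])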
WEEK_OFFSET: First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday); -1 would be Sunday. BigQuery sets this to -1.

LOG_BASE_FIRST: Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG). BigQuery sets this to False, since its LOG(x, y) takes the base as the second argument.
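A small sketch of what LOG_BASE_FIRST buys during transpilation. Spark is used as the source dialect here because it puts the base first; the exact rendering may differ by sqlglot version:

import sqlglot

# Spark's LOG(base, value) vs. BigQuery's LOG(value, base): transpiling should
# swap the arguments, producing something like SELECT LOG(x, 10).
print(sqlglot.transpile("SELECT LOG(10, x)", read="spark", write="bigquery")[0])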
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized. BigQuery is case-insensitive here, per the lexical rules linked in the source above.

NORMALIZE_FUNCTIONS: Determines how function names are going to be normalized. Possible values: "upper" or True (convert names to uppercase), "lower" (convert names to lowercase), False (disable function name normalization). BigQuery disables this because its UDF names are case-sensitive.

TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
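For instance, the %E6S element in TIME_MAPPING is what lets a BigQuery format string survive a trip to an engine that speaks strftime-style formats. DuckDB is an arbitrary target chosen for illustration, and the exact output may differ across sqlglot versions:

import sqlglot

# BigQuery's %E6S (seconds with 6 fractional digits) maps to %S.%f via
# TIME_MAPPING, so the format string can be re-rendered for other engines,
# roughly: SELECT STRPTIME(ts, '%Y-%m-%d %H:%M:%S.%f')
sql = "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%E6S', ts)"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])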
ESCAPE_SEQUENCES: Mapping of an unescaped escape sequence to the corresponding character.

FORMAT_MAPPING: Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
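A sketch of that exclusion in action through the optimizer's qualify step. The db.tbl schema below is hypothetical, made up for illustration:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# The table declares the ingestion-time pseudo-column alongside a real column.
schema = {"db": {"tbl": {"col": "STRING", "_PARTITIONTIME": "TIMESTAMP"}}}

expression = parse_one("SELECT * FROM db.tbl", read="bigquery")
# Star expansion should only produce `col`; _PARTITIONTIME is filtered out
# because it appears in BigQuery.PSEUDOCOLUMNS.
print(qualify(expression, schema=schema, dialect="bigquery").sql(dialect="bigquery"))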
def normalize_identifier(self, expression: E) -> E
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
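A short sketch of the BigQuery behavior described above, via the optimizer rule that applies this method across a whole expression tree. The table and column names are made up:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# The unquoted column FoO is case-insensitive and gets lowered, while the
# dataset-qualified table keeps its casing: BigQuery table names are
# case-sensitive by default, per the heuristic in normalize_identifier.
expression = parse_one("SELECT FoO FROM MyDataset.MyTable", read="bigquery")
print(normalize_identifiers(expression, dialect="bigquery").sql(dialect="bigquery"))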
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
304 class Tokenizer(tokens.Tokenizer): 305 QUOTES = ["'", '"', '"""', "'''"] 306 COMMENTS = ["--", "#", ("/*", "*/")] 307 IDENTIFIERS = ["`"] 308 STRING_ESCAPES = ["\\"] 309 310 HEX_STRINGS = [("0x", ""), ("0X", "")] 311 312 BYTE_STRINGS = [ 313 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 314 ] 315 316 RAW_STRINGS = [ 317 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 318 ] 319 320 KEYWORDS = { 321 **tokens.Tokenizer.KEYWORDS, 322 "ANY TYPE": TokenType.VARIANT, 323 "BEGIN": TokenType.COMMAND, 324 "BEGIN TRANSACTION": TokenType.BEGIN, 325 "BYTES": TokenType.BINARY, 326 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 327 "DECLARE": TokenType.COMMAND, 328 "ELSEIF": TokenType.COMMAND, 329 "EXCEPTION": TokenType.COMMAND, 330 "FLOAT64": TokenType.DOUBLE, 331 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 332 "MODEL": TokenType.MODEL, 333 "NOT DETERMINISTIC": TokenType.VOLATILE, 334 "RECORD": TokenType.STRUCT, 335 "TIMESTAMP": TokenType.TIMESTAMPTZ, 336 } 337 KEYWORDS.pop("DIV") 338 KEYWORDS.pop("VALUES")
340 class Parser(parser.Parser): 341 PREFIXED_PIVOT_COLUMNS = True 342 LOG_DEFAULTS_TO_LN = True 343 SUPPORTS_IMPLICIT_UNNEST = True 344 345 FUNCTIONS = { 346 **parser.Parser.FUNCTIONS, 347 "DATE": _build_date, 348 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 349 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 350 "DATE_TRUNC": lambda args: exp.DateTrunc( 351 unit=exp.Literal.string(str(seq_get(args, 1))), 352 this=seq_get(args, 0), 353 ), 354 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 355 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 356 "DIV": binary_from_function(exp.IntDiv), 357 "FORMAT_DATE": lambda args: exp.TimeToStr( 358 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 359 ), 360 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 361 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 362 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 363 ), 364 "MD5": exp.MD5Digest.from_arg_list, 365 "TO_HEX": _build_to_hex, 366 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 367 [seq_get(args, 1), seq_get(args, 0)] 368 ), 369 "PARSE_TIMESTAMP": _build_parse_timestamp, 370 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 371 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 372 this=seq_get(args, 0), 373 expression=seq_get(args, 1), 374 position=seq_get(args, 2), 375 occurrence=seq_get(args, 3), 376 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 377 ), 378 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 379 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 380 "SPLIT": lambda args: exp.Split( 381 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 382 this=seq_get(args, 0), 383 expression=seq_get(args, 1) or exp.Literal.string(","), 384 ), 385 "TIME": _build_time, 386 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 387 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 388 "TIMESTAMP": _build_timestamp, 389 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 390 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 391 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 392 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 393 ), 394 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 395 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 396 ), 397 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 398 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 399 } 400 401 FUNCTION_PARSERS = { 402 **parser.Parser.FUNCTION_PARSERS, 403 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 404 } 405 FUNCTION_PARSERS.pop("TRIM") 406 407 NO_PAREN_FUNCTIONS = { 408 **parser.Parser.NO_PAREN_FUNCTIONS, 409 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 410 } 411 412 NESTED_TYPE_TOKENS = { 413 *parser.Parser.NESTED_TYPE_TOKENS, 414 TokenType.TABLE, 415 } 416 417 PROPERTY_PARSERS = { 418 **parser.Parser.PROPERTY_PARSERS, 419 "NOT DETERMINISTIC": lambda self: self.expression( 420 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 421 ), 422 "OPTIONS": lambda self: self._parse_with_property(), 423 } 424 425 CONSTRAINT_PARSERS = { 426 **parser.Parser.CONSTRAINT_PARSERS, 427 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 428 } 429 430 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 431 
RANGE_PARSERS.pop(TokenType.OVERLAPS) 432 433 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 434 435 STATEMENT_PARSERS = { 436 **parser.Parser.STATEMENT_PARSERS, 437 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 438 TokenType.END: lambda self: self._parse_as_command(self._prev), 439 TokenType.FOR: lambda self: self._parse_for_in(), 440 } 441 442 BRACKET_OFFSETS = { 443 "OFFSET": (0, False), 444 "ORDINAL": (1, False), 445 "SAFE_OFFSET": (0, True), 446 "SAFE_ORDINAL": (1, True), 447 } 448 449 def _parse_for_in(self) -> exp.ForIn: 450 this = self._parse_range() 451 self._match_text_seq("DO") 452 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 453 454 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 455 this = super()._parse_table_part(schema=schema) or self._parse_number() 456 457 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 458 if isinstance(this, exp.Identifier): 459 table_name = this.name 460 while self._match(TokenType.DASH, advance=False) and self._next: 461 text = "" 462 while self._curr and self._curr.token_type != TokenType.DOT: 463 self._advance() 464 text += self._prev.text 465 table_name += text 466 467 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 468 elif isinstance(this, exp.Literal): 469 table_name = this.name 470 471 if self._is_connected() and self._parse_var(any_token=True): 472 table_name += self._prev.text 473 474 this = exp.Identifier(this=table_name, quoted=True) 475 476 return this 477 478 def _parse_table_parts( 479 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 480 ) -> exp.Table: 481 table = super()._parse_table_parts( 482 schema=schema, is_db_reference=is_db_reference, wildcard=True 483 ) 484 485 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 486 if not table.catalog: 487 if table.db: 488 parts = table.db.split(".") 489 if len(parts) == 2 and not table.args["db"].quoted: 490 table.set("catalog", exp.Identifier(this=parts[0])) 491 table.set("db", exp.Identifier(this=parts[1])) 492 else: 493 parts = table.name.split(".") 494 if len(parts) == 2 and not table.this.quoted: 495 table.set("db", exp.Identifier(this=parts[0])) 496 table.set("this", exp.Identifier(this=parts[1])) 497 498 if any("." in p.name for p in table.parts): 499 catalog, db, this, *rest = ( 500 exp.to_identifier(p, quoted=True) 501 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 502 ) 503 504 if rest and this: 505 this = exp.Dot.build([this, *rest]) # type: ignore 506 507 table = exp.Table(this=this, db=db, catalog=catalog) 508 table.meta["quoted_table"] = True 509 510 return table 511 512 @t.overload 513 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 514 515 @t.overload 516 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
517 518 def _parse_json_object(self, agg=False): 519 json_object = super()._parse_json_object() 520 array_kv_pair = seq_get(json_object.expressions, 0) 521 522 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 523 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 524 if ( 525 array_kv_pair 526 and isinstance(array_kv_pair.this, exp.Array) 527 and isinstance(array_kv_pair.expression, exp.Array) 528 ): 529 keys = array_kv_pair.this.expressions 530 values = array_kv_pair.expression.expressions 531 532 json_object.set( 533 "expressions", 534 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 535 ) 536 537 return json_object 538 539 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 540 bracket = super()._parse_bracket(this) 541 542 if this is bracket: 543 return bracket 544 545 if isinstance(bracket, exp.Bracket): 546 for expression in bracket.expressions: 547 name = expression.name.upper() 548 549 if name not in self.BRACKET_OFFSETS: 550 break 551 552 offset, safe = self.BRACKET_OFFSETS[name] 553 bracket.set("offset", offset) 554 bracket.set("safe", safe) 555 expression.replace(expression.expressions[0]) 556 557 return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
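As a rough illustration of the BigQuery-specific parsing above (the table and column names are made up):

    from sqlglot import exp, parse_one

    # SAFE_OFFSET(...) array access is folded into a Bracket node whose
    # `offset` and `safe` args are taken from BRACKET_OFFSETS.
    bracket = parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery").find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True

    # Unquoted, hyphenated references like my-project are recombined into a
    # single table part rather than being parsed as a subtraction.
    table = parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").find(exp.Table)
    print(table.catalog, table.db, table.name)  # my-project dataset tbl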
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
559 class Generator(generator.Generator): 560 EXPLICIT_UNION = True 561 INTERVAL_ALLOWS_PLURAL_FORM = False 562 JOIN_HINTS = False 563 QUERY_HINTS = False 564 TABLE_HINTS = False 565 LIMIT_FETCH = "LIMIT" 566 RENAME_TABLE_WITH_DB = False 567 NVL2_SUPPORTED = False 568 UNNEST_WITH_ORDINALITY = False 569 COLLATE_IS_FUNC = True 570 LIMIT_ONLY_LITERALS = True 571 SUPPORTS_TABLE_ALIAS_COLUMNS = False 572 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 573 JSON_KEY_VALUE_PAIR_SEP = "," 574 NULL_ORDERING_SUPPORTED = False 575 IGNORE_NULLS_IN_FUNC = True 576 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 577 CAN_IMPLEMENT_ARRAY_ANY = True 578 SUPPORTS_TO_NUMBER = False 579 NAMED_PLACEHOLDER_TOKEN = "@" 580 581 TRANSFORMS = { 582 **generator.Generator.TRANSFORMS, 583 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 584 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 585 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 586 exp.ArrayContains: _array_contains_sql, 587 exp.ArrayFilter: filter_array_using_unnest, 588 exp.ArraySize: rename_func("ARRAY_LENGTH"), 589 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 590 exp.CollateProperty: lambda self, e: ( 591 f"DEFAULT COLLATE {self.sql(e, 'this')}" 592 if e.args.get("default") 593 else f"COLLATE {self.sql(e, 'this')}" 594 ), 595 exp.Commit: lambda *_: "COMMIT TRANSACTION", 596 exp.CountIf: rename_func("COUNTIF"), 597 exp.Create: _create_sql, 598 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 599 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 600 exp.DateDiff: lambda self, e: self.func( 601 "DATE_DIFF", e.this, e.expression, e.unit or "DAY" 602 ), 603 exp.DateFromParts: rename_func("DATE"), 604 exp.DateStrToDate: datestrtodate_sql, 605 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 606 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 607 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 608 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 609 exp.FromTimeZone: lambda self, e: self.func( 610 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 611 ), 612 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 613 exp.GroupConcat: rename_func("STRING_AGG"), 614 exp.Hex: rename_func("TO_HEX"), 615 exp.If: if_sql(false_value="NULL"), 616 exp.ILike: no_ilike_sql, 617 exp.IntDiv: rename_func("DIV"), 618 exp.JSONFormat: rename_func("TO_JSON_STRING"), 619 exp.Max: max_or_greatest, 620 exp.Mod: rename_func("MOD"), 621 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 622 exp.MD5Digest: rename_func("MD5"), 623 exp.Min: min_or_least, 624 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 625 exp.RegexpExtract: lambda self, e: self.func( 626 "REGEXP_EXTRACT", 627 e.this, 628 e.expression, 629 e.args.get("position"), 630 e.args.get("occurrence"), 631 ), 632 exp.RegexpReplace: regexp_replace_sql, 633 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 634 exp.ReturnsProperty: _returnsproperty_sql, 635 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 636 exp.Select: transforms.preprocess( 637 [ 638 transforms.explode_to_unnest(), 639 _unqualify_unnest, 640 transforms.eliminate_distinct_on, 641 _alias_ordered_group, 642 transforms.eliminate_semi_and_anti_joins, 643 ] 644 ), 645 exp.SHA2: lambda self, e: self.func( 646 "SHA256" if e.text("length") == "256" else "SHA512", e.this 647 ), 648 exp.StabilityProperty: lambda self, e: ( 649 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 650 ), 651 
exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 652 exp.StrToTime: lambda self, e: self.func( 653 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 654 ), 655 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 656 exp.TimeFromParts: rename_func("TIME"), 657 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 658 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 659 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 660 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 661 exp.TimeStrToTime: timestrtotime_sql, 662 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 663 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 664 exp.TsOrDsAdd: _ts_or_ds_add_sql, 665 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 666 exp.TsOrDsToTime: rename_func("TIME"), 667 exp.Unhex: rename_func("FROM_HEX"), 668 exp.UnixDate: rename_func("UNIX_DATE"), 669 exp.UnixToTime: _unix_to_time_sql, 670 exp.Values: _derived_table_values_to_unnest, 671 exp.VariancePop: rename_func("VAR_POP"), 672 } 673 674 SUPPORTED_JSON_PATH_PARTS = { 675 exp.JSONPathKey, 676 exp.JSONPathRoot, 677 exp.JSONPathSubscript, 678 } 679 680 TYPE_MAPPING = { 681 **generator.Generator.TYPE_MAPPING, 682 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 683 exp.DataType.Type.BIGINT: "INT64", 684 exp.DataType.Type.BINARY: "BYTES", 685 exp.DataType.Type.BOOLEAN: "BOOL", 686 exp.DataType.Type.CHAR: "STRING", 687 exp.DataType.Type.DECIMAL: "NUMERIC", 688 exp.DataType.Type.DOUBLE: "FLOAT64", 689 exp.DataType.Type.FLOAT: "FLOAT64", 690 exp.DataType.Type.INT: "INT64", 691 exp.DataType.Type.NCHAR: "STRING", 692 exp.DataType.Type.NVARCHAR: "STRING", 693 exp.DataType.Type.SMALLINT: "INT64", 694 exp.DataType.Type.TEXT: "STRING", 695 exp.DataType.Type.TIMESTAMP: "DATETIME", 696 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 697 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 698 exp.DataType.Type.TINYINT: "INT64", 699 exp.DataType.Type.VARBINARY: "BYTES", 700 exp.DataType.Type.VARCHAR: "STRING", 701 exp.DataType.Type.VARIANT: "ANY TYPE", 702 } 703 704 PROPERTIES_LOCATION = { 705 **generator.Generator.PROPERTIES_LOCATION, 706 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 707 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 708 } 709 710 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 711 RESERVED_KEYWORDS = { 712 *generator.Generator.RESERVED_KEYWORDS, 713 "all", 714 "and", 715 "any", 716 "array", 717 "as", 718 "asc", 719 "assert_rows_modified", 720 "at", 721 "between", 722 "by", 723 "case", 724 "cast", 725 "collate", 726 "contains", 727 "create", 728 "cross", 729 "cube", 730 "current", 731 "default", 732 "define", 733 "desc", 734 "distinct", 735 "else", 736 "end", 737 "enum", 738 "escape", 739 "except", 740 "exclude", 741 "exists", 742 "extract", 743 "false", 744 "fetch", 745 "following", 746 "for", 747 "from", 748 "full", 749 "group", 750 "grouping", 751 "groups", 752 "hash", 753 "having", 754 "if", 755 "ignore", 756 "in", 757 "inner", 758 "intersect", 759 "interval", 760 "into", 761 "is", 762 "join", 763 "lateral", 764 "left", 765 "like", 766 "limit", 767 "lookup", 768 "merge", 769 "natural", 770 "new", 771 "no", 772 "not", 773 "null", 774 "nulls", 775 "of", 776 "on", 777 "or", 778 "order", 779 "outer", 780 "over", 781 "partition", 782 "preceding", 783 "proto", 784 "qualify", 785 "range", 786 "recursive", 787 "respect", 788 "right", 789 "rollup", 790 "rows", 791 "select", 792 "set", 793 "some", 794 "struct", 795 
"tablesample", 796 "then", 797 "to", 798 "treat", 799 "true", 800 "unbounded", 801 "union", 802 "unnest", 803 "using", 804 "when", 805 "where", 806 "window", 807 "with", 808 "within", 809 } 810 811 def table_parts(self, expression: exp.Table) -> str: 812 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 813 # we need to make sure the correct quoting is used in each case. 814 # 815 # For example, if there is a CTE x that clashes with a schema name, then the former will 816 # return the table y in that schema, whereas the latter will return the CTE's y column: 817 # 818 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 819 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 820 if expression.meta.get("quoted_table"): 821 table_parts = ".".join(p.name for p in expression.parts) 822 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 823 824 return super().table_parts(expression) 825 826 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 827 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 828 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 829 830 def eq_sql(self, expression: exp.EQ) -> str: 831 # Operands of = cannot be NULL in BigQuery 832 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 833 if not isinstance(expression.parent, exp.Update): 834 return "NULL" 835 836 return self.binary(expression, "=") 837 838 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 839 parent = expression.parent 840 841 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 842 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
843 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 844 return self.func( 845 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 846 ) 847 848 return super().attimezone_sql(expression) 849 850 def trycast_sql(self, expression: exp.TryCast) -> str: 851 return self.cast_sql(expression, safe_prefix="SAFE_") 852 853 def array_sql(self, expression: exp.Array) -> str: 854 first_arg = seq_get(expression.expressions, 0) 855 if isinstance(first_arg, exp.Query): 856 return f"ARRAY{self.wrap(self.sql(first_arg))}" 857 858 return inline_array_sql(self, expression) 859 860 def bracket_sql(self, expression: exp.Bracket) -> str: 861 this = expression.this 862 expressions = expression.expressions 863 864 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 865 arg = expressions[0] 866 if arg.type is None: 867 from sqlglot.optimizer.annotate_types import annotate_types 868 869 arg = annotate_types(arg) 870 871 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 872 # BQ doesn't support bracket syntax with string values for structs 873 return f"{self.sql(this)}.{arg.name}" 874 875 expressions_sql = self.expressions(expression, flat=True) 876 offset = expression.args.get("offset") 877 878 if offset == 0: 879 expressions_sql = f"OFFSET({expressions_sql})" 880 elif offset == 1: 881 expressions_sql = f"ORDINAL({expressions_sql})" 882 elif offset is not None: 883 self.unsupported(f"Unsupported array offset: {offset}") 884 885 if expression.args.get("safe"): 886 expressions_sql = f"SAFE_{expressions_sql}" 887 888 return f"{self.sql(this)}[{expressions_sql}]" 889 890 def in_unnest_op(self, expression: exp.Unnest) -> str: 891 return self.sql(expression) 892 893 def except_op(self, expression: exp.Except) -> str: 894 if not expression.args.get("distinct"): 895 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 896 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 897 898 def intersect_op(self, expression: exp.Intersect) -> str: 899 if not expression.args.get("distinct"): 900 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 901 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 902 903 def with_properties(self, properties: exp.Properties) -> str: 904 return self.properties(properties, prefix=self.seg("OPTIONS")) 905 906 def version_sql(self, expression: exp.Version) -> str: 907 if expression.name == "TIMESTAMP": 908 expression.set("this", "SYSTEM_TIME") 909 return super().version_sql(expression)
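A small sketch of the quoting distinction that the table_parts override above preserves, round-tripping through BigQuery itself:

    import sqlglot

    # A single quoted identifier `x.y` stays one identifier, while unquoted x.y
    # remains two separate table parts.
    print(sqlglot.transpile("SELECT * FROM `x.y`", read="bigquery", write="bigquery")[0])
    print(sqlglot.transpile("SELECT * FROM x.y", read="bigquery", write="bigquery")[0])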
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
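The following sketch ties a few of these pieces together via sqlglot.transpile, which forwards generator options:

    import sqlglot

    # TYPE_MAPPING: DOUBLE renders as FLOAT64.
    print(sqlglot.transpile("SELECT CAST(x AS DOUBLE)", write="bigquery")[0])

    # TRANSFORMS: BigQuery has no ILIKE, so it is rewritten using LOWER + LIKE.
    print(sqlglot.transpile("SELECT * FROM t WHERE a ILIKE '%x%'", write="bigquery")[0])

    # Generator options such as pretty and identify are forwarded by transpile.
    print(sqlglot.transpile("SELECT a FROM tbl", write="bigquery", pretty=True, identify=True)[0])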
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql