sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
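The dialect registers itself under the name "bigquery", so it is normally exercised through sqlglot's top-level API rather than instantiated directly. A minimal sketch (the project, dataset, and column names are illustrative):

import sqlglot

# TO_HEX(MD5(..)) is parsed into exp.MD5 by _build_to_hex above, which lets
# other dialects render it with their own hex-digest idioms.
sql = "SELECT TO_HEX(MD5(name)) AS fingerprint FROM `my-project`.dataset.users"
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="duckdb"))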
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
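BigQuery sets LOG_BASE_FIRST = False because its LOG(value, base) takes the base second. A sketch of the consequence when transpiling to a base-first dialect such as Hive (the exact output may vary between sqlglot versions):

import sqlglot

# BigQuery reads this as log base 10 of 100; Hive expects LOG(base, value),
# so the arguments should come out swapped.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="hive")[0])
# e.g. SELECT LOG(10, 100)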
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
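For instance, BigQuery's %D format element maps to %m/%d/%y, which another dialect's mapping can then translate back out. A sketch (the DuckDB output shown is indicative, not guaranteed across versions):

import sqlglot

# PARSE_DATE's format string travels through TIME_MAPPING on its way out.
print(sqlglot.transpile("SELECT PARSE_DATE('%D', '12/25/08')", read="bigquery", write="duckdb")[0])
# e.g. SELECT CAST(STRPTIME('12/25/08', '%m/%d/%y') AS DATE)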
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
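A sketch of the effect using the optimizer's qualify rule with a hypothetical schema: the pseudo-column is resolvable when referenced explicitly, but star expansion leaves it out.

import sqlglot
from sqlglot.optimizer.qualify import qualify

# Hypothetical schema; _PARTITIONTIME is declared but is a pseudo-column.
schema = {"project": {"dataset": {"tbl": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}}}
expr = sqlglot.parse_one("SELECT * FROM project.dataset.tbl", read="bigquery")
print(qualify(expr, schema=schema, dialect="bigquery").sql("bigquery"))
# the expanded star should include x but not _PARTITIONTIME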
Transforms an identifier in a way that resembles how it'd be resolved by this dialect. For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
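A sketch of BigQuery's behavior through the normalize_identifiers optimizer rule (names are illustrative): the column is case-insensitive and gets lowercased, while the dataset-qualified table is treated as case-sensitive per the heuristic above and keeps its casing.

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = sqlglot.parse_one("SELECT FoO FROM Dataset.Tbl", read="bigquery")
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
# e.g. SELECT foo FROM Dataset.Tbl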
Mapping of an escaped sequence (e.g. \n) to its unescaped version (e.g. a literal newline).
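For example, a backslash escape inside a BigQuery string literal should survive a parse/generate round trip (a minimal sketch):

import sqlglot

# The raw string contains the two characters backslash and "n", not a newline.
print(sqlglot.parse_one(r"SELECT 'a\nb'", read="bigquery").sql("bigquery"))
# e.g. SELECT 'a\nb'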
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
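Among other things, these hooks let the parser handle dash-separated table names (e.g. my-project.dataset.tbl) and fold BigQuery's OFFSET/ORDINAL/SAFE_OFFSET array accessors into a single Bracket node, per BRACKET_OFFSETS above. A minimal sketch:

    import sqlglot
    from sqlglot import exp

    # Dash-separated project names parse into a single catalog identifier.
    ast = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
    print(ast.find(exp.Table).catalog)  # my-project

    # SAFE_OFFSET(0) becomes a Bracket node carrying offset/safe metadata.
    bracket = sqlglot.parse_one(
        "SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery"
    ).find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True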
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
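For example, to collect parse errors and raise them at the end of parsing instead of failing immediately (a minimal sketch; the malformed SQL is arbitrary):

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE)
    except ParseError as e:
        print(e.errors)  # structured details for each recorded error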
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.Mod: rename_func("MOD"),
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Query):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
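In practice, the TRANSFORMS and TYPE_MAPPING tables drive transpilation into BigQuery SQL. For example (a minimal sketch; the exact output is approximate):

    import sqlglot

    sql = "SELECT CAST(x AS VARCHAR(10)) FROM t WHERE y ILIKE '%a%'"
    print(sqlglot.transpile(sql, write="bigquery")[0])
    # Roughly: SELECT CAST(x AS STRING) FROM t WHERE LOWER(y) LIKE '%a%'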
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
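For instance, generating formatted SQL with pretty mode enabled (a minimal sketch):

    import sqlglot

    print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="bigquery", pretty=True)[0])
    # SELECT
    #   a,
    #   b
    # FROM t
    # WHERE
    #   a > 1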
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql