sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on",
            "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
            "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
            "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
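To make the transforms above concrete, here is a minimal sketch that exercises two of them through sqlglot's public transpile API. The SQL inputs are illustrative; the exact output strings may vary slightly across sqlglot versions, so the expected shapes in the comments are approximate.

    import sqlglot

    # exp.CTE -> _pushdown_cte_column_names: BigQuery disallows column names on
    # a CTE alias, so they are pushed down into the CTE's projections.
    print(sqlglot.transpile("WITH t(c) AS (SELECT 1) SELECT c FROM t", write="bigquery")[0])
    # Expected shape: WITH t AS (SELECT 1 AS c) SELECT c FROM t

    # exp.Values -> _derived_table_values_to_unnest: a VALUES derived table
    # becomes an UNNEST over an array of STRUCTs.
    print(sqlglot.transpile("SELECT a, b FROM (VALUES (1, 2)) AS tab(a, b)", write="bigquery")[0])
    # Expected shape: SELECT a, b FROM UNNEST([STRUCT(1 AS a, 2 AS b)]) AS tab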
class BigQuery(sqlglot.dialects.dialect.Dialect):
WEEK_OFFSET: First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday); -1 means Sunday. BigQuery sets -1.

LOG_BASE_FIRST: Whether the base comes first in the LOG function. Possible values: True, False, None (two-argument LOG is not supported). BigQuery sets False, i.e. the base is the second argument.

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.

NORMALIZE_FUNCTIONS: Determines how function names are going to be normalized. Possible values:
- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disable function name normalization.

TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.

FORMAT_MAPPING: Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
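As an illustration of TIME_MAPPING in action, the sketch below transpiles a BigQuery PARSE_TIMESTAMP call to DuckDB; the %-style format elements are carried through the internal strftime representation. The expected output shape is approximate and may differ between sqlglot versions.

    import sqlglot

    print(sqlglot.transpile(
        "SELECT PARSE_TIMESTAMP('%Y-%m-%d', s)", read="bigquery", write="duckdb"
    )[0])
    # Expected shape: SELECT STRPTIME(s, '%Y-%m-%d')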
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would be resolved as FOO. If it were quoted, it would need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, as in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
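A minimal sketch of this method's behavior for BigQuery, using only public sqlglot APIs (the is_table meta flag is the same one checked in the source above):

    from sqlglot import exp
    from sqlglot.dialects.bigquery import BigQuery

    bq = BigQuery()

    # Unqualified identifiers are case-insensitive in BigQuery, so they get lowered.
    print(bq.normalize_identifier(exp.to_identifier("FoO")).sql(dialect="bigquery"))  # foo

    # Identifiers flagged as table names keep their casing, since table and UDF
    # names are case-sensitive by default.
    ident = exp.to_identifier("FoO")
    ident.meta["is_table"] = True
    print(bq.normalize_identifier(ident).sql(dialect="bigquery"))  # FoO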
UNESCAPED_SEQUENCES: Mapping of an escaped sequence (e.g. the two characters \n) to its unescaped version (the corresponding control character, such as a literal newline).
class BigQuery.Tokenizer(sqlglot.tokens.Tokenizer):
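The tokenizer settings shown in the module source above enable BigQuery-specific lexing such as backtick-quoted identifiers and '#' line comments. A quick sketch using the dialect's tokenize helper (token output format may vary by sqlglot version):

    from sqlglot.dialects.bigquery import BigQuery

    # Backtick identifiers and '#' comments are recognized per IDENTIFIERS/COMMENTS.
    for token in BigQuery().tokenize("SELECT `col` FROM `proj.ds.tbl`  # a comment"):
        print(token.token_type, repr(token.text))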
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
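As a quick illustration of the rules above (a minimal sketch, not part of the module; exact SQL output can vary across sqlglot versions), dashed table names and the OFFSET/ORDINAL subscript operators are handled like this:

import sqlglot
from sqlglot import exp

# Dashed project names are glued back into a single identifier by _parse_table_part.
ast = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
print(ast.find(exp.Table).sql("bigquery"))

# BRACKET_OFFSETS / _parse_bracket record the offset base and the SAFE_ semantics
# on the Bracket node instead of keeping the wrapper function call.
bracket = sqlglot.parse_one(
    "SELECT arr[SAFE_OFFSET(1)] FROM t", read="bigquery"
).find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # expected: 0 True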
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
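These options are forwarded to this Parser by sqlglot's top-level helpers, so they can be exercised without instantiating the class directly. A minimal sketch using only the public sqlglot API:

import sqlglot
from sqlglot.errors import ErrorLevel

# error_level and max_errors are passed through to BigQuery.Parser.
expressions = sqlglot.parse(
    "SELECT 1; SELECT 2",
    read="bigquery",
    error_level=ErrorLevel.RAISE,  # raise a ParseError instead of logging
    max_errors=3,                  # cap the errors included in the exception
)
print(len(expressions))  # 2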
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.Mod: rename_func("MOD"),
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
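Transpiling into BigQuery exercises the type mapping, the ILIKE rewrite, and except_op defined above. This is a hedged sketch; the exact strings in the comments are indicative and may differ between sqlglot versions:

import sqlglot

# TYPE_MAPPING: BIGINT -> INT64, TEXT -> STRING
print(sqlglot.transpile("SELECT CAST(a AS BIGINT), CAST(b AS TEXT) FROM t", write="bigquery")[0])

# no_ilike_sql: BigQuery has no ILIKE, so it is lowered to LIKE over LOWER()
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", read="duckdb", write="bigquery")[0])

# except_op: set operations must be explicitly DISTINCT or ALL
print(sqlglot.transpile("SELECT 1 EXCEPT SELECT 2", write="bigquery")[0])
# expected: SELECT 1 EXCEPT DISTINCT SELECT 2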
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
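These options are keyword arguments of the Generator and are forwarded by sqlglot's top-level helpers. A minimal sketch using only the public API:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT col_a, col_b FROM dataset.tbl WHERE col_a > 0",
        write="bigquery",
        pretty=True,    # multi-line, indented output
        identify=True,  # quote every identifier
    )[0]
)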
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- OUTER_UNION_MODIFIERS
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql