sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 arg_max_or_min_no_count, 12 binary_from_function, 13 date_add_interval_sql, 14 datestrtodate_sql, 15 build_formatted_time, 16 filter_array_using_unnest, 17 if_sql, 18 inline_array_unless_query, 19 max_or_greatest, 20 min_or_least, 21 no_ilike_sql, 22 build_date_delta_with_interval, 23 regexp_replace_sql, 24 rename_func, 25 timestrtotime_sql, 26 ts_or_ds_add_cast, 27 unit_to_var, 28) 29from sqlglot.helper import seq_get, split_num_words 30from sqlglot.tokens import TokenType 31 32if t.TYPE_CHECKING: 33 from sqlglot._typing import E, Lit 34 35logger = logging.getLogger("sqlglot") 36 37 38def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 39 if not expression.find_ancestor(exp.From, exp.Join): 40 return self.values_sql(expression) 41 42 structs = [] 43 alias = expression.args.get("alias") 44 for tup in expression.find_all(exp.Tuple): 45 field_aliases = ( 46 alias.columns 47 if alias and alias.columns 48 else (f"_c{i}" for i in range(len(tup.expressions))) 49 ) 50 expressions = [ 51 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 52 for name, fld in zip(field_aliases, tup.expressions) 53 ] 54 structs.append(exp.Struct(expressions=expressions)) 55 56 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 57 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 58 return self.unnest_sql( 59 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 60 ) 61 62 63def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 64 this = expression.this 65 if isinstance(this, exp.Schema): 66 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 67 else: 68 this = self.sql(this) 69 return f"RETURNS {this}" 70 71 72def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 73 returns = expression.find(exp.ReturnsProperty) 74 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 75 expression.set("kind", "TABLE FUNCTION") 76 77 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 78 expression.set("expression", expression.expression.this) 79 80 return self.create_sql(expression) 81 82 83# https://issuetracker.google.com/issues/162294746 84# workaround for bigquery bug when grouping by an expression and then ordering 85# WITH x AS (SELECT 1 y) 86# SELECT y + 1 z 87# FROM x 88# GROUP BY x + 1 89# ORDER by z 90def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 91 if isinstance(expression, exp.Select): 92 group = expression.args.get("group") 93 order = expression.args.get("order") 94 95 if group and order: 96 aliases = { 97 select.this: select.args["alias"] 98 for select in expression.selects 99 if isinstance(select, exp.Alias) 100 } 101 102 for grouped in group.expressions: 103 if grouped.is_int: 104 continue 105 alias = aliases.get(grouped) 106 if alias: 107 grouped.replace(exp.column(alias)) 108 109 return expression 110 111 112def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 113 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 114 if isinstance(expression, exp.CTE) and expression.alias_column_names: 115 cte_query = expression.this 116 117 if 
cte_query.is_star: 118 logger.warning( 119 "Can't push down CTE column names for star queries. Run the query through" 120 " the optimizer or use 'qualify' to expand the star projections first." 121 ) 122 return expression 123 124 column_names = expression.alias_column_names 125 expression.args["alias"].set("columns", None) 126 127 for name, select in zip(column_names, cte_query.selects): 128 to_replace = select 129 130 if isinstance(select, exp.Alias): 131 select = select.this 132 133 # Inner aliases are shadowed by the CTE column names 134 to_replace.replace(exp.alias_(select, name)) 135 136 return expression 137 138 139def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 140 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 141 this.set("zone", seq_get(args, 2)) 142 return this 143 144 145def _build_timestamp(args: t.List) -> exp.Timestamp: 146 timestamp = exp.Timestamp.from_arg_list(args) 147 timestamp.set("with_tz", True) 148 return timestamp 149 150 151def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 152 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 153 return expr_type.from_arg_list(args) 154 155 156def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 157 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 158 arg = seq_get(args, 0) 159 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg) 160 161 162def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 163 return self.sql( 164 exp.Exists( 165 this=exp.select("1") 166 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 167 .where(exp.column("_col").eq(expression.right)) 168 ) 169 ) 170 171 172def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 173 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 174 175 176def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 177 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 178 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 179 unit = unit_to_var(expression) 180 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 181 182 183def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 184 scale = expression.args.get("scale") 185 timestamp = expression.this 186 187 if scale in (None, exp.UnixToTime.SECONDS): 188 return self.func("TIMESTAMP_SECONDS", timestamp) 189 if scale == exp.UnixToTime.MILLIS: 190 return self.func("TIMESTAMP_MILLIS", timestamp) 191 if scale == exp.UnixToTime.MICROS: 192 return self.func("TIMESTAMP_MICROS", timestamp) 193 194 unix_seconds = exp.cast( 195 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 196 ) 197 return self.func("TIMESTAMP_SECONDS", unix_seconds) 198 199 200def _build_time(args: t.List) -> exp.Func: 201 if len(args) == 1: 202 return exp.TsOrDsToTime(this=args[0]) 203 if len(args) == 3: 204 return exp.TimeFromParts.from_arg_list(args) 205 206 return exp.Anonymous(this="TIME", expressions=args) 207 208 209class BigQuery(Dialect): 210 WEEK_OFFSET = -1 211 UNNEST_COLUMN_ONLY = True 212 SUPPORTS_USER_DEFINED_TYPES = False 213 SUPPORTS_SEMI_ANTI_JOIN = False 214 LOG_BASE_FIRST = False 215 216 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 217 
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 218 219 # bigquery udfs are case sensitive 220 NORMALIZE_FUNCTIONS = False 221 222 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 223 TIME_MAPPING = { 224 "%D": "%m/%d/%y", 225 "%E6S": "%S.%f", 226 } 227 228 FORMAT_MAPPING = { 229 "DD": "%d", 230 "MM": "%m", 231 "MON": "%b", 232 "MONTH": "%B", 233 "YYYY": "%Y", 234 "YY": "%y", 235 "HH": "%I", 236 "HH12": "%I", 237 "HH24": "%H", 238 "MI": "%M", 239 "SS": "%S", 240 "SSSSS": "%f", 241 "TZH": "%z", 242 } 243 244 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 245 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 246 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 247 248 def normalize_identifier(self, expression: E) -> E: 249 if isinstance(expression, exp.Identifier): 250 parent = expression.parent 251 while isinstance(parent, exp.Dot): 252 parent = parent.parent 253 254 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 255 # by default. The following check uses a heuristic to detect tables based on whether 256 # they are qualified. This should generally be correct, because tables in BigQuery 257 # must be qualified with at least a dataset, unless @@dataset_id is set. 258 case_sensitive = ( 259 isinstance(parent, exp.UserDefinedFunction) 260 or ( 261 isinstance(parent, exp.Table) 262 and parent.db 263 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 264 ) 265 or expression.meta.get("is_table") 266 ) 267 if not case_sensitive: 268 expression.set("this", expression.this.lower()) 269 270 return expression 271 272 class Tokenizer(tokens.Tokenizer): 273 QUOTES = ["'", '"', '"""', "'''"] 274 COMMENTS = ["--", "#", ("/*", "*/")] 275 IDENTIFIERS = ["`"] 276 STRING_ESCAPES = ["\\"] 277 278 HEX_STRINGS = [("0x", ""), ("0X", "")] 279 280 BYTE_STRINGS = [ 281 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 282 ] 283 284 RAW_STRINGS = [ 285 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 286 ] 287 288 KEYWORDS = { 289 **tokens.Tokenizer.KEYWORDS, 290 "ANY TYPE": TokenType.VARIANT, 291 "BEGIN": TokenType.COMMAND, 292 "BEGIN TRANSACTION": TokenType.BEGIN, 293 "BYTES": TokenType.BINARY, 294 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 295 "DATETIME": TokenType.TIMESTAMP, 296 "DECLARE": TokenType.COMMAND, 297 "ELSEIF": TokenType.COMMAND, 298 "EXCEPTION": TokenType.COMMAND, 299 "FLOAT64": TokenType.DOUBLE, 300 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 301 "MODEL": TokenType.MODEL, 302 "NOT DETERMINISTIC": TokenType.VOLATILE, 303 "RECORD": TokenType.STRUCT, 304 "TIMESTAMP": TokenType.TIMESTAMPTZ, 305 } 306 KEYWORDS.pop("DIV") 307 KEYWORDS.pop("VALUES") 308 309 class Parser(parser.Parser): 310 PREFIXED_PIVOT_COLUMNS = True 311 LOG_DEFAULTS_TO_LN = True 312 SUPPORTS_IMPLICIT_UNNEST = True 313 314 FUNCTIONS = { 315 **parser.Parser.FUNCTIONS, 316 "DATE": _build_date, 317 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 318 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 319 "DATE_TRUNC": lambda args: exp.DateTrunc( 320 unit=exp.Literal.string(str(seq_get(args, 1))), 321 this=seq_get(args, 0), 322 ), 323 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 324 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 325 "DIV": binary_from_function(exp.IntDiv), 326 
"FORMAT_DATE": lambda args: exp.TimeToStr( 327 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 328 ), 329 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 330 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 331 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 332 ), 333 "MD5": exp.MD5Digest.from_arg_list, 334 "TO_HEX": _build_to_hex, 335 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 336 [seq_get(args, 1), seq_get(args, 0)] 337 ), 338 "PARSE_TIMESTAMP": _build_parse_timestamp, 339 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 340 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 341 this=seq_get(args, 0), 342 expression=seq_get(args, 1), 343 position=seq_get(args, 2), 344 occurrence=seq_get(args, 3), 345 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 346 ), 347 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 348 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 349 "SPLIT": lambda args: exp.Split( 350 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 351 this=seq_get(args, 0), 352 expression=seq_get(args, 1) or exp.Literal.string(","), 353 ), 354 "TIME": _build_time, 355 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 356 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 357 "TIMESTAMP": _build_timestamp, 358 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 359 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 360 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 361 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 362 ), 363 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 364 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 365 ), 366 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 367 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 368 } 369 370 FUNCTION_PARSERS = { 371 **parser.Parser.FUNCTION_PARSERS, 372 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 373 } 374 FUNCTION_PARSERS.pop("TRIM") 375 376 NO_PAREN_FUNCTIONS = { 377 **parser.Parser.NO_PAREN_FUNCTIONS, 378 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 379 } 380 381 NESTED_TYPE_TOKENS = { 382 *parser.Parser.NESTED_TYPE_TOKENS, 383 TokenType.TABLE, 384 } 385 386 PROPERTY_PARSERS = { 387 **parser.Parser.PROPERTY_PARSERS, 388 "NOT DETERMINISTIC": lambda self: self.expression( 389 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 390 ), 391 "OPTIONS": lambda self: self._parse_with_property(), 392 } 393 394 CONSTRAINT_PARSERS = { 395 **parser.Parser.CONSTRAINT_PARSERS, 396 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 397 } 398 399 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 400 RANGE_PARSERS.pop(TokenType.OVERLAPS) 401 402 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 403 404 STATEMENT_PARSERS = { 405 **parser.Parser.STATEMENT_PARSERS, 406 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 407 TokenType.END: lambda self: self._parse_as_command(self._prev), 408 TokenType.FOR: lambda self: self._parse_for_in(), 409 } 410 411 BRACKET_OFFSETS = { 412 "OFFSET": (0, False), 413 "ORDINAL": (1, False), 414 "SAFE_OFFSET": (0, True), 415 "SAFE_ORDINAL": (1, True), 416 } 417 418 def _parse_for_in(self) -> exp.ForIn: 419 this = self._parse_range() 420 self._match_text_seq("DO") 421 return self.expression(exp.ForIn, 
this=this, expression=self._parse_statement()) 422 423 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 424 this = super()._parse_table_part(schema=schema) or self._parse_number() 425 426 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 427 if isinstance(this, exp.Identifier): 428 table_name = this.name 429 while self._match(TokenType.DASH, advance=False) and self._next: 430 text = "" 431 while self._curr and self._curr.token_type != TokenType.DOT: 432 self._advance() 433 text += self._prev.text 434 table_name += text 435 436 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 437 elif isinstance(this, exp.Literal): 438 table_name = this.name 439 440 if self._is_connected() and self._parse_var(any_token=True): 441 table_name += self._prev.text 442 443 this = exp.Identifier(this=table_name, quoted=True) 444 445 return this 446 447 def _parse_table_parts( 448 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 449 ) -> exp.Table: 450 table = super()._parse_table_parts( 451 schema=schema, is_db_reference=is_db_reference, wildcard=True 452 ) 453 454 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 455 if not table.catalog: 456 if table.db: 457 parts = table.db.split(".") 458 if len(parts) == 2 and not table.args["db"].quoted: 459 table.set("catalog", exp.Identifier(this=parts[0])) 460 table.set("db", exp.Identifier(this=parts[1])) 461 else: 462 parts = table.name.split(".") 463 if len(parts) == 2 and not table.this.quoted: 464 table.set("db", exp.Identifier(this=parts[0])) 465 table.set("this", exp.Identifier(this=parts[1])) 466 467 if any("." in p.name for p in table.parts): 468 catalog, db, this, *rest = ( 469 exp.to_identifier(p, quoted=True) 470 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 471 ) 472 473 if rest and this: 474 this = exp.Dot.build([this, *rest]) # type: ignore 475 476 table = exp.Table( 477 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 478 ) 479 table.meta["quoted_table"] = True 480 481 return table 482 483 def _parse_column(self) -> t.Optional[exp.Expression]: 484 column = super()._parse_column() 485 if isinstance(column, exp.Column): 486 parts = column.parts 487 if any("." in p.name for p in parts): 488 catalog, db, table, this, *rest = ( 489 exp.to_identifier(p, quoted=True) 490 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 491 ) 492 493 if rest and this: 494 this = exp.Dot.build([this, *rest]) # type: ignore 495 496 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 497 column.meta["quoted_column"] = True 498 499 return column 500 501 @t.overload 502 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 503 504 @t.overload 505 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
506 507 def _parse_json_object(self, agg=False): 508 json_object = super()._parse_json_object() 509 array_kv_pair = seq_get(json_object.expressions, 0) 510 511 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 512 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 513 if ( 514 array_kv_pair 515 and isinstance(array_kv_pair.this, exp.Array) 516 and isinstance(array_kv_pair.expression, exp.Array) 517 ): 518 keys = array_kv_pair.this.expressions 519 values = array_kv_pair.expression.expressions 520 521 json_object.set( 522 "expressions", 523 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 524 ) 525 526 return json_object 527 528 def _parse_bracket( 529 self, this: t.Optional[exp.Expression] = None 530 ) -> t.Optional[exp.Expression]: 531 bracket = super()._parse_bracket(this) 532 533 if this is bracket: 534 return bracket 535 536 if isinstance(bracket, exp.Bracket): 537 for expression in bracket.expressions: 538 name = expression.name.upper() 539 540 if name not in self.BRACKET_OFFSETS: 541 break 542 543 offset, safe = self.BRACKET_OFFSETS[name] 544 bracket.set("offset", offset) 545 bracket.set("safe", safe) 546 expression.replace(expression.expressions[0]) 547 548 return bracket 549 550 class Generator(generator.Generator): 551 EXPLICIT_UNION = True 552 INTERVAL_ALLOWS_PLURAL_FORM = False 553 JOIN_HINTS = False 554 QUERY_HINTS = False 555 TABLE_HINTS = False 556 LIMIT_FETCH = "LIMIT" 557 RENAME_TABLE_WITH_DB = False 558 NVL2_SUPPORTED = False 559 UNNEST_WITH_ORDINALITY = False 560 COLLATE_IS_FUNC = True 561 LIMIT_ONLY_LITERALS = True 562 SUPPORTS_TABLE_ALIAS_COLUMNS = False 563 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 564 JSON_KEY_VALUE_PAIR_SEP = "," 565 NULL_ORDERING_SUPPORTED = False 566 IGNORE_NULLS_IN_FUNC = True 567 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 568 CAN_IMPLEMENT_ARRAY_ANY = True 569 SUPPORTS_TO_NUMBER = False 570 NAMED_PLACEHOLDER_TOKEN = "@" 571 572 TRANSFORMS = { 573 **generator.Generator.TRANSFORMS, 574 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 575 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 576 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 577 exp.Array: inline_array_unless_query, 578 exp.ArrayContains: _array_contains_sql, 579 exp.ArrayFilter: filter_array_using_unnest, 580 exp.ArraySize: rename_func("ARRAY_LENGTH"), 581 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 582 exp.CollateProperty: lambda self, e: ( 583 f"DEFAULT COLLATE {self.sql(e, 'this')}" 584 if e.args.get("default") 585 else f"COLLATE {self.sql(e, 'this')}" 586 ), 587 exp.Commit: lambda *_: "COMMIT TRANSACTION", 588 exp.CountIf: rename_func("COUNTIF"), 589 exp.Create: _create_sql, 590 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 591 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 592 exp.DateDiff: lambda self, e: self.func( 593 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 594 ), 595 exp.DateFromParts: rename_func("DATE"), 596 exp.DateStrToDate: datestrtodate_sql, 597 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 598 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 599 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 600 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 601 exp.FromTimeZone: lambda self, e: self.func( 602 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 603 ), 604 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 605 
exp.GroupConcat: rename_func("STRING_AGG"), 606 exp.Hex: rename_func("TO_HEX"), 607 exp.If: if_sql(false_value="NULL"), 608 exp.ILike: no_ilike_sql, 609 exp.IntDiv: rename_func("DIV"), 610 exp.JSONFormat: rename_func("TO_JSON_STRING"), 611 exp.Max: max_or_greatest, 612 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 613 exp.MD5Digest: rename_func("MD5"), 614 exp.Min: min_or_least, 615 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 616 exp.RegexpExtract: lambda self, e: self.func( 617 "REGEXP_EXTRACT", 618 e.this, 619 e.expression, 620 e.args.get("position"), 621 e.args.get("occurrence"), 622 ), 623 exp.RegexpReplace: regexp_replace_sql, 624 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 625 exp.ReturnsProperty: _returnsproperty_sql, 626 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 627 exp.Select: transforms.preprocess( 628 [ 629 transforms.explode_to_unnest(), 630 transforms.unqualify_unnest, 631 transforms.eliminate_distinct_on, 632 _alias_ordered_group, 633 transforms.eliminate_semi_and_anti_joins, 634 ] 635 ), 636 exp.SHA: rename_func("SHA1"), 637 exp.SHA2: lambda self, e: self.func( 638 "SHA256" if e.text("length") == "256" else "SHA512", e.this 639 ), 640 exp.StabilityProperty: lambda self, e: ( 641 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 642 ), 643 exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 644 exp.StrToTime: lambda self, e: self.func( 645 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 646 ), 647 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 648 exp.TimeFromParts: rename_func("TIME"), 649 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 650 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 651 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 652 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 653 exp.TimeStrToTime: timestrtotime_sql, 654 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 655 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 656 exp.TsOrDsAdd: _ts_or_ds_add_sql, 657 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 658 exp.TsOrDsToTime: rename_func("TIME"), 659 exp.Unhex: rename_func("FROM_HEX"), 660 exp.UnixDate: rename_func("UNIX_DATE"), 661 exp.UnixToTime: _unix_to_time_sql, 662 exp.Values: _derived_table_values_to_unnest, 663 exp.VariancePop: rename_func("VAR_POP"), 664 } 665 666 SUPPORTED_JSON_PATH_PARTS = { 667 exp.JSONPathKey, 668 exp.JSONPathRoot, 669 exp.JSONPathSubscript, 670 } 671 672 TYPE_MAPPING = { 673 **generator.Generator.TYPE_MAPPING, 674 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 675 exp.DataType.Type.BIGINT: "INT64", 676 exp.DataType.Type.BINARY: "BYTES", 677 exp.DataType.Type.BOOLEAN: "BOOL", 678 exp.DataType.Type.CHAR: "STRING", 679 exp.DataType.Type.DECIMAL: "NUMERIC", 680 exp.DataType.Type.DOUBLE: "FLOAT64", 681 exp.DataType.Type.FLOAT: "FLOAT64", 682 exp.DataType.Type.INT: "INT64", 683 exp.DataType.Type.NCHAR: "STRING", 684 exp.DataType.Type.NVARCHAR: "STRING", 685 exp.DataType.Type.SMALLINT: "INT64", 686 exp.DataType.Type.TEXT: "STRING", 687 exp.DataType.Type.TIMESTAMP: "DATETIME", 688 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 689 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 690 exp.DataType.Type.TINYINT: "INT64", 691 exp.DataType.Type.VARBINARY: "BYTES", 692 exp.DataType.Type.ROWVERSION: "BYTES", 693 exp.DataType.Type.VARCHAR: "STRING", 694 exp.DataType.Type.VARIANT: "ANY TYPE", 695 } 696 697 PROPERTIES_LOCATION = { 698 
**generator.Generator.PROPERTIES_LOCATION, 699 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 700 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 701 } 702 703 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 704 RESERVED_KEYWORDS = { 705 *generator.Generator.RESERVED_KEYWORDS, 706 "all", 707 "and", 708 "any", 709 "array", 710 "as", 711 "asc", 712 "assert_rows_modified", 713 "at", 714 "between", 715 "by", 716 "case", 717 "cast", 718 "collate", 719 "contains", 720 "create", 721 "cross", 722 "cube", 723 "current", 724 "default", 725 "define", 726 "desc", 727 "distinct", 728 "else", 729 "end", 730 "enum", 731 "escape", 732 "except", 733 "exclude", 734 "exists", 735 "extract", 736 "false", 737 "fetch", 738 "following", 739 "for", 740 "from", 741 "full", 742 "group", 743 "grouping", 744 "groups", 745 "hash", 746 "having", 747 "if", 748 "ignore", 749 "in", 750 "inner", 751 "intersect", 752 "interval", 753 "into", 754 "is", 755 "join", 756 "lateral", 757 "left", 758 "like", 759 "limit", 760 "lookup", 761 "merge", 762 "natural", 763 "new", 764 "no", 765 "not", 766 "null", 767 "nulls", 768 "of", 769 "on", 770 "or", 771 "order", 772 "outer", 773 "over", 774 "partition", 775 "preceding", 776 "proto", 777 "qualify", 778 "range", 779 "recursive", 780 "respect", 781 "right", 782 "rollup", 783 "rows", 784 "select", 785 "set", 786 "some", 787 "struct", 788 "tablesample", 789 "then", 790 "to", 791 "treat", 792 "true", 793 "unbounded", 794 "union", 795 "unnest", 796 "using", 797 "when", 798 "where", 799 "window", 800 "with", 801 "within", 802 } 803 804 def mod_sql(self, expression: exp.Mod) -> str: 805 this = expression.this 806 expr = expression.expression 807 return self.func( 808 "MOD", 809 this.unnest() if isinstance(this, exp.Paren) else this, 810 expr.unnest() if isinstance(expr, exp.Paren) else expr, 811 ) 812 813 def column_parts(self, expression: exp.Column) -> str: 814 if expression.meta.get("quoted_column"): 815 # If a column reference is of the form `dataset.table`.name, we need 816 # to preserve the quoted table path, otherwise the reference breaks 817 table_parts = ".".join(p.name for p in expression.parts[:-1]) 818 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 819 return f"{table_path}.{self.sql(expression, 'this')}" 820 821 return super().column_parts(expression) 822 823 def table_parts(self, expression: exp.Table) -> str: 824 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 825 # we need to make sure the correct quoting is used in each case. 
826 # 827 # For example, if there is a CTE x that clashes with a schema name, then the former will 828 # return the table y in that schema, whereas the latter will return the CTE's y column: 829 # 830 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 831 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 832 if expression.meta.get("quoted_table"): 833 table_parts = ".".join(p.name for p in expression.parts) 834 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 835 836 return super().table_parts(expression) 837 838 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 839 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 840 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 841 842 def eq_sql(self, expression: exp.EQ) -> str: 843 # Operands of = cannot be NULL in BigQuery 844 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 845 if not isinstance(expression.parent, exp.Update): 846 return "NULL" 847 848 return self.binary(expression, "=") 849 850 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 851 parent = expression.parent 852 853 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 854 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 855 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 856 return self.func( 857 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 858 ) 859 860 return super().attimezone_sql(expression) 861 862 def trycast_sql(self, expression: exp.TryCast) -> str: 863 return self.cast_sql(expression, safe_prefix="SAFE_") 864 865 def bracket_sql(self, expression: exp.Bracket) -> str: 866 this = expression.this 867 expressions = expression.expressions 868 869 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 870 arg = expressions[0] 871 if arg.type is None: 872 from sqlglot.optimizer.annotate_types import annotate_types 873 874 arg = annotate_types(arg) 875 876 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 877 # BQ doesn't support bracket syntax with string values for structs 878 return f"{self.sql(this)}.{arg.name}" 879 880 expressions_sql = self.expressions(expression, flat=True) 881 offset = expression.args.get("offset") 882 883 if offset == 0: 884 expressions_sql = f"OFFSET({expressions_sql})" 885 elif offset == 1: 886 expressions_sql = f"ORDINAL({expressions_sql})" 887 elif offset is not None: 888 self.unsupported(f"Unsupported array offset: {offset}") 889 890 if expression.args.get("safe"): 891 expressions_sql = f"SAFE_{expressions_sql}" 892 893 return f"{self.sql(this)}[{expressions_sql}]" 894 895 def in_unnest_op(self, expression: exp.Unnest) -> str: 896 return self.sql(expression) 897 898 def except_op(self, expression: exp.Except) -> str: 899 if not expression.args.get("distinct"): 900 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 901 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 902 903 def intersect_op(self, expression: exp.Intersect) -> str: 904 if not expression.args.get("distinct"): 905 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 906 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 907 908 def with_properties(self, properties: exp.Properties) -> str: 909 return self.properties(properties, 
prefix=self.seg("OPTIONS")) 910 911 def version_sql(self, expression: exp.Version) -> str: 912 if expression.name == "TIMESTAMP": 913 expression.set("this", "SYSTEM_TIME") 914 return super().version_sql(expression)
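End to end, the helpers above are easiest to observe through the public API. A minimal usage sketch (not part of the module; the query and its names are made up, and the printed shape is indicative rather than guaranteed across sqlglot versions):

import sqlglot

# BigQuery has no VALUES derived tables, so the BigQuery generator's exp.Values
# transform (_derived_table_values_to_unnest) emits UNNEST of STRUCTs instead.
sql = "SELECT a, b FROM (VALUES (1, 'x'), (2, 'y')) AS t(a, b)"
print(sqlglot.transpile(sql, read="duckdb", write="bigquery")[0])
# Expected shape:
# SELECT a, b FROM UNNEST([STRUCT(1 AS a, 'x' AS b), STRUCT(2 AS a, 'y' AS b)]) AS t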
class BigQuery(Dialect):

WEEK_OFFSET = -1
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
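A one-line sketch restating the attribute, handy for interactive checks:

from sqlglot.dialects.bigquery import BigQuery

# -1 shifts the first day of the week from Monday to Sunday, matching BigQuery.
assert BigQuery.WEEK_OFFSET == -1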
LOG_BASE_FIRST = False
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
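Because the base comes second in BigQuery's LOG, transpiling to a base-first dialect swaps the operands. A hedged sketch (Spark assumed as a base-first target; the exact output may vary by sqlglot version):

import sqlglot

# BigQuery: LOG(value, base); Spark: LOG(base, value).
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="spark")[0])
# Expected shape: SELECT LOG(10, 100)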
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False
Determines how function names are going to be normalized. Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E6S": "%S.%f"}
Associates this dialect's time formats with their equivalent Python strftime formats.

FORMAT_MAPPING = {"DD": "%d", "MM": "%m", "MON": "%b", "MONTH": "%B", "YYYY": "%Y", "YY": "%y", "HH": "%I", "HH12": "%I", "HH24": "%H", "MI": "%M", "SS": "%S", "SSSSS": "%f", "TZH": "%z"}
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
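A combined sketch for NORMALIZE_FUNCTIONS and TIME_MAPPING (the UDF name and column are made up; DuckDB is an assumed target and the exact output may vary by version):

import sqlglot

# NORMALIZE_FUNCTIONS = False: the hypothetical UDF name keeps its casing.
print(sqlglot.parse_one("SELECT my_Udf(x)", read="bigquery").sql("bigquery"))
# Expected shape: SELECT my_Udf(x)

# TIME_MAPPING: BigQuery's fractional-seconds element %E6S becomes %S.%f on the
# way out, here inside DuckDB's STRPTIME.
sql = "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%E6S', ts_str)"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected shape: SELECT STRPTIME(ts_str, '%Y-%m-%d %H:%M:%S.%f')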
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
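These pseudo-columns can still be selected explicitly; they just must not appear when a star projection is expanded. Restated as a sketch:

from sqlglot.dialects.bigquery import BigQuery

# Both ingestion-time partitioning pseudo-columns are registered.
assert BigQuery.PSEUDOCOLUMNS == {"_PARTITIONTIME", "_PARTITIONDATE"}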
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
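A small sketch of the behavior described above, using unquoted identifiers; BigQuery's override (shown below) lowercases names unless its heuristic marks them case-sensitive:

from sqlglot import exp
from sqlglot.dialects import BigQuery, Postgres, Snowflake

ident = exp.to_identifier("FoO")  # unquoted identifier
print(Postgres().normalize_identifier(ident.copy()).name)   # foo
print(Snowflake().normalize_identifier(ident.copy()).name)  # FOO
print(BigQuery().normalize_identifier(ident.copy()).name)   # foo (unqualified, so treated as case-insensitive)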
Mapping of an escaped sequence (e.g. \n) to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    BYTE_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
    ]

    RAW_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "DATETIME": TokenType.TIMESTAMP,
        "DECLARE": TokenType.COMMAND,
        "ELSEIF": TokenType.COMMAND,
        "EXCEPTION": TokenType.COMMAND,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
    }
    KEYWORDS.pop("DIV")
    KEYWORDS.pop("VALUES")
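A sketch of the BigQuery-specific lexing configured above: backtick identifiers, r'...' raw strings, and '#' line comments. The token list is illustrative; exact token types may vary by version:

import sqlglot

tokens = sqlglot.tokenize(
    "SELECT r'\\d+' AS rgx FROM `project.dataset.tbl`  # trailing comment",
    read="bigquery",
)
print([token.token_type.name for token in tokens])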
Inherited Members
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(
                this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
            )
            table.meta["quoted_table"] = True

        return table

    def _parse_column(self) -> t.Optional[exp.Expression]:
        column = super()._parse_column()
        if isinstance(column, exp.Column):
            parts = column.parts
            if any("." in p.name for p in parts):
                catalog, db, table, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                column.meta["quoted_column"] = True

        return column

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
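A sketch of two parser behaviors defined above: dashed table names collapse into a single identifier, and OFFSET/ORDINAL-style subscripts populate the Bracket node's offset/safe args. Expected values are illustrative:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT arr[SAFE_OFFSET(1)] FROM my-project.dataset.tbl", read="bigquery"
)
bracket = ast.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # expected: 0 True
print(ast.find(exp.Table).catalog)                           # expected: my-project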
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
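A brief sketch of the error-handling options in use; error_level is forwarded to the parser through parse_one:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as err:
    print(err.errors[0]["description"])  # description of the first collected error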
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
        "unnest", "using", "when", "where", "window", "with", "within",
    }

    def mod_sql(self, expression: exp.Mod) -> str:
        this = expression.this
        expr = expression.expression
        return self.func(
            "MOD",
            this.unnest() if isinstance(this, exp.Paren) else this,
            expr.unnest() if isinstance(expr, exp.Paren) else expr,
        )

    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
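A sketch of a few of the TRANSFORMS above in action when targeting BigQuery; the outputs shown in comments are expected values and may vary slightly by version:

import sqlglot

print(sqlglot.transpile("SELECT MD5('a')", write="bigquery")[0])
# expected: SELECT TO_HEX(MD5('a'))  -- exp.MD5 becomes TO_HEX(MD5(..))

print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="bigquery")[0])
# expected: SELECT CAST(x AS STRING)  -- precision removed, VARCHAR mapped to STRING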
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
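A short sketch of how the formatting options above are typically passed through the top-level API; the pretty-printed output is approximate:

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM dataset.tbl WHERE a > 0", write="bigquery", pretty=True)[0])
# SELECT
#   a,
#   b
# FROM dataset.tbl
# WHERE
#   a > 0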
def column_parts(self, expression: exp.Column) -> str:
    if expression.meta.get("quoted_column"):
        # If a column reference is of the form `dataset.table`.name, we need
        # to preserve the quoted table path, otherwise the reference breaks
        table_parts = ".".join(p.name for p in expression.parts[:-1])
        table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
        return f"{table_path}.{self.sql(expression, 'this')}"

    return super().column_parts(expression)
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
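A sketch of the round-trip behavior: a fully quoted table path stays a single identifier, since `x.y` and `x`.`y` can resolve to different data sources in BigQuery:

import sqlglot

print(sqlglot.transpile("SELECT * FROM `proj.dataset.tbl`", read="bigquery", write="bigquery")[0])
# expected: SELECT * FROM `proj.dataset.tbl`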
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
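A sketch of the rewrite above: outside of the CAST ... FORMAT case, AT TIME ZONE is converted to the TIMESTAMP(DATETIME(...)) form. Expected output is illustrative:

import sqlglot

print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", write="bigquery")[0])
# expected: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))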
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
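A sketch showing that the offset/safe args parsed by _parse_bracket round-trip through bracket_sql; the expected output is illustrative:

import sqlglot

print(sqlglot.transpile("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])
# expected: SELECT arr[SAFE_ORDINAL(1)] FROM t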
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql