sqlglot.dialects.bigquery
```python
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
        expressions = [exp.alias_(fld, name) for fld, name in zip(tup.expressions, field_aliases)]
        structs.append(exp.Struct(expressions=expressions))

    return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
```
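The following sketch is not part of the module; it shows the dialect end to end, assuming a sqlglot version matching this source. The table and column names are made up for illustration.

```python
import sqlglot

# Dash-separated table names are reassembled by Parser._parse_table_part.
print(sqlglot.transpile("SELECT * FROM my-project.mydataset.mytable", read="bigquery")[0])

# The tokenizer maps BigQuery's TIMESTAMP keyword to TIMESTAMPTZ internally, so
# the cast round-trips to a timezone-aware type in other dialects (DuckDB here).
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP) FROM t", read="bigquery", write="duckdb")[0])
```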
class BigQuery(sqlglot.dialects.dialect.Dialect)
WEEK_OFFSET = -1
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday); -1 is Sunday.
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.

NORMALIZE_FUNCTIONS = False
Determines how function names are going to be normalized. Possible values:

- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disable function name normalization.

These settings can be inspected directly, as sketched below.
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.

ESCAPE_SEQUENCES
Mapping of an unescaped escape sequence to the corresponding character.

FORMAT_MAPPING
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
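A hedged sketch of these mappings in action: PARSE_DATE formats go through TIME_MAPPING (BigQuery's %D has no universal strftime equivalent), while CAST ... FORMAT tokens go through FORMAT_MAPPING. The target dialect and literals below are illustrative.

```python
import sqlglot

# %D is rewritten via TIME_MAPPING to %m/%d/%y for targets that lack it.
print(sqlglot.transpile("SELECT PARSE_DATE('%D', '12/25/08')", read="bigquery", write="duckdb")[0])

# FORMAT clause tokens (YYYY, MM, DD, ...) are resolved via FORMAT_MAPPING.
print(
    sqlglot.transpile(
        "SELECT CAST('2008-12-25' AS DATE FORMAT 'YYYY-MM-DD')", read="bigquery", write="duckdb"
    )[0]
)
```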
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
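A sketch of the effect, using sqlglot's qualify optimizer and a hypothetical schema: pseudo-columns are skipped when the star is expanded.

```python
from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

expression = parse_one("SELECT * FROM tbl", read="bigquery")
qualified = qualify(
    expression,
    schema={"tbl": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}},  # hypothetical schema
    dialect="bigquery",
)
print(qualified.sql("bigquery"))  # only "x" is selected; _PARTITIONTIME is excluded
```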
def normalize_identifier(self, expression: E) -> E
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; on Linux, for example, identifiers may always be case-sensitive.

Finally, the normalization behavior of some engines can even be controlled through flags, as in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
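A minimal sketch of the method on this dialect; FoO is an arbitrary identifier.

```python
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

bigquery = Dialect.get_or_raise("bigquery")

# An unqualified column identifier is lowercased, since BigQuery resolves
# column names case-insensitively.
print(bigquery.normalize_identifier(exp.to_identifier("FoO")).sql(dialect="bigquery"))  # foo
```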
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class BigQuery.Tokenizer(sqlglot.tokens.Tokenizer)
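A small sketch of tokenizer-level behavior (backtick-quoted identifiers and # comments), using the dialect's tokenize helper; the query is made up.

```python
from sqlglot.dialects.bigquery import BigQuery

# Backticks delimit identifiers and '#' starts a line comment in BigQuery.
for token in BigQuery().tokenize("SELECT `col` FROM t # trailing comment"):
    print(token.token_type, repr(token.text))
```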
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True

    LOG_DEFAULTS_TO_LN = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                self._advance(2)
                table_name += f"-{self._prev.text}"

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
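A couple of the parser hooks above can be exercised directly. A minimal sketch (assuming a recent sqlglot release; reprs and quoting may differ between versions):

    import sqlglot
    from sqlglot import exp

    # _parse_table_part collapses dash-separated project names into a single
    # identifier instead of parsing the dashes as subtraction.
    ast = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
    table = ast.find(exp.Table)
    print(table.catalog, table.db, table.name)  # my-project dataset tbl

    # _parse_bracket folds OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL wrappers into
    # the Bracket node's "offset" and "safe" args, per BRACKET_OFFSETS.
    bracket = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)]", read="bigquery").find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True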
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
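These knobs apply when driving the parser by hand rather than through sqlglot.parse/parse_one. A hedged sketch (constructor signatures have shifted across sqlglot versions, so treat this as illustrative):

    from sqlglot.dialects.bigquery import BigQuery
    from sqlglot.errors import ErrorLevel

    # Collect parse problems instead of raising on the first one.
    parser = BigQuery.Parser(error_level=ErrorLevel.WARN, dialect="bigquery")
    tokens = BigQuery().tokenize("SELECT 1 AS x")
    expressions = parser.parse(tokens)
    print(expressions[0].sql("bigquery"))  # SELECT 1 AS x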
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false",
        "fetch", "following", "for", "from", "full", "group", "grouping", "groups",
        "hash", "having", "if", "ignore", "in", "inner", "intersect", "interval",
        "into", "is", "join", "lateral", "left", "like", "limit", "lookup",
        "merge", "natural", "new", "no", "not", "null", "nulls", "of", "on", "or",
        "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select",
        "set", "some", "struct", "tablesample", "then", "to", "treat", "true",
        "unbounded", "union", "unnest", "using", "when", "where", "window",
        "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
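Two of the dialect-specific behaviors defined above, shown end to end. A minimal sketch (assuming a recent sqlglot release):

    import sqlglot

    # TYPE_MAPPING plus remove_precision_parameterized_types: parameterized
    # string types collapse to BigQuery's STRING.
    print(sqlglot.transpile("CAST(x AS VARCHAR(10))", write="bigquery")[0])
    # CAST(x AS STRING)

    # eq_sql: equality against NULL outside an UPDATE can never be TRUE in
    # BigQuery, so it is generated as a bare NULL.
    print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
    # SELECT NULL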
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
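These options are typically forwarded through sqlglot.transpile rather than set on a Generator directly. A hedged sketch (the output shape is approximate and may vary by version):

    import sqlglot

    # pretty and identify are passed through to the Generator shown above.
    print(
        sqlglot.transpile(
            "SELECT a, b FROM t WHERE a = 1", write="bigquery", pretty=True, identify=True
        )[0]
    )
    # SELECT
    #   `a`,
    #   `b`
    # FROM `t`
    # WHERE
    #   `a` = 1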