sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    (
                        alias.columns
                        if alias and alias.columns
                        else (f"_c{i}" for i in range(len(t.expressions)))
                    ),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"


def _parse_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self,
            e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self,
            e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
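The hooks above surface through sqlglot's top-level API. Below is a minimal sketch exercising a few of them; the outputs in the comments are indicative and may differ slightly between sqlglot versions.

import sqlglot

# _derived_table_values_to_unnest: a VALUES derived table becomes an UNNEST
# over an array of STRUCTs, with the alias columns pushed into the struct
# field names.
print(
    sqlglot.transpile(
        "SELECT x, y FROM (VALUES (1, 'a'), (2, 'b')) AS t(x, y)",
        write="bigquery",
    )[0]
)
# e.g. SELECT x, y FROM UNNEST([STRUCT(1 AS x, 'a' AS y), STRUCT(2 AS x, 'b' AS y)])

# Parser.BRACKET_OFFSETS: OFFSET(...) records 0-based indexing so that
# 1-based dialects such as DuckDB can compensate.
print(sqlglot.transpile("SELECT arr[OFFSET(0)] FROM t", read="bigquery", write="duckdb")[0])
# e.g. SELECT arr[1] FROM t

# Generator.eq_sql: comparing against NULL with `=` is folded to NULL,
# since operands of `=` cannot be NULL in BigQuery.
print(sqlglot.transpile("SELECT a = NULL", read="duckdb", write="bigquery")[0])
# e.g. SELECT NULL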
class BigQuery(Dialect)

WEEK_OFFSET = -1
Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
UNNEST_COLUMN_ONLY = True
Determines whether or not UNNEST table aliases are treated as column aliases.
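A brief sketch of the effect; the generated table alias name in the comment is illustrative, not guaranteed:

import sqlglot

# In BigQuery the UNNEST alias names the produced column; engines such as
# Presto expect a table alias with a column list instead.
print(sqlglot.transpile("SELECT x FROM UNNEST([1, 2, 3]) AS x", read="bigquery", write="presto")[0])
# e.g. SELECT x FROM UNNEST(ARRAY[1, 2, 3]) AS _t(x)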
SUPPORTS_USER_DEFINED_TYPES = False
Determines whether or not user-defined data types are supported.
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
TIME_MAPPING = {"%D": "%m/%d/%y"}
Associates this dialect's time formats with their equivalent Python strftime formats.
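For instance, BigQuery's %D has no single-code counterpart in Python's strftime, so it is translated through this mapping. A hedged sketch (output indicative):

import sqlglot

# PARSE_DATE's format is converted to Python strftime codes at parse time
# ("%D" -> "%m/%d/%y"), which DuckDB's STRPTIME understands as-is.
print(sqlglot.transpile("SELECT PARSE_DATE('%D', '12/25/23')", read="bigquery", write="duckdb")[0])
# e.g. SELECT CAST(STRPTIME('12/25/23', '%m/%d/%y') AS DATE)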
ESCAPE_SEQUENCES
Mapping of an unescaped escape sequence to the corresponding character.
FORMAT_MAPPING
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
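A small sketch of the syntax this table serves; the exact round-tripped text may vary by sqlglot version:

import sqlglot

# FORMAT tokens (YYYY, MM, DD, ...) are resolved against FORMAT_MAPPING
# while parsing, and mapped back when generating BigQuery SQL.
sql = "SELECT CAST('2023-12-25' AS DATE FORMAT 'YYYY-MM-DD')"
print(sqlglot.parse_one(sql, read="bigquery").sql("bigquery"))
# expected to round-trip to an equivalent FORMAT-clause cast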
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
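A sketch of how this interacts with star expansion in the optimizer, assuming a hypothetical schema that declares the pseudo-column alongside a regular one:

import sqlglot
from sqlglot.optimizer.qualify import qualify

# Hypothetical schema: _PARTITIONTIME declared next to a regular column.
schema = {"tbl": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}

expr = sqlglot.parse_one("SELECT * FROM tbl", read="bigquery")
qualified = qualify(expr, schema=schema, dialect="bigquery")

# Star expansion is expected to keep `x` and skip the pseudo-column.
print(qualified.sql("bigquery"))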
def normalize_identifier(self, expression: E) -> E

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
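For BigQuery specifically, the table-detection heuristic above can be observed through the normalize_identifiers optimizer rule; a minimal sketch (output indicative):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# The unqualified column is case-insensitive and gets lowercased, while the
# qualified table name is left untouched because table names are
# case-sensitive by default.
expr = sqlglot.parse_one("SELECT Col FROM Project.Dataset.Tbl", read="bigquery")
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
# e.g. SELECT col FROM Project.Dataset.Tbl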
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
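A minimal usage sketch, assuming a sqlglot version matching this source: the error-handling options above are ordinary keyword arguments to the top-level parse helpers, which forward them to this Parser.

import sqlglot
from sqlglot.errors import ErrorLevel

# Parse a BigQuery query into a syntax tree. error_level, error_message_context
# and max_errors are forwarded to the BigQuery Parser shown above; RAISE collects
# errors and raises a ParseError instead of warning.
tree = sqlglot.parse_one(
    "SELECT col FROM dataset.t",
    read="bigquery",
    error_level=ErrorLevel.RAISE,
)
print(repr(tree))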
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
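Two BigQuery-specific behaviors of this Parser, sketched under the assumption that the snippet runs against a sqlglot version matching this source (the table and column names are illustrative):

from sqlglot import exp, parse_one

# SHA256(...) is normalized to the dialect-agnostic SHA2 node with length=256
# (see FUNCTIONS above), so other dialects can render it as SHA2(col, 256).
node = parse_one("SELECT SHA256(col) FROM t", read="bigquery").selects[0]
assert isinstance(node, exp.SHA2) and node.text("length") == "256"

# Dash-separated names (common for GCP project ids) are stitched back into a
# single identifier by _parse_table_part instead of failing on the dashes.
table = parse_one("SELECT * FROM my-project.dataset.t", read="bigquery").find(exp.Table)
assert table is not None and table.catalog == "my-project"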
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: Whether to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. Only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
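A small illustration of what these options and the TRANSFORMS/TYPE_MAPPING tables above produce, assuming a sqlglot version matching this source (exact output may vary slightly across versions):

import sqlglot

# TRY_CAST becomes SAFE_CAST (trycast_sql) and INT widens to INT64 (TYPE_MAPPING).
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", read="duckdb", write="bigquery")[0])
# Expected: SELECT SAFE_CAST(x AS INT64) FROM t

# pretty/pad/indent control the layout of the generated SQL string.
print(sqlglot.transpile("SELECT 1 AS a, 2 AS b", write="bigquery", pretty=True)[0])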
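Likewise, a sketch of the bracket and NULL-equality handling defined in bracket_sql and eq_sql above (outputs shown as expected, not guaranteed across versions):

from sqlglot import parse_one, transpile

# BRACKET_OFFSETS (parser) and bracket_sql (generator) round-trip BigQuery's
# positional array accessors such as SAFE_OFFSET.
sql = "SELECT arr[SAFE_OFFSET(0)] FROM t"
print(parse_one(sql, read="bigquery").sql(dialect="bigquery"))
# Expected: SELECT arr[SAFE_OFFSET(0)] FROM t

# eq_sql folds comparisons against NULL, since operands of = can't be NULL in BigQuery.
print(transpile("SELECT x = NULL", write="bigquery")[0])
# Expected: SELECT NULL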
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql