sqlglot.dialects.bigquery
```python
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
        expressions = [exp.alias_(fld, name) for fld, name in zip(tup.expressions, field_aliases)]
        structs.append(exp.Struct(expressions=expressions))

    return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases, since BigQuery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
```
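As a quick orientation before the per-member docs below, here is a minimal usage sketch (not part of the module source): it drives the tokenizer, parser, and generator defined above through `sqlglot.transpile`. The project, dataset, and column names are made up for illustration, and the exact DuckDB output depends on the sqlglot version.

```python
import sqlglot

# Dash-separated project names and TIMESTAMP_MILLIS are BigQuery-specific;
# both are handled by BigQuery.Parser above and re-emitted for DuckDB.
sql = "SELECT TIMESTAMP_MILLIS(ts) FROM `my-project.my_dataset.events`"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
```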
`WEEK_OFFSET = -1`

First day of the week in `DATE_TRUNC(WEEK)`. Defaults to `0` (Monday); `-1` means the week starts on Sunday, which is BigQuery's behavior.
`NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE`

Specifies the strategy according to which identifiers should be normalized.
`NORMALIZE_FUNCTIONS = False`

Determines how function names are normalized. Possible values:

- `"upper"` or `True`: convert names to uppercase.
- `"lower"`: convert names to lowercase.
- `False`: disable function name normalization. BigQuery uses this setting because its UDF names are case-sensitive.
`TIME_MAPPING = {"%D": "%m/%d/%y"}`

Associates this dialect's time format tokens with their equivalent Python `strftime` formats.
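A small sketch of what this mapping enables (illustrative; the date literal and target dialect are assumptions, and the exact output may vary by version):

```python
import sqlglot

# %D is BigQuery shorthand for %m/%d/%y; DuckDB's STRPTIME has no %D,
# so the token is expanded via TIME_MAPPING during transpilation.
print(sqlglot.transpile(
    "SELECT PARSE_DATE('%D', '12/25/08')",
    read="bigquery",
    write="duckdb",
)[0])
```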
`ESCAPE_SEQUENCES`

Maps each escape sequence, as it appears unescaped in SQL text (e.g. `"\\n"`), to the character it represents (e.g. an actual newline).
`FORMAT_MAPPING`

Helper used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
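A hedged sketch of the `CAST ... FORMAT` syntax this mapping supports (the format string, input literal, and target dialect are illustrative assumptions):

```python
import sqlglot

# FORMAT_MAPPING translates format elements such as DD, MM and YYYY
# into strftime tokens when the CAST(.. FORMAT ..) syntax is transpiled.
print(sqlglot.transpile(
    "SELECT CAST('15-01-2023' AS DATE FORMAT 'DD-MM-YYYY')",
    read="bigquery",
    write="duckdb",
)[0])
```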
`PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}`

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from `SELECT *` queries.
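A sketch of the effect during star expansion (the table and schema are hypothetical; `qualify` is sqlglot's optimizer entry point that expands `SELECT *`):

```python
from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# _PARTITIONTIME is declared in the (hypothetical) schema, but as a
# pseudo-column it should be skipped when the star is expanded.
schema = {"events": {"id": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
expanded = qualify(
    parse_one("SELECT * FROM events", read="bigquery"),
    schema=schema,
    dialect="bigquery",
)
print(expanded.sql(dialect="bigquery"))  # expect only "id" to be projected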
`def normalize_identifier(self, expression: E) -> E`

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set `enable_case_sensitive_identifier`.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
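A minimal sketch contrasting two dialects (assumes a recent sqlglot in which `Dialect.get_or_raise` returns a dialect instance):

```python
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

ident = exp.to_identifier("FoO")  # unquoted, so it is fair game for normalization

# BigQuery resolves unqualified, non-UDF identifiers case-insensitively
# (lowercased here); Snowflake resolves them as uppercase.
print(Dialect.get_or_raise("bigquery").normalize_identifier(ident.copy()).name)   # foo
print(Dialect.get_or_raise("snowflake").normalize_identifier(ident.copy()).name)  # FOO
```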
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3. A usage sketch for these options follows this list.
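These options are forwarded to the Parser by the top-level API, so they can be set per call. A minimal sketch (the malformed query is only for demonstration):

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        # error_level and max_errors are passed through parse_one to this Parser.
        sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=3)
    except ParseError as e:
        # ParseError aggregates up to max_errors structured error entries.
        print(e.errors)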
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on",
        "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
        "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
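A minimal sketch of the generation behavior above (the table and column names are hypothetical): transpiling generic SQL into the BigQuery dialect applies TYPE_MAPPING and the TRANSFORMS table.

    import sqlglot

    # TYPE_MAPPING: VARCHAR -> STRING, DOUBLE -> FLOAT64.
    print(sqlglot.transpile("CREATE TABLE t (a VARCHAR, b DOUBLE)", write="bigquery")[0])

    # TRANSFORMS: exp.ApproxDistinct is renamed via rename_func("APPROX_COUNT_DISTINCT").
    print(sqlglot.transpile("SELECT APPROX_DISTINCT(a) FROM t", write="bigquery")[0])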
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True. A usage sketch for these options follows this list.
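These options are forwarded to the Generator by the top-level API. A minimal sketch (hypothetical query):

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT a, b FROM t WHERE a > 1",
            write="bigquery",
            pretty=True,    # multi-line, indented output
            identify=True,  # quote all identifiers
        )[0]
    )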
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql