# sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot._typing import E 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 NormalizationStrategy, 12 arg_max_or_min_no_count, 13 binary_from_function, 14 date_add_interval_sql, 15 datestrtodate_sql, 16 format_time_lambda, 17 if_sql, 18 inline_array_sql, 19 max_or_greatest, 20 min_or_least, 21 no_ilike_sql, 22 parse_date_delta_with_interval, 23 path_to_jsonpath, 24 regexp_replace_sql, 25 rename_func, 26 timestrtotime_sql, 27 ts_or_ds_add_cast, 28) 29from sqlglot.helper import seq_get, split_num_words 30from sqlglot.tokens import TokenType 31 32if t.TYPE_CHECKING: 33 from typing_extensions import Literal 34 35logger = logging.getLogger("sqlglot") 36 37 38def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 39 if not expression.find_ancestor(exp.From, exp.Join): 40 return self.values_sql(expression) 41 42 alias = expression.args.get("alias") 43 44 structs = [ 45 exp.Struct( 46 expressions=[ 47 exp.alias_(value, column_name) 48 for value, column_name in zip( 49 t.expressions, 50 alias.columns 51 if alias and alias.columns 52 else (f"_c{i}" for i in range(len(t.expressions))), 53 ) 54 ] 55 ) 56 for t in expression.find_all(exp.Tuple) 57 ] 58 59 return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)])) 60 61 62def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 63 this = expression.this 64 if isinstance(this, exp.Schema): 65 this = f"{this.this} <{self.expressions(this)}>" 66 else: 67 this = self.sql(this) 68 return f"RETURNS {this}" 69 70 71def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 72 kind = expression.args["kind"] 73 returns = expression.find(exp.ReturnsProperty) 74 75 if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"): 76 expression.set("kind", "TABLE 
FUNCTION") 77 78 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 79 expression.set("expression", expression.expression.this) 80 81 return self.create_sql(expression) 82 83 return self.create_sql(expression) 84 85 86def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: 87 """Remove references to unnest table aliases since bigquery doesn't allow them. 88 89 These are added by the optimizer's qualify_column step. 90 """ 91 from sqlglot.optimizer.scope import find_all_in_scope 92 93 if isinstance(expression, exp.Select): 94 unnest_aliases = { 95 unnest.alias 96 for unnest in find_all_in_scope(expression, exp.Unnest) 97 if isinstance(unnest.parent, (exp.From, exp.Join)) 98 } 99 if unnest_aliases: 100 for column in expression.find_all(exp.Column): 101 if column.table in unnest_aliases: 102 column.set("table", None) 103 elif column.db in unnest_aliases: 104 column.set("db", None) 105 106 return expression 107 108 109# https://issuetracker.google.com/issues/162294746 110# workaround for bigquery bug when grouping by an expression and then ordering 111# WITH x AS (SELECT 1 y) 112# SELECT y + 1 z 113# FROM x 114# GROUP BY x + 1 115# ORDER by z 116def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 117 if isinstance(expression, exp.Select): 118 group = expression.args.get("group") 119 order = expression.args.get("order") 120 121 if group and order: 122 aliases = { 123 select.this: select.args["alias"] 124 for select in expression.selects 125 if isinstance(select, exp.Alias) 126 } 127 128 for e in group.expressions: 129 alias = aliases.get(e) 130 131 if alias: 132 e.replace(exp.column(alias)) 133 134 return expression 135 136 137def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 138 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 139 if isinstance(expression, exp.CTE) and expression.alias_column_names: 140 cte_query = expression.this 141 142 if 
cte_query.is_star: 143 logger.warning( 144 "Can't push down CTE column names for star queries. Run the query through" 145 " the optimizer or use 'qualify' to expand the star projections first." 146 ) 147 return expression 148 149 column_names = expression.alias_column_names 150 expression.args["alias"].set("columns", None) 151 152 for name, select in zip(column_names, cte_query.selects): 153 to_replace = select 154 155 if isinstance(select, exp.Alias): 156 select = select.this 157 158 # Inner aliases are shadowed by the CTE column names 159 to_replace.replace(exp.alias_(select, name)) 160 161 return expression 162 163 164def _parse_timestamp(args: t.List) -> exp.StrToTime: 165 this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 166 this.set("zone", seq_get(args, 2)) 167 return this 168 169 170def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts: 171 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 172 return expr_type.from_arg_list(args) 173 174 175def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5: 176 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 177 arg = seq_get(args, 0) 178 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg) 179 180 181def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 182 return self.sql( 183 exp.Exists( 184 this=exp.select("1") 185 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 186 .where(exp.column("_col").eq(expression.right)) 187 ) 188 ) 189 190 191def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 192 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 193 194 195def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 196 expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True)) 197 
expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True)) 198 unit = expression.args.get("unit") or "DAY" 199 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 200 201 202def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 203 scale = expression.args.get("scale") 204 timestamp = self.sql(expression, "this") 205 if scale in (None, exp.UnixToTime.SECONDS): 206 return f"TIMESTAMP_SECONDS({timestamp})" 207 if scale == exp.UnixToTime.MILLIS: 208 return f"TIMESTAMP_MILLIS({timestamp})" 209 if scale == exp.UnixToTime.MICROS: 210 return f"TIMESTAMP_MICROS({timestamp})" 211 212 return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))" 213 214 215def _parse_time(args: t.List) -> exp.Func: 216 if len(args) == 1: 217 return exp.TsOrDsToTime(this=args[0]) 218 if len(args) == 3: 219 return exp.TimeFromParts.from_arg_list(args) 220 221 return exp.Anonymous(this="TIME", expressions=args) 222 223 224class BigQuery(Dialect): 225 WEEK_OFFSET = -1 226 UNNEST_COLUMN_ONLY = True 227 SUPPORTS_USER_DEFINED_TYPES = False 228 SUPPORTS_SEMI_ANTI_JOIN = False 229 LOG_BASE_FIRST = False 230 231 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 232 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 233 234 # bigquery udfs are case sensitive 235 NORMALIZE_FUNCTIONS = False 236 237 TIME_MAPPING = { 238 "%D": "%m/%d/%y", 239 } 240 241 ESCAPE_SEQUENCES = { 242 "\\a": "\a", 243 "\\b": "\b", 244 "\\f": "\f", 245 "\\n": "\n", 246 "\\r": "\r", 247 "\\t": "\t", 248 "\\v": "\v", 249 } 250 251 FORMAT_MAPPING = { 252 "DD": "%d", 253 "MM": "%m", 254 "MON": "%b", 255 "MONTH": "%B", 256 "YYYY": "%Y", 257 "YY": "%y", 258 "HH": "%I", 259 "HH12": "%I", 260 "HH24": "%H", 261 "MI": "%M", 262 "SS": "%S", 263 "SSSSS": "%f", 264 "TZH": "%z", 265 } 266 267 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 268 # 
https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 269 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 270 271 def normalize_identifier(self, expression: E) -> E: 272 if isinstance(expression, exp.Identifier): 273 parent = expression.parent 274 while isinstance(parent, exp.Dot): 275 parent = parent.parent 276 277 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 278 # The following check is essentially a heuristic to detect tables based on whether or 279 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 280 if ( 281 not isinstance(parent, exp.UserDefinedFunction) 282 and not (isinstance(parent, exp.Table) and parent.db) 283 and not expression.meta.get("is_table") 284 ): 285 expression.set("this", expression.this.lower()) 286 287 return expression 288 289 class Tokenizer(tokens.Tokenizer): 290 QUOTES = ["'", '"', '"""', "'''"] 291 COMMENTS = ["--", "#", ("/*", "*/")] 292 IDENTIFIERS = ["`"] 293 STRING_ESCAPES = ["\\"] 294 295 HEX_STRINGS = [("0x", ""), ("0X", "")] 296 297 BYTE_STRINGS = [ 298 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 299 ] 300 301 RAW_STRINGS = [ 302 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 303 ] 304 305 KEYWORDS = { 306 **tokens.Tokenizer.KEYWORDS, 307 "ANY TYPE": TokenType.VARIANT, 308 "BEGIN": TokenType.COMMAND, 309 "BEGIN TRANSACTION": TokenType.BEGIN, 310 "BYTES": TokenType.BINARY, 311 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 312 "DECLARE": TokenType.COMMAND, 313 "FLOAT64": TokenType.DOUBLE, 314 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 315 "MODEL": TokenType.MODEL, 316 "NOT DETERMINISTIC": TokenType.VOLATILE, 317 "RECORD": TokenType.STRUCT, 318 "TIMESTAMP": TokenType.TIMESTAMPTZ, 319 } 320 KEYWORDS.pop("DIV") 321 322 class Parser(parser.Parser): 323 PREFIXED_PIVOT_COLUMNS = True 324 325 LOG_DEFAULTS_TO_LN = True 326 327 
FUNCTIONS = { 328 **parser.Parser.FUNCTIONS, 329 "DATE": _parse_date, 330 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 331 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 332 "DATE_TRUNC": lambda args: exp.DateTrunc( 333 unit=exp.Literal.string(str(seq_get(args, 1))), 334 this=seq_get(args, 0), 335 ), 336 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 337 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 338 "DIV": binary_from_function(exp.IntDiv), 339 "FORMAT_DATE": lambda args: exp.TimeToStr( 340 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 341 ), 342 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 343 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 344 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 345 ), 346 "MD5": exp.MD5Digest.from_arg_list, 347 "TO_HEX": _parse_to_hex, 348 "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")( 349 [seq_get(args, 1), seq_get(args, 0)] 350 ), 351 "PARSE_TIMESTAMP": _parse_timestamp, 352 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 353 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 354 this=seq_get(args, 0), 355 expression=seq_get(args, 1), 356 position=seq_get(args, 2), 357 occurrence=seq_get(args, 3), 358 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 359 ), 360 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 361 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 362 "SPLIT": lambda args: exp.Split( 363 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 364 this=seq_get(args, 0), 365 expression=seq_get(args, 1) or exp.Literal.string(","), 366 ), 367 "TIME": _parse_time, 368 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 369 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 370 "TIMESTAMP_ADD": 
parse_date_delta_with_interval(exp.TimestampAdd), 371 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 372 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 373 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 374 ), 375 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 376 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 377 ), 378 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 379 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 380 } 381 382 FUNCTION_PARSERS = { 383 **parser.Parser.FUNCTION_PARSERS, 384 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 385 } 386 FUNCTION_PARSERS.pop("TRIM") 387 388 NO_PAREN_FUNCTIONS = { 389 **parser.Parser.NO_PAREN_FUNCTIONS, 390 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 391 } 392 393 NESTED_TYPE_TOKENS = { 394 *parser.Parser.NESTED_TYPE_TOKENS, 395 TokenType.TABLE, 396 } 397 398 ID_VAR_TOKENS = { 399 *parser.Parser.ID_VAR_TOKENS, 400 TokenType.VALUES, 401 } 402 403 PROPERTY_PARSERS = { 404 **parser.Parser.PROPERTY_PARSERS, 405 "NOT DETERMINISTIC": lambda self: self.expression( 406 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 407 ), 408 "OPTIONS": lambda self: self._parse_with_property(), 409 } 410 411 CONSTRAINT_PARSERS = { 412 **parser.Parser.CONSTRAINT_PARSERS, 413 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 414 } 415 416 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 417 RANGE_PARSERS.pop(TokenType.OVERLAPS, None) 418 419 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 420 421 STATEMENT_PARSERS = { 422 **parser.Parser.STATEMENT_PARSERS, 423 TokenType.END: lambda self: self._parse_as_command(self._prev), 424 TokenType.FOR: lambda self: self._parse_for_in(), 425 } 426 427 BRACKET_OFFSETS = { 428 "OFFSET": (0, False), 429 "ORDINAL": (1, False), 430 "SAFE_OFFSET": (0, True), 431 "SAFE_ORDINAL": (1, True), 432 } 433 434 def _parse_for_in(self) -> exp.ForIn: 435 this = 
self._parse_range() 436 self._match_text_seq("DO") 437 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 438 439 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 440 this = super()._parse_table_part(schema=schema) or self._parse_number() 441 442 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 443 if isinstance(this, exp.Identifier): 444 table_name = this.name 445 while self._match(TokenType.DASH, advance=False) and self._next: 446 self._advance(2) 447 table_name += f"-{self._prev.text}" 448 449 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 450 elif isinstance(this, exp.Literal): 451 table_name = this.name 452 453 if self._is_connected() and self._parse_var(any_token=True): 454 table_name += self._prev.text 455 456 this = exp.Identifier(this=table_name, quoted=True) 457 458 return this 459 460 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 461 table = super()._parse_table_parts(schema=schema) 462 if isinstance(table.this, exp.Identifier) and "." in table.name: 463 catalog, db, this, *rest = ( 464 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 465 for x in split_num_words(table.name, ".", 3) 466 ) 467 468 if rest and this: 469 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 470 471 table = exp.Table(this=this, db=db, catalog=catalog) 472 473 return table 474 475 @t.overload 476 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 477 ... 478 479 @t.overload 480 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 481 ... 
482 483 def _parse_json_object(self, agg=False): 484 json_object = super()._parse_json_object() 485 array_kv_pair = seq_get(json_object.expressions, 0) 486 487 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 488 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 489 if ( 490 array_kv_pair 491 and isinstance(array_kv_pair.this, exp.Array) 492 and isinstance(array_kv_pair.expression, exp.Array) 493 ): 494 keys = array_kv_pair.this.expressions 495 values = array_kv_pair.expression.expressions 496 497 json_object.set( 498 "expressions", 499 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 500 ) 501 502 return json_object 503 504 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 505 bracket = super()._parse_bracket(this) 506 507 if this is bracket: 508 return bracket 509 510 if isinstance(bracket, exp.Bracket): 511 for expression in bracket.expressions: 512 name = expression.name.upper() 513 514 if name not in self.BRACKET_OFFSETS: 515 break 516 517 offset, safe = self.BRACKET_OFFSETS[name] 518 bracket.set("offset", offset) 519 bracket.set("safe", safe) 520 expression.replace(expression.expressions[0]) 521 522 return bracket 523 524 class Generator(generator.Generator): 525 EXPLICIT_UNION = True 526 INTERVAL_ALLOWS_PLURAL_FORM = False 527 JOIN_HINTS = False 528 QUERY_HINTS = False 529 TABLE_HINTS = False 530 LIMIT_FETCH = "LIMIT" 531 RENAME_TABLE_WITH_DB = False 532 NVL2_SUPPORTED = False 533 UNNEST_WITH_ORDINALITY = False 534 COLLATE_IS_FUNC = True 535 LIMIT_ONLY_LITERALS = True 536 SUPPORTS_TABLE_ALIAS_COLUMNS = False 537 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 538 JSON_KEY_VALUE_PAIR_SEP = "," 539 NULL_ORDERING_SUPPORTED = False 540 541 TRANSFORMS = { 542 **generator.Generator.TRANSFORMS, 543 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 544 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 545 exp.ArgMin: 
arg_max_or_min_no_count("MIN_BY"), 546 exp.ArrayContains: _array_contains_sql, 547 exp.ArraySize: rename_func("ARRAY_LENGTH"), 548 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 549 exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}" 550 if e.args.get("default") 551 else f"COLLATE {self.sql(e, 'this')}", 552 exp.CountIf: rename_func("COUNTIF"), 553 exp.Create: _create_sql, 554 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 555 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 556 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 557 exp.DateFromParts: rename_func("DATE"), 558 exp.DateStrToDate: datestrtodate_sql, 559 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 560 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 561 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 562 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 563 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 564 exp.GetPath: path_to_jsonpath(), 565 exp.GroupConcat: rename_func("STRING_AGG"), 566 exp.Hex: rename_func("TO_HEX"), 567 exp.If: if_sql(false_value="NULL"), 568 exp.ILike: no_ilike_sql, 569 exp.IntDiv: rename_func("DIV"), 570 exp.JSONFormat: rename_func("TO_JSON_STRING"), 571 exp.Max: max_or_greatest, 572 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 573 exp.MD5Digest: rename_func("MD5"), 574 exp.Min: min_or_least, 575 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 576 exp.RegexpExtract: lambda self, e: self.func( 577 "REGEXP_EXTRACT", 578 e.this, 579 e.expression, 580 e.args.get("position"), 581 e.args.get("occurrence"), 582 ), 583 exp.RegexpReplace: regexp_replace_sql, 584 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 585 exp.ReturnsProperty: _returnsproperty_sql, 586 exp.Select: transforms.preprocess( 587 [ 588 
transforms.explode_to_unnest(), 589 _unqualify_unnest, 590 transforms.eliminate_distinct_on, 591 _alias_ordered_group, 592 transforms.eliminate_semi_and_anti_joins, 593 ] 594 ), 595 exp.SHA2: lambda self, e: self.func( 596 f"SHA256" if e.text("length") == "256" else "SHA512", e.this 597 ), 598 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 599 if e.name == "IMMUTABLE" 600 else "NOT DETERMINISTIC", 601 exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", 602 exp.StrToTime: lambda self, e: self.func( 603 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 604 ), 605 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 606 exp.TimeFromParts: rename_func("TIME"), 607 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 608 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 609 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 610 exp.TimeStrToTime: timestrtotime_sql, 611 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), 612 exp.TsOrDsAdd: _ts_or_ds_add_sql, 613 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 614 exp.TsOrDsToTime: rename_func("TIME"), 615 exp.Unhex: rename_func("FROM_HEX"), 616 exp.UnixDate: rename_func("UNIX_DATE"), 617 exp.UnixToTime: _unix_to_time_sql, 618 exp.Values: _derived_table_values_to_unnest, 619 exp.VariancePop: rename_func("VAR_POP"), 620 } 621 622 TYPE_MAPPING = { 623 **generator.Generator.TYPE_MAPPING, 624 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 625 exp.DataType.Type.BIGINT: "INT64", 626 exp.DataType.Type.BINARY: "BYTES", 627 exp.DataType.Type.BOOLEAN: "BOOL", 628 exp.DataType.Type.CHAR: "STRING", 629 exp.DataType.Type.DECIMAL: "NUMERIC", 630 exp.DataType.Type.DOUBLE: "FLOAT64", 631 exp.DataType.Type.FLOAT: "FLOAT64", 632 exp.DataType.Type.INT: "INT64", 633 exp.DataType.Type.NCHAR: "STRING", 634 exp.DataType.Type.NVARCHAR: "STRING", 635 exp.DataType.Type.SMALLINT: "INT64", 636 exp.DataType.Type.TEXT: "STRING", 637 exp.DataType.Type.TIMESTAMP: 
"DATETIME", 638 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 639 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 640 exp.DataType.Type.TINYINT: "INT64", 641 exp.DataType.Type.VARBINARY: "BYTES", 642 exp.DataType.Type.VARCHAR: "STRING", 643 exp.DataType.Type.VARIANT: "ANY TYPE", 644 } 645 646 PROPERTIES_LOCATION = { 647 **generator.Generator.PROPERTIES_LOCATION, 648 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 649 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 650 } 651 652 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 653 RESERVED_KEYWORDS = { 654 *generator.Generator.RESERVED_KEYWORDS, 655 "all", 656 "and", 657 "any", 658 "array", 659 "as", 660 "asc", 661 "assert_rows_modified", 662 "at", 663 "between", 664 "by", 665 "case", 666 "cast", 667 "collate", 668 "contains", 669 "create", 670 "cross", 671 "cube", 672 "current", 673 "default", 674 "define", 675 "desc", 676 "distinct", 677 "else", 678 "end", 679 "enum", 680 "escape", 681 "except", 682 "exclude", 683 "exists", 684 "extract", 685 "false", 686 "fetch", 687 "following", 688 "for", 689 "from", 690 "full", 691 "group", 692 "grouping", 693 "groups", 694 "hash", 695 "having", 696 "if", 697 "ignore", 698 "in", 699 "inner", 700 "intersect", 701 "interval", 702 "into", 703 "is", 704 "join", 705 "lateral", 706 "left", 707 "like", 708 "limit", 709 "lookup", 710 "merge", 711 "natural", 712 "new", 713 "no", 714 "not", 715 "null", 716 "nulls", 717 "of", 718 "on", 719 "or", 720 "order", 721 "outer", 722 "over", 723 "partition", 724 "preceding", 725 "proto", 726 "qualify", 727 "range", 728 "recursive", 729 "respect", 730 "right", 731 "rollup", 732 "rows", 733 "select", 734 "set", 735 "some", 736 "struct", 737 "tablesample", 738 "then", 739 "to", 740 "treat", 741 "true", 742 "unbounded", 743 "union", 744 "unnest", 745 "using", 746 "when", 747 "where", 748 "window", 749 "with", 750 "within", 751 } 752 753 def timetostr_sql(self, expression: 
exp.TimeToStr) -> str: 754 if isinstance(expression.this, exp.TsOrDsToDate): 755 this: exp.Expression = expression.this 756 else: 757 this = expression 758 759 return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})" 760 761 def struct_sql(self, expression: exp.Struct) -> str: 762 args = [] 763 for expr in expression.expressions: 764 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 765 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 766 else: 767 arg = self.sql(expr) 768 769 args.append(arg) 770 771 return self.func("STRUCT", *args) 772 773 def eq_sql(self, expression: exp.EQ) -> str: 774 # Operands of = cannot be NULL in BigQuery 775 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 776 if not isinstance(expression.parent, exp.Update): 777 return "NULL" 778 779 return self.binary(expression, "=") 780 781 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 782 parent = expression.parent 783 784 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 785 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
786 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 787 return self.func( 788 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 789 ) 790 791 return super().attimezone_sql(expression) 792 793 def trycast_sql(self, expression: exp.TryCast) -> str: 794 return self.cast_sql(expression, safe_prefix="SAFE_") 795 796 def cte_sql(self, expression: exp.CTE) -> str: 797 if expression.alias_column_names: 798 self.unsupported("Column names in CTE definition are not supported.") 799 return super().cte_sql(expression) 800 801 def array_sql(self, expression: exp.Array) -> str: 802 first_arg = seq_get(expression.expressions, 0) 803 if isinstance(first_arg, exp.Subqueryable): 804 return f"ARRAY{self.wrap(self.sql(first_arg))}" 805 806 return inline_array_sql(self, expression) 807 808 def bracket_sql(self, expression: exp.Bracket) -> str: 809 this = self.sql(expression, "this") 810 expressions = expression.expressions 811 812 if len(expressions) == 1: 813 arg = expressions[0] 814 if arg.type is None: 815 from sqlglot.optimizer.annotate_types import annotate_types 816 817 arg = annotate_types(arg) 818 819 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 820 # BQ doesn't support bracket syntax with string values 821 return f"{this}.{arg.name}" 822 823 expressions_sql = ", ".join(self.sql(e) for e in expressions) 824 offset = expression.args.get("offset") 825 826 if offset == 0: 827 expressions_sql = f"OFFSET({expressions_sql})" 828 elif offset == 1: 829 expressions_sql = f"ORDINAL({expressions_sql})" 830 elif offset is not None: 831 self.unsupported(f"Unsupported array offset: {offset}") 832 833 if expression.args.get("safe"): 834 expressions_sql = f"SAFE_{expressions_sql}" 835 836 return f"{this}[{expressions_sql}]" 837 838 def transaction_sql(self, *_) -> str: 839 return "BEGIN TRANSACTION" 840 841 def commit_sql(self, *_) -> str: 842 return "COMMIT TRANSACTION" 843 844 def rollback_sql(self, *_) -> str: 845 return 
"ROLLBACK TRANSACTION" 846 847 def in_unnest_op(self, expression: exp.Unnest) -> str: 848 return self.sql(expression) 849 850 def except_op(self, expression: exp.Except) -> str: 851 if not expression.args.get("distinct", False): 852 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 853 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 854 855 def intersect_op(self, expression: exp.Intersect) -> str: 856 if not expression.args.get("distinct", False): 857 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 858 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 859 860 def with_properties(self, properties: exp.Properties) -> str: 861 return self.properties(properties, prefix=self.seg("OPTIONS")) 862 863 def version_sql(self, expression: exp.Version) -> str: 864 if expression.name == "TIMESTAMP": 865 expression.set("this", "SYSTEM_TIME") 866 return super().version_sql(expression)
225class BigQuery(Dialect): 226 WEEK_OFFSET = -1 227 UNNEST_COLUMN_ONLY = True 228 SUPPORTS_USER_DEFINED_TYPES = False 229 SUPPORTS_SEMI_ANTI_JOIN = False 230 LOG_BASE_FIRST = False 231 232 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 233 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 234 235 # bigquery udfs are case sensitive 236 NORMALIZE_FUNCTIONS = False 237 238 TIME_MAPPING = { 239 "%D": "%m/%d/%y", 240 } 241 242 ESCAPE_SEQUENCES = { 243 "\\a": "\a", 244 "\\b": "\b", 245 "\\f": "\f", 246 "\\n": "\n", 247 "\\r": "\r", 248 "\\t": "\t", 249 "\\v": "\v", 250 } 251 252 FORMAT_MAPPING = { 253 "DD": "%d", 254 "MM": "%m", 255 "MON": "%b", 256 "MONTH": "%B", 257 "YYYY": "%Y", 258 "YY": "%y", 259 "HH": "%I", 260 "HH12": "%I", 261 "HH24": "%H", 262 "MI": "%M", 263 "SS": "%S", 264 "SSSSS": "%f", 265 "TZH": "%z", 266 } 267 268 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 269 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 270 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 271 272 def normalize_identifier(self, expression: E) -> E: 273 if isinstance(expression, exp.Identifier): 274 parent = expression.parent 275 while isinstance(parent, exp.Dot): 276 parent = parent.parent 277 278 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 279 # The following check is essentially a heuristic to detect tables based on whether or 280 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 
281 if ( 282 not isinstance(parent, exp.UserDefinedFunction) 283 and not (isinstance(parent, exp.Table) and parent.db) 284 and not expression.meta.get("is_table") 285 ): 286 expression.set("this", expression.this.lower()) 287 288 return expression 289 290 class Tokenizer(tokens.Tokenizer): 291 QUOTES = ["'", '"', '"""', "'''"] 292 COMMENTS = ["--", "#", ("/*", "*/")] 293 IDENTIFIERS = ["`"] 294 STRING_ESCAPES = ["\\"] 295 296 HEX_STRINGS = [("0x", ""), ("0X", "")] 297 298 BYTE_STRINGS = [ 299 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 300 ] 301 302 RAW_STRINGS = [ 303 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 304 ] 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "ANY TYPE": TokenType.VARIANT, 309 "BEGIN": TokenType.COMMAND, 310 "BEGIN TRANSACTION": TokenType.BEGIN, 311 "BYTES": TokenType.BINARY, 312 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 313 "DECLARE": TokenType.COMMAND, 314 "FLOAT64": TokenType.DOUBLE, 315 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 316 "MODEL": TokenType.MODEL, 317 "NOT DETERMINISTIC": TokenType.VOLATILE, 318 "RECORD": TokenType.STRUCT, 319 "TIMESTAMP": TokenType.TIMESTAMPTZ, 320 } 321 KEYWORDS.pop("DIV") 322 323 class Parser(parser.Parser): 324 PREFIXED_PIVOT_COLUMNS = True 325 326 LOG_DEFAULTS_TO_LN = True 327 328 FUNCTIONS = { 329 **parser.Parser.FUNCTIONS, 330 "DATE": _parse_date, 331 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 332 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 333 "DATE_TRUNC": lambda args: exp.DateTrunc( 334 unit=exp.Literal.string(str(seq_get(args, 1))), 335 this=seq_get(args, 0), 336 ), 337 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 338 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 339 "DIV": binary_from_function(exp.IntDiv), 340 "FORMAT_DATE": lambda args: exp.TimeToStr( 341 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 342 ), 343 
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 344 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 345 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 346 ), 347 "MD5": exp.MD5Digest.from_arg_list, 348 "TO_HEX": _parse_to_hex, 349 "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")( 350 [seq_get(args, 1), seq_get(args, 0)] 351 ), 352 "PARSE_TIMESTAMP": _parse_timestamp, 353 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 354 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 355 this=seq_get(args, 0), 356 expression=seq_get(args, 1), 357 position=seq_get(args, 2), 358 occurrence=seq_get(args, 3), 359 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 360 ), 361 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 362 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 363 "SPLIT": lambda args: exp.Split( 364 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 365 this=seq_get(args, 0), 366 expression=seq_get(args, 1) or exp.Literal.string(","), 367 ), 368 "TIME": _parse_time, 369 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 370 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 371 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 372 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 373 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 374 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 375 ), 376 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 377 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 378 ), 379 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 380 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 381 } 382 383 FUNCTION_PARSERS = { 384 **parser.Parser.FUNCTION_PARSERS, 385 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 386 } 387 
FUNCTION_PARSERS.pop("TRIM") 388 389 NO_PAREN_FUNCTIONS = { 390 **parser.Parser.NO_PAREN_FUNCTIONS, 391 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 392 } 393 394 NESTED_TYPE_TOKENS = { 395 *parser.Parser.NESTED_TYPE_TOKENS, 396 TokenType.TABLE, 397 } 398 399 ID_VAR_TOKENS = { 400 *parser.Parser.ID_VAR_TOKENS, 401 TokenType.VALUES, 402 } 403 404 PROPERTY_PARSERS = { 405 **parser.Parser.PROPERTY_PARSERS, 406 "NOT DETERMINISTIC": lambda self: self.expression( 407 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 408 ), 409 "OPTIONS": lambda self: self._parse_with_property(), 410 } 411 412 CONSTRAINT_PARSERS = { 413 **parser.Parser.CONSTRAINT_PARSERS, 414 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 415 } 416 417 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 418 RANGE_PARSERS.pop(TokenType.OVERLAPS, None) 419 420 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 421 422 STATEMENT_PARSERS = { 423 **parser.Parser.STATEMENT_PARSERS, 424 TokenType.END: lambda self: self._parse_as_command(self._prev), 425 TokenType.FOR: lambda self: self._parse_for_in(), 426 } 427 428 BRACKET_OFFSETS = { 429 "OFFSET": (0, False), 430 "ORDINAL": (1, False), 431 "SAFE_OFFSET": (0, True), 432 "SAFE_ORDINAL": (1, True), 433 } 434 435 def _parse_for_in(self) -> exp.ForIn: 436 this = self._parse_range() 437 self._match_text_seq("DO") 438 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 439 440 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 441 this = super()._parse_table_part(schema=schema) or self._parse_number() 442 443 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 444 if isinstance(this, exp.Identifier): 445 table_name = this.name 446 while self._match(TokenType.DASH, advance=False) and self._next: 447 self._advance(2) 448 table_name += f"-{self._prev.text}" 449 450 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 
451 elif isinstance(this, exp.Literal): 452 table_name = this.name 453 454 if self._is_connected() and self._parse_var(any_token=True): 455 table_name += self._prev.text 456 457 this = exp.Identifier(this=table_name, quoted=True) 458 459 return this 460 461 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 462 table = super()._parse_table_parts(schema=schema) 463 if isinstance(table.this, exp.Identifier) and "." in table.name: 464 catalog, db, this, *rest = ( 465 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 466 for x in split_num_words(table.name, ".", 3) 467 ) 468 469 if rest and this: 470 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 471 472 table = exp.Table(this=this, db=db, catalog=catalog) 473 474 return table 475 476 @t.overload 477 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 478 ... 479 480 @t.overload 481 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 482 ... 483 484 def _parse_json_object(self, agg=False): 485 json_object = super()._parse_json_object() 486 array_kv_pair = seq_get(json_object.expressions, 0) 487 488 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 489 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 490 if ( 491 array_kv_pair 492 and isinstance(array_kv_pair.this, exp.Array) 493 and isinstance(array_kv_pair.expression, exp.Array) 494 ): 495 keys = array_kv_pair.this.expressions 496 values = array_kv_pair.expression.expressions 497 498 json_object.set( 499 "expressions", 500 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 501 ) 502 503 return json_object 504 505 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 506 bracket = super()._parse_bracket(this) 507 508 if this is bracket: 509 return bracket 510 511 if isinstance(bracket, exp.Bracket): 512 for expression in bracket.expressions: 513 name 
= expression.name.upper() 514 515 if name not in self.BRACKET_OFFSETS: 516 break 517 518 offset, safe = self.BRACKET_OFFSETS[name] 519 bracket.set("offset", offset) 520 bracket.set("safe", safe) 521 expression.replace(expression.expressions[0]) 522 523 return bracket 524 525 class Generator(generator.Generator): 526 EXPLICIT_UNION = True 527 INTERVAL_ALLOWS_PLURAL_FORM = False 528 JOIN_HINTS = False 529 QUERY_HINTS = False 530 TABLE_HINTS = False 531 LIMIT_FETCH = "LIMIT" 532 RENAME_TABLE_WITH_DB = False 533 NVL2_SUPPORTED = False 534 UNNEST_WITH_ORDINALITY = False 535 COLLATE_IS_FUNC = True 536 LIMIT_ONLY_LITERALS = True 537 SUPPORTS_TABLE_ALIAS_COLUMNS = False 538 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 539 JSON_KEY_VALUE_PAIR_SEP = "," 540 NULL_ORDERING_SUPPORTED = False 541 542 TRANSFORMS = { 543 **generator.Generator.TRANSFORMS, 544 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 545 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 546 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 547 exp.ArrayContains: _array_contains_sql, 548 exp.ArraySize: rename_func("ARRAY_LENGTH"), 549 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 550 exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}" 551 if e.args.get("default") 552 else f"COLLATE {self.sql(e, 'this')}", 553 exp.CountIf: rename_func("COUNTIF"), 554 exp.Create: _create_sql, 555 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 556 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 557 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 558 exp.DateFromParts: rename_func("DATE"), 559 exp.DateStrToDate: datestrtodate_sql, 560 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 561 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 562 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 563 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", 
e.this, e.text("unit")), 564 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 565 exp.GetPath: path_to_jsonpath(), 566 exp.GroupConcat: rename_func("STRING_AGG"), 567 exp.Hex: rename_func("TO_HEX"), 568 exp.If: if_sql(false_value="NULL"), 569 exp.ILike: no_ilike_sql, 570 exp.IntDiv: rename_func("DIV"), 571 exp.JSONFormat: rename_func("TO_JSON_STRING"), 572 exp.Max: max_or_greatest, 573 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 574 exp.MD5Digest: rename_func("MD5"), 575 exp.Min: min_or_least, 576 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 577 exp.RegexpExtract: lambda self, e: self.func( 578 "REGEXP_EXTRACT", 579 e.this, 580 e.expression, 581 e.args.get("position"), 582 e.args.get("occurrence"), 583 ), 584 exp.RegexpReplace: regexp_replace_sql, 585 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 586 exp.ReturnsProperty: _returnsproperty_sql, 587 exp.Select: transforms.preprocess( 588 [ 589 transforms.explode_to_unnest(), 590 _unqualify_unnest, 591 transforms.eliminate_distinct_on, 592 _alias_ordered_group, 593 transforms.eliminate_semi_and_anti_joins, 594 ] 595 ), 596 exp.SHA2: lambda self, e: self.func( 597 f"SHA256" if e.text("length") == "256" else "SHA512", e.this 598 ), 599 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 600 if e.name == "IMMUTABLE" 601 else "NOT DETERMINISTIC", 602 exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", 603 exp.StrToTime: lambda self, e: self.func( 604 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 605 ), 606 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 607 exp.TimeFromParts: rename_func("TIME"), 608 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 609 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 610 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 611 exp.TimeStrToTime: timestrtotime_sql, 612 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, 
e.expression), 613 exp.TsOrDsAdd: _ts_or_ds_add_sql, 614 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 615 exp.TsOrDsToTime: rename_func("TIME"), 616 exp.Unhex: rename_func("FROM_HEX"), 617 exp.UnixDate: rename_func("UNIX_DATE"), 618 exp.UnixToTime: _unix_to_time_sql, 619 exp.Values: _derived_table_values_to_unnest, 620 exp.VariancePop: rename_func("VAR_POP"), 621 } 622 623 TYPE_MAPPING = { 624 **generator.Generator.TYPE_MAPPING, 625 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 626 exp.DataType.Type.BIGINT: "INT64", 627 exp.DataType.Type.BINARY: "BYTES", 628 exp.DataType.Type.BOOLEAN: "BOOL", 629 exp.DataType.Type.CHAR: "STRING", 630 exp.DataType.Type.DECIMAL: "NUMERIC", 631 exp.DataType.Type.DOUBLE: "FLOAT64", 632 exp.DataType.Type.FLOAT: "FLOAT64", 633 exp.DataType.Type.INT: "INT64", 634 exp.DataType.Type.NCHAR: "STRING", 635 exp.DataType.Type.NVARCHAR: "STRING", 636 exp.DataType.Type.SMALLINT: "INT64", 637 exp.DataType.Type.TEXT: "STRING", 638 exp.DataType.Type.TIMESTAMP: "DATETIME", 639 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 640 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 641 exp.DataType.Type.TINYINT: "INT64", 642 exp.DataType.Type.VARBINARY: "BYTES", 643 exp.DataType.Type.VARCHAR: "STRING", 644 exp.DataType.Type.VARIANT: "ANY TYPE", 645 } 646 647 PROPERTIES_LOCATION = { 648 **generator.Generator.PROPERTIES_LOCATION, 649 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 650 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 651 } 652 653 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 654 RESERVED_KEYWORDS = { 655 *generator.Generator.RESERVED_KEYWORDS, 656 "all", 657 "and", 658 "any", 659 "array", 660 "as", 661 "asc", 662 "assert_rows_modified", 663 "at", 664 "between", 665 "by", 666 "case", 667 "cast", 668 "collate", 669 "contains", 670 "create", 671 "cross", 672 "cube", 673 "current", 674 "default", 675 "define", 676 "desc", 677 "distinct", 678 "else", 679 "end", 680 "enum", 681 "escape", 682 
"except", 683 "exclude", 684 "exists", 685 "extract", 686 "false", 687 "fetch", 688 "following", 689 "for", 690 "from", 691 "full", 692 "group", 693 "grouping", 694 "groups", 695 "hash", 696 "having", 697 "if", 698 "ignore", 699 "in", 700 "inner", 701 "intersect", 702 "interval", 703 "into", 704 "is", 705 "join", 706 "lateral", 707 "left", 708 "like", 709 "limit", 710 "lookup", 711 "merge", 712 "natural", 713 "new", 714 "no", 715 "not", 716 "null", 717 "nulls", 718 "of", 719 "on", 720 "or", 721 "order", 722 "outer", 723 "over", 724 "partition", 725 "preceding", 726 "proto", 727 "qualify", 728 "range", 729 "recursive", 730 "respect", 731 "right", 732 "rollup", 733 "rows", 734 "select", 735 "set", 736 "some", 737 "struct", 738 "tablesample", 739 "then", 740 "to", 741 "treat", 742 "true", 743 "unbounded", 744 "union", 745 "unnest", 746 "using", 747 "when", 748 "where", 749 "window", 750 "with", 751 "within", 752 } 753 754 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 755 if isinstance(expression.this, exp.TsOrDsToDate): 756 this: exp.Expression = expression.this 757 else: 758 this = expression 759 760 return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})" 761 762 def struct_sql(self, expression: exp.Struct) -> str: 763 args = [] 764 for expr in expression.expressions: 765 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 766 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 767 else: 768 arg = self.sql(expr) 769 770 args.append(arg) 771 772 return self.func("STRUCT", *args) 773 774 def eq_sql(self, expression: exp.EQ) -> str: 775 # Operands of = cannot be NULL in BigQuery 776 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 777 if not isinstance(expression.parent, exp.Update): 778 return "NULL" 779 780 return self.binary(expression, "=") 781 782 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 783 parent = expression.parent 784 785 # BigQuery allows CAST(.. 
AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 786 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 787 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 788 return self.func( 789 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 790 ) 791 792 return super().attimezone_sql(expression) 793 794 def trycast_sql(self, expression: exp.TryCast) -> str: 795 return self.cast_sql(expression, safe_prefix="SAFE_") 796 797 def cte_sql(self, expression: exp.CTE) -> str: 798 if expression.alias_column_names: 799 self.unsupported("Column names in CTE definition are not supported.") 800 return super().cte_sql(expression) 801 802 def array_sql(self, expression: exp.Array) -> str: 803 first_arg = seq_get(expression.expressions, 0) 804 if isinstance(first_arg, exp.Subqueryable): 805 return f"ARRAY{self.wrap(self.sql(first_arg))}" 806 807 return inline_array_sql(self, expression) 808 809 def bracket_sql(self, expression: exp.Bracket) -> str: 810 this = self.sql(expression, "this") 811 expressions = expression.expressions 812 813 if len(expressions) == 1: 814 arg = expressions[0] 815 if arg.type is None: 816 from sqlglot.optimizer.annotate_types import annotate_types 817 818 arg = annotate_types(arg) 819 820 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 821 # BQ doesn't support bracket syntax with string values 822 return f"{this}.{arg.name}" 823 824 expressions_sql = ", ".join(self.sql(e) for e in expressions) 825 offset = expression.args.get("offset") 826 827 if offset == 0: 828 expressions_sql = f"OFFSET({expressions_sql})" 829 elif offset == 1: 830 expressions_sql = f"ORDINAL({expressions_sql})" 831 elif offset is not None: 832 self.unsupported(f"Unsupported array offset: {offset}") 833 834 if expression.args.get("safe"): 835 expressions_sql = f"SAFE_{expressions_sql}" 836 837 return f"{this}[{expressions_sql}]" 838 839 def transaction_sql(self, *_) -> str: 840 return 
"BEGIN TRANSACTION" 841 842 def commit_sql(self, *_) -> str: 843 return "COMMIT TRANSACTION" 844 845 def rollback_sql(self, *_) -> str: 846 return "ROLLBACK TRANSACTION" 847 848 def in_unnest_op(self, expression: exp.Unnest) -> str: 849 return self.sql(expression) 850 851 def except_op(self, expression: exp.Except) -> str: 852 if not expression.args.get("distinct", False): 853 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 854 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 855 856 def intersect_op(self, expression: exp.Intersect) -> str: 857 if not expression.args.get("distinct", False): 858 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 859 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 860 861 def with_properties(self, properties: exp.Properties) -> str: 862 return self.properties(properties, prefix=self.seg("OPTIONS")) 863 864 def version_sql(self, expression: exp.Version) -> str: 865 if expression.name == "TIMESTAMP": 866 expression.set("this", "SYSTEM_TIME") 867 return super().version_sql(expression)
Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Determines whether UNNEST table aliases are treated as column aliases.
Determines whether or not user-defined data types are supported.
Specifies the strategy according to which identifiers should be normalized.
Associates this dialect's time formats with their equivalent Python strftime
format.
Mapping of an unescaped escape sequence to the corresponding character.
Helper used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
If empty, the corresponding trie will be constructed from `TIME_MAPPING`.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT *
queries.
def normalize_identifier(self, expression: E) -> E:
    """Lowercase unquoted identifiers that do not name tables or UDFs.

    CTE names are not case-sensitive in BigQuery, but table names are
    (by default, at least), and UDF names are case-sensitive too, so
    those identifiers are left untouched.
    """
    if not isinstance(expression, exp.Identifier):
        return expression

    # Walk up through dotted paths so we inspect the node that actually
    # owns this identifier (e.g. the Table behind `project.dataset.tbl`).
    ancestor = expression.parent
    while isinstance(ancestor, exp.Dot):
        ancestor = ancestor.parent

    names_udf = isinstance(ancestor, exp.UserDefinedFunction)
    # Qualification is a heuristic for "this is a table name".
    names_qualified_table = isinstance(ancestor, exp.Table) and bool(ancestor.db)
    flagged_as_table = bool(expression.meta.get("is_table"))

    if not (names_udf or names_qualified_table or flagged_as_table):
        expression.set("this", expression.this.lower())

    return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it
would resolve it as `FOO`. If the identifier was quoted, it would need to be treated as
case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" it.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """BigQuery tokenizer: triple-quoted strings, backtick identifiers, etc."""

    # Single-, double- and triple-quoted string literals are all valid.
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    # Byte and raw literals are any of the quote styles prefixed with
    # b/B or r/R respectively. NOTE: the quote list must stay the outer
    # loop — only the outermost iterable of a class-body comprehension
    # can see class-level names.
    BYTE_STRINGS = [
        (p + quote, quote) for quote in t.cast(t.List[str], QUOTES) for p in ("b", "B")
    ]

    RAW_STRINGS = [
        (p + quote, quote) for quote in t.cast(t.List[str], QUOTES) for p in ("r", "R")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "DECLARE": TokenType.COMMAND,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
    }

    # In BigQuery, DIV is a function rather than an operator keyword.
    KEYWORDS.pop("DIV")
class Parser(parser.Parser):
    """BigQuery parser: maps BigQuery-specific function names and syntax
    onto sqlglot's canonical expression tree."""

    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _parse_date,
        "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        # FORMAT_DATE(fmt, date) -> TimeToStr(date, fmt): arg order flips.
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _parse_to_hex,
        # PARSE_DATE(fmt, value) -> StrToDate(value, fmt): arg order flips.
        "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            # BigQuery returns the single capturing group, if there is one.
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _parse_time,
        "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    # BigQuery's TRIM doesn't use the `TRIM(<chars> FROM <expr>)` syntax.
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {*parser.Parser.NESTED_TYPE_TOKENS, TokenType.TABLE}

    ID_VAR_TOKENS = {*parser.Parser.ID_VAR_TOKENS, TokenType.VALUES}

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    # name -> (index base, whether out-of-range access yields NULL)
    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        """Parse `FOR <range> DO <statement>` loops."""
        target = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=target, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a table name, allowing dashes and
        leading digits, which BigQuery permits in table names.

        https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        """
        part = super()._parse_table_part(schema=schema) or self._parse_number()

        if isinstance(part, exp.Identifier):
            # Greedily consume `-<token>` sequences into the identifier.
            name = part.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                self._advance(2)
                name += f"-{self._prev.text}"

            part = exp.Identifier(this=name, quoted=part.args.get("quoted"))
        elif isinstance(part, exp.Literal):
            # A part that starts with digits, e.g. `123abc`.
            name = part.name

            if self._is_connected() and self._parse_var(any_token=True):
                name += self._prev.text

            part = exp.Identifier(this=name, quoted=True)

        return part

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Split a quoted `catalog.db.table` identifier into table parts."""
        table = super()._parse_table_parts(schema=schema)

        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(word))
                for word in split_num_words(table.name, ".", 3)
            )

            # Anything beyond three parts stays attached to the table name.
            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table

    @t.overload
    def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        """Normalize JSON_OBJECT's array-pair signature into key/value pairs.

        Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical
        representation:
        https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        """
        json_object = super()._parse_json_object()
        pair = seq_get(json_object.expressions, 0)

        if (
            pair
            and isinstance(pair.this, exp.Array)
            and isinstance(pair.expression, exp.Array)
        ):
            keys = pair.this.expressions
            values = pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Unwrap OFFSET/ORDINAL/SAFE_* wrappers inside array subscripts."""
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for wrapper in bracket.expressions:
                kind = wrapper.name.upper()

                if kind not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[kind]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                # Replace e.g. OFFSET(i) with the bare index i.
                wrapper.replace(wrapper.expressions[0])

        return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Converts a sqlglot syntax tree into BigQuery (GoogleSQL) SQL text.

    Overrides the base :class:`generator.Generator` with BigQuery-specific
    feature flags, expression transforms, type mappings and reserved keywords.
    """

    # --- dialect feature flags -------------------------------------------
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False

    # Expression-class -> SQL rendering overrides for BigQuery.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
        if e.args.get("default")
        else f"COLLATE {self.sql(e, 'this')}",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GetPath: path_to_jsonpath(),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        # Fix: dropped redundant f-prefix on the constant strings below (no placeholders).
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
        if e.name == "IMMUTABLE"
        else "NOT DETERMINISTIC",
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    # sqlglot abstract types -> BigQuery type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all",
        "and",
        "any",
        "array",
        "as",
        "asc",
        "assert_rows_modified",
        "at",
        "between",
        "by",
        "case",
        "cast",
        "collate",
        "contains",
        "create",
        "cross",
        "cube",
        "current",
        "default",
        "define",
        "desc",
        "distinct",
        "else",
        "end",
        "enum",
        "escape",
        "except",
        "exclude",
        "exists",
        "extract",
        "false",
        "fetch",
        "following",
        "for",
        "from",
        "full",
        "group",
        "grouping",
        "groups",
        "hash",
        "having",
        "if",
        "ignore",
        "in",
        "inner",
        "intersect",
        "interval",
        "into",
        "is",
        "join",
        "lateral",
        "left",
        "like",
        "limit",
        "lookup",
        "merge",
        "natural",
        "new",
        "no",
        "not",
        "null",
        "nulls",
        "of",
        "on",
        "or",
        "order",
        "outer",
        "over",
        "partition",
        "preceding",
        "proto",
        "qualify",
        "range",
        "recursive",
        "respect",
        "right",
        "rollup",
        "rows",
        "select",
        "set",
        "some",
        "struct",
        "tablesample",
        "then",
        "to",
        "treat",
        "true",
        "unbounded",
        "union",
        "unnest",
        "using",
        "when",
        "where",
        "window",
        "with",
        "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        """Render time-to-string as FORMAT_DATE, unwrapping a TsOrDsToDate operand."""
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render an exp.Struct as STRUCT(...), turning key/value pairs into `value AS key`."""
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            # `SET x = NULL` inside an UPDATE is the one place a NULL operand is legal.
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        """TRY_CAST maps to BigQuery's SAFE_CAST."""
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        # BigQuery has no `WITH cte (col, ...)` column-list syntax; warn and drop.
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        """Render ARRAY(<subquery>) for subquery operands, inline arrays otherwise."""
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render subscript access, mapping to OFFSET/ORDINAL/SAFE_ variants as needed."""
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        """BigQuery table properties are emitted in an OPTIONS(...) clause."""
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        # FOR TIMESTAMP AS OF -> FOR SYSTEM_TIME AS OF in BigQuery.
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def struct_sql(self, expression: exp.Struct) -> str:
    """Render an exp.Struct as a STRUCT(...) call.

    Key/value definitions become `value AS key`; any other expression is
    rendered as-is.
    """
    rendered = [
        f"{self.sql(item, 'expression')} AS {item.this.name}"
        if isinstance(item, self.KEY_VALUE_DEFINITIONS)
        else self.sql(item)
        for item in expression.expressions
    ]
    return self.func("STRUCT", *rendered)
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Render AT TIME ZONE for BigQuery.

    BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]);
    only the STRING cast keeps the native AT TIME ZONE form — everything else is
    converted to TIMESTAMP(DATETIME(this, zone)).
    """
    parent = expression.parent
    if isinstance(parent, exp.Cast) and parent.to.is_type("text"):
        return super().attimezone_sql(expression)

    zone = expression.args.get("zone")
    return self.func("TIMESTAMP", self.func("DATETIME", expression.this, zone))
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render subscript access for BigQuery.

    A single text-typed subscript becomes dotted field access (BigQuery has no
    bracket syntax for string keys); numeric subscripts are wrapped in
    OFFSET/ORDINAL according to the base offset, with a SAFE_ prefix when the
    access is marked safe.
    """
    base_sql = self.sql(expression, "this")
    subscripts = expression.expressions

    if len(subscripts) == 1:
        sub = subscripts[0]
        if sub.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            sub = annotate_types(sub)

        if sub.type and sub.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values
            return f"{base_sql}.{sub.name}"

    inner = ", ".join(self.sql(e) for e in subscripts)
    base_offset = expression.args.get("offset")

    if base_offset == 0:
        inner = f"OFFSET({inner})"
    elif base_offset == 1:
        inner = f"ORDINAL({inner})"
    elif base_offset is not None:
        self.unsupported(f"Unsupported array offset: {base_offset}")

    if expression.args.get("safe"):
        inner = f"SAFE_{inner}"

    return f"{base_sql}[{inner}]"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql