sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    json_keyvalue_comma_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    path_to_jsonpath,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(t.expressions))),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"
    if scale == exp.UnixToTime.NANOS:
        # We need to cast to INT64 because that's what BQ expects
        return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"

    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
    return ""


def _parse_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        def _parse_json_object(self) -> exp.JSONObject:
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GetPath: path_to_jsonpath(),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.JSONKeyValue: json_keyvalue_comma_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                f"SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: f"DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
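Example (a minimal usage sketch, not part of the module itself): the snippet below exercises two of the BigQuery-specific behaviors defined above through the public sqlglot API. The outputs shown in the comments are indicative and may vary slightly across sqlglot versions.

import sqlglot

# _derived_table_values_to_unnest: a VALUES clause used as a derived table is
# rewritten into UNNEST of an array of STRUCTs, since BigQuery has no
# derived-table VALUES syntax.
print(sqlglot.transpile("SELECT * FROM (VALUES (1, 'a')) AS tab(x, y)", read="duckdb", write="bigquery")[0])
# e.g. SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)]) AS tab

# _parse_to_hex: TO_HEX(MD5(..)) is parsed into a single exp.MD5 node, which
# simplifies transpilation to engines whose MD5 already returns a hex string.
print(sqlglot.transpile("SELECT TO_HEX(MD5('abc'))", read="bigquery", write="duckdb")[0])
# e.g. SELECT MD5('abc')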
class BigQuery(Dialect)
WEEK_OFFSET = -1
    Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = True
    Determines whether or not UNNEST table aliases are treated as column aliases.

SUPPORTS_USER_DEFINED_TYPES = False
    Determines whether or not user-defined data types are supported.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
    Specifies the strategy according to which identifiers should be normalized.

TIME_MAPPING = {"%D": "%m/%d/%y"}
    Associates this dialect's time formats with their equivalent Python strftime format.

ESCAPE_SEQUENCES
    Mapping of an unescaped escape sequence to the corresponding character.

FORMAT_MAPPING
    Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
    Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
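As a hedged sketch of how TIME_MAPPING participates in transpilation: when a format string crosses dialects, BigQuery's "%D" token is rewritten through this mapping to its strftime equivalent. The exact output below depends on the sqlglot version and the target dialect.

import sqlglot

# PARSE_TIMESTAMP is parsed into exp.StrToTime (via _parse_timestamp), and the
# "%D" format token is translated through TIME_MAPPING to "%m/%d/%y".
print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%D', '12/31/23')", read="bigquery", write="duckdb")[0])
# e.g. SELECT STRPTIME('12/31/23', '%m/%d/%y')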
def normalize_identifier(self, expression: E) -> E
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
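A small illustration of BigQuery's heuristic (our own sketch, using only the expressions shown in the source above): unqualified identifiers are lowercased, while the identifier of a qualified table is left intact.

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

bq = BigQuery()

# Unqualified identifiers (columns, aliases, CTE names) are case-insensitive
# in BigQuery, so they are lowercased:
print(bq.normalize_identifier(exp.to_identifier("FoO")).name)  # foo

# The identifier of a qualified table passes the parent.db check above and is
# therefore left alone:
table = exp.table_("MyTable", db="my_dataset")
print(bq.normalize_identifier(table.this).name)  # MyTable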
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer)
class Parser(parser.Parser)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
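A minimal sketch of how these options behave when forwarded through sqlglot's top-level API; the deliberately broken query and the exact error payload are illustrative only:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

# With ErrorLevel.RAISE the parser collects up to max_errors messages
# into a single ParseError, instead of failing on the first error as
# the default IMMEDIATE level does.
try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=2)
except ParseError as e:
    print(e.errors)  # structured details for each collected error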
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
        if e.args.get("default")
        else f"COLLATE {self.sql(e, 'this')}",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GetPath: path_to_jsonpath(),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.JSONKeyValue: json_keyvalue_comma_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
        if e.name == "IMMUTABLE"
        else "NOT DETERMINISTIC",
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on",
        "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
        "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
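To see a couple of the mappings above in action, here is a short sketch using sqlglot's top-level transpile; `x`, `t`, and `name` are placeholder identifiers, and the exact output may differ slightly across sqlglot versions:

import sqlglot

# remove_precision_parameterized_types plus TYPE_MAPPING: VARCHAR(10)
# is rendered as STRING, since BigQuery has no parameterized VARCHAR.
print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="bigquery")[0])

# no_ilike_sql: BigQuery has no ILIKE, so it is lowered to a plain LIKE
# over LOWER(...) to preserve the case-insensitive semantics.
print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%ann%'", read="postgres", write="bigquery")[0])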
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Determines how function names are normalized. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
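These options also flow through sqlglot.transpile. A hedged sketch with placeholder SQL: pretty=True is forwarded to the Generator, and unsupported_level=ErrorLevel.RAISE turns the "EXCEPT without DISTINCT" message emitted by except_op above into an exception:

import sqlglot
from sqlglot.errors import ErrorLevel, UnsupportedError

# Generator options pass straight through transpile's keyword arguments.
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 0", write="bigquery", pretty=True)[0])

# BigQuery requires DISTINCT on EXCEPT; with RAISE, the generator's
# collected unsupported(...) messages are raised instead of warned.
try:
    sqlglot.transpile("SELECT 1 EXCEPT ALL SELECT 2", write="bigquery", unsupported_level=ErrorLevel.RAISE)
except UnsupportedError as e:
    print(e)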
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- atindex_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql