sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    path_to_jsonpath,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    (
                        alias.columns
                        if alias and alias.columns
                        else (f"_c{i}" for i in range(len(t.expressions)))
                    ),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"


def _parse_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GetPath: path_to_jsonpath(),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                f"SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                f"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
WEEK_OFFSET = -1

Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
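For instance, BigQuery truncates weeks to Sunday by default, which the -1 offset encodes. A small sketch, showing only that the syntax round-trips:

    import sqlglot

    # BigQuery weeks start on Sunday (WEEK_OFFSET = -1); DATE_TRUNC with a WEEK
    # unit parses and regenerates unchanged.
    ast = sqlglot.parse_one("SELECT DATE_TRUNC(d, WEEK) FROM t", read="bigquery")
    print(ast.sql(dialect="bigquery"))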
UNNEST_COLUMN_ONLY = True

Determines whether or not UNNEST table aliases are treated as column aliases.
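A sketch of what this means in practice; the re-aliased output varies by target dialect:

    import sqlglot

    # In BigQuery the alias on UNNEST names the produced column, so `n` below is
    # a column reference rather than a table alias.
    ast = sqlglot.parse_one("SELECT n FROM UNNEST([1, 2, 3]) AS n", read="bigquery")
    # Dialects where UNNEST aliases are table aliases re-render it accordingly.
    print(ast.sql(dialect="presto"))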
SUPPORTS_USER_DEFINED_TYPES = False

Determines whether or not user-defined data types are supported.
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
TIME_MAPPING = {'%D': '%m/%d/%y'}

Associates this dialect's time formats with their equivalent Python strftime format.
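A sketch of the mapping in action, assuming a target dialect that spells format tokens differently:

    import sqlglot

    # "%D" is BigQuery shorthand for "%m/%d/%y"; TIME_MAPPING lets sqlglot expand
    # it into the equivalent strftime tokens when changing dialects.
    sql = "SELECT FORMAT_DATE('%D', d) FROM t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])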
ESCAPE_SEQUENCES

Mapping of an unescaped escape sequence to the corresponding character.
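For example, a sketch of the decoding the tokenizer performs with this table:

    import sqlglot

    # The tokenizer decodes "\n" in the literal into an actual newline character,
    # so the AST stores the unescaped value.
    lit = sqlglot.parse_one(r"SELECT 'a\nb'", read="bigquery").find(sqlglot.exp.Literal)
    print(repr(lit.this))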
FORMAT_MAPPING

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
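A sketch of the syntax this enables; how the cast is re-rendered depends on the sqlglot version:

    import sqlglot

    # FORMAT_MAPPING translates format elements like YYYY/MM/DD into strftime
    # tokens when parsing BigQuery's CAST(... FORMAT '...') syntax.
    sql = "SELECT CAST('2024-01-15' AS DATE FORMAT 'YYYY-MM-DD')"
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])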
PSEUDOCOLUMNS = {'_PARTITIONTIME', '_PARTITIONDATE'}

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
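A sketch of the effect during star expansion, using a hypothetical schema that declares the pseudo-column:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    # Hypothetical schema: _PARTITIONTIME exists on the table but, being a
    # pseudo-column, should not be produced by an expanded SELECT *.
    schema = {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
    expanded = qualify(
        sqlglot.parse_one("SELECT * FROM t", read="bigquery"),
        schema=schema,
        dialect="bigquery",
    )
    print(expanded.sql(dialect="bigquery"))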
normalize_identifier(self, expression: E) -> E

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
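A minimal sketch of BigQuery's heuristic: unquoted, unqualified identifiers get lowercased.

    from sqlglot import exp
    from sqlglot.dialects.bigquery import BigQuery

    dialect = BigQuery()

    # Unquoted and unqualified, so the heuristic lowercases it (FoO -> foo).
    ident = dialect.normalize_identifier(exp.to_identifier("FoO"))
    print(ident.sql(dialect="bigquery"))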
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
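As a hedged sketch, these options are usually passed through the top-level API rather than by instantiating the parser directly; keyword arguments are forwarded to this Parser (the broken SQL below is just an example):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level, error_message_context and max_errors can all be supplied here.
    sqlglot.parse_one("SELECT * FROM", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as err:
    print(err.errors)  # structured error details, capped at max_errors entries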
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GetPath: path_to_jsonpath(),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
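A brief sketch of a few of these transforms in action through sqlglot.transpile (the sample queries are illustrative, and exact output may vary slightly by sqlglot version):

import sqlglot

# exp.ILike: BigQuery has no ILIKE, so it is rewritten via no_ilike_sql.
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", write="bigquery")[0])
# e.g. SELECT * FROM t WHERE LOWER(x) LIKE '%a%'

# eq_sql: `= NULL` always evaluates to NULL in BigQuery, so the comparison
# is folded away outside of UPDATE statements.
print(sqlglot.transpile("SELECT a = NULL FROM t", write="bigquery")[0])
# e.g. SELECT NULL FROM t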
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. Only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
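A minimal sketch of supplying these options through sqlglot.transpile, which forwards them to this Generator (the option values shown are arbitrary examples):

import sqlglot

sql = sqlglot.transpile(
    "select a, b from t",
    write="bigquery",
    pretty=True,    # format the output across multiple lines
    identify=True,  # always quote identifiers (backticks in BigQuery)
)[0]
print(sql)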
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql