sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    json_keyvalue_comma_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    ts_or_ds_to_date_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(t.expressions))),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"
    if scale == exp.UnixToTime.NANOS:
        # We need to cast to INT64 because that's what BQ expects
        return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"

    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
    return ""


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        def _parse_json_object(self) -> exp.JSONObject:
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}",
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.JSONKeyValue: json_keyvalue_comma_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
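The parser and generator hooks above surface through sqlglot's public transpile API. A minimal, hedged usage sketch (not part of the module; the exact SQL strings produced can vary across sqlglot versions):

    import sqlglot

    # TO_HEX(MD5(..)) is collapsed into a single exp.MD5 node by _parse_to_hex,
    # so other dialects can render their native hex-digest equivalent.
    print(sqlglot.transpile("SELECT TO_HEX(MD5('abc'))", read="bigquery", write="duckdb")[0])

    # TIMESTAMP_MICROS(..) parses into exp.UnixToTime with scale=MICROS, and
    # _unix_to_time_sql renders it back when targeting BigQuery.
    print(sqlglot.transpile("SELECT TIMESTAMP_MICROS(1)", read="bigquery", write="bigquery")[0])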
class BigQuery(Dialect)
Attribute documentation:

- WEEK_OFFSET: Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday); -1 would be Sunday.
- UNNEST_COLUMN_ONLY: Determines whether UNNEST table aliases are treated as column aliases.
- SUPPORTS_USER_DEFINED_TYPES: Determines whether user-defined data types are supported.
- NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
- TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
- ESCAPE_SEQUENCES: Mapping of an unescaped escape sequence to the corresponding character.
- FORMAT_MAPPING: Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
- PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
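A hedged illustration of how TIME_MAPPING comes into play during transpilation (this example is not part of the dialect; output may differ by sqlglot version):

    import sqlglot

    # "%D" is rewritten through TIME_MAPPING ("%m/%d/%y") when the BigQuery
    # format string is converted for a dialect with different format tokens.
    print(sqlglot.transpile("SELECT PARSE_DATE('%D', '12/31/23')", read="bigquery", write="duckdb")[0])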
def normalize_identifier(self, expression: E) -> E

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
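A minimal sketch of this behavior, assuming sqlglot's normalize_identifiers optimizer rule (which invokes normalize_identifier) is available at this import path in your version:

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # The unquoted column is lowercased; the backquoted (case-sensitive) project
    # id and the qualified table parts are left alone by the heuristic above.
    expr = sqlglot.parse_one("SELECT Col FROM `MyProject`.MyDataset.MyTable", read="bigquery")
    print(normalize_identifiers(expr, dialect="bigquery").sql(dialect="bigquery"))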
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class BigQuery.Tokenizer(sqlglot.tokens.Tokenizer)
class BigQuery.Parser(sqlglot.parser.Parser)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
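These keyword arguments are forwarded by parse_one to the Parser, so the error behavior can be tightened per call. A minimal sketch (the query string is a placeholder):

import sqlglot
from sqlglot import ErrorLevel

# With ErrorLevel.RAISE, up to max_errors messages are collected into one ParseError.
sqlglot.parse_one("SELECT 1", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=5)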
Inherited Members: see sqlglot.parser.Parser.
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.JSONKeyValue: json_keyvalue_comma_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on",
        "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
        "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
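These generator options are forwarded by sqlglot.transpile, so pretty-printed, fully quoted BigQuery output can be requested per call. A minimal sketch (the table and column names are invented):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT col FROM tbl",
        write="bigquery",
        pretty=True,    # format the output SQL
        identify=True,  # quote every identifier
    )[0]
)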
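To make a few of the overrides above concrete, here is a small sketch through sqlglot's public transpile API (column names are invented; the commented outputs are what the trycast_sql, TYPE_MAPPING, and eq_sql rules aim to produce):

import sqlglot

# TryCast is rendered with the SAFE_ prefix (trycast_sql), and the
# type mapping turns INT into INT64.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])
# SELECT SAFE_CAST(x AS INT64)

# eq_sql: an = comparison with a NULL operand (outside an UPDATE)
# collapses to NULL, since = cannot take NULL operands in BigQuery.
print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
# SELECT NULL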
Inherited Members: see sqlglot.generator.Generator.