sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 } 400 401 FUNCTION_PARSERS.pop("MATCH") 402 403 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 404 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 405 406 RANGE_PARSERS = { 407 **parser.Parser.RANGE_PARSERS, 408 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 409 and self._parse_in(this, is_global=True), 410 } 411 412 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 413 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 414 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 415 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 416 417 JOIN_KINDS = { 418 *parser.Parser.JOIN_KINDS, 419 TokenType.ANY, 420 TokenType.ASOF, 421 TokenType.ARRAY, 422 } 423 424 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 425 TokenType.ANY, 426 TokenType.ARRAY, 427 TokenType.FINAL, 428 TokenType.FORMAT, 429 TokenType.SETTINGS, 430 } 431 432 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 433 TokenType.FORMAT, 434 } 435 436 LOG_DEFAULTS_TO_LN = True 437 438 QUERY_MODIFIER_PARSERS = { 439 **parser.Parser.QUERY_MODIFIER_PARSERS, 440 TokenType.SETTINGS: lambda self: ( 441 "settings", 442 self._advance() or self._parse_csv(self._parse_assignment), 443 ), 444 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 445 } 446 447 CONSTRAINT_PARSERS = { 448 **parser.Parser.CONSTRAINT_PARSERS, 449 "INDEX": lambda self: self._parse_index_constraint(), 450 "CODEC": lambda self: self._parse_compress(), 451 } 452 453 ALTER_PARSERS = { 454 **parser.Parser.ALTER_PARSERS, 455 "REPLACE": lambda self: self._parse_alter_table_replace(), 456 } 457 458 SCHEMA_UNNAMED_CONSTRAINTS = { 459 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 460 "INDEX", 461 } 462 463 PLACEHOLDER_PARSERS = { 464 **parser.Parser.PLACEHOLDER_PARSERS, 465 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 466 } 467 468 def _parse_types( 469 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 470 ) -> t.Optional[exp.Expression]: 471 dtype = super()._parse_types( 472 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 473 ) 474 if isinstance(dtype, exp.DataType): 475 # Mark every type as non-nullable which is ClickHouse's default. This marker 476 # helps us transpile types from other dialects to ClickHouse, so that we can 477 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 478 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 479 # `Nullable` type constructor 480 dtype.set("nullable", False) 481 482 return dtype 483 484 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 485 index = self._index 486 this = self._parse_bitwise() 487 if self._match(TokenType.FROM): 488 self._retreat(index) 489 return super()._parse_extract() 490 491 # We return Anonymous here because extract and regexpExtract have different semantics, 492 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 493 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 494 # 495 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 496 self._match(TokenType.COMMA) 497 return self.expression( 498 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 499 ) 500 501 def _parse_assignment(self) -> t.Optional[exp.Expression]: 502 this = super()._parse_assignment() 503 504 if self._match(TokenType.PLACEHOLDER): 505 return self.expression( 506 exp.If, 507 this=this, 508 true=self._parse_assignment(), 509 false=self._match(TokenType.COLON) and self._parse_assignment(), 510 ) 511 512 return this 513 514 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 515 """ 516 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 517 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 518 """ 519 this = self._parse_id_var() 520 self._match(TokenType.COLON) 521 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 522 self._match_text_seq("IDENTIFIER") and "Identifier" 523 ) 524 525 if not kind: 526 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 527 elif not self._match(TokenType.R_BRACE): 528 self.raise_error("Expecting }") 529 530 return self.expression(exp.Placeholder, this=this, kind=kind) 531 532 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 533 this = super()._parse_in(this) 534 this.set("is_global", is_global) 535 return this 536 537 def _parse_table( 538 self, 539 schema: bool = False, 540 joins: bool = False, 541 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 542 parse_bracket: bool = False, 543 is_db_reference: bool = False, 544 parse_partition: bool = False, 545 ) -> t.Optional[exp.Expression]: 546 this = super()._parse_table( 547 schema=schema, 548 joins=joins, 549 alias_tokens=alias_tokens, 550 parse_bracket=parse_bracket, 551 is_db_reference=is_db_reference, 552 ) 553 554 if self._match(TokenType.FINAL): 555 this = self.expression(exp.Final, this=this) 556 557 return this 558 559 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 560 return super()._parse_position(haystack_first=True) 561 562 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 563 def _parse_cte(self) -> exp.CTE: 564 # WITH <identifier> AS <subquery expression> 565 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 566 567 if not cte: 568 # WITH <expression> AS <identifier> 569 cte = self.expression( 570 exp.CTE, 571 this=self._parse_assignment(), 572 alias=self._parse_table_alias(), 573 scalar=True, 574 ) 575 576 return cte 577 578 def _parse_join_parts( 579 self, 580 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 581 is_global = self._match(TokenType.GLOBAL) and self._prev 582 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 583 584 if kind_pre: 585 kind = self._match_set(self.JOIN_KINDS) and self._prev 586 side = self._match_set(self.JOIN_SIDES) and self._prev 587 return is_global, side, kind 588 589 return ( 590 is_global, 591 self._match_set(self.JOIN_SIDES) and self._prev, 592 self._match_set(self.JOIN_KINDS) and self._prev, 593 ) 594 595 def _parse_join( 596 self, skip_join_token: bool = False, parse_bracket: bool = False 597 ) -> t.Optional[exp.Join]: 598 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 599 if join: 600 join.set("global", join.args.pop("method", None)) 601 602 return join 603 604 def _parse_function( 605 self, 606 functions: t.Optional[t.Dict[str, t.Callable]] = None, 607 anonymous: bool = False, 608 optional_parens: bool = True, 609 any_token: bool = False, 610 ) -> t.Optional[exp.Expression]: 611 expr = super()._parse_function( 612 functions=functions, 613 anonymous=anonymous, 614 optional_parens=optional_parens, 615 any_token=any_token, 616 ) 617 618 func = expr.this if isinstance(expr, exp.Window) else expr 619 620 # Aggregate functions can be split in 2 parts: <func_name><suffix> 621 parts = ( 622 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 623 ) 624 625 if parts: 626 params = self._parse_func_params(func) 627 628 kwargs = { 629 "this": func.this, 630 "expressions": func.expressions, 631 } 632 if parts[1]: 633 kwargs["parts"] = parts 634 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 635 else: 636 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 637 638 kwargs["exp_class"] = exp_class 639 if params: 640 kwargs["params"] = params 641 642 func = self.expression(**kwargs) 643 644 if isinstance(expr, exp.Window): 645 # The window's func was parsed as Anonymous in base parser, fix its 646 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 647 expr.set("this", func) 648 elif params: 649 # Params have blocked super()._parse_function() from parsing the following window 650 # (if that exists) as they're standing between the function call and the window spec 651 expr = self._parse_window(func) 652 else: 653 expr = func 654 655 return expr 656 657 def _parse_func_params( 658 self, this: t.Optional[exp.Func] = None 659 ) -> t.Optional[t.List[exp.Expression]]: 660 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 661 return self._parse_csv(self._parse_lambda) 662 663 if self._match(TokenType.L_PAREN): 664 params = self._parse_csv(self._parse_lambda) 665 self._match_r_paren(this) 666 return params 667 668 return None 669 670 def _parse_quantile(self) -> exp.Quantile: 671 this = self._parse_lambda() 672 params = self._parse_func_params() 673 if params: 674 return self.expression(exp.Quantile, this=params[0], quantile=this) 675 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 676 677 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 678 return super()._parse_wrapped_id_vars(optional=True) 679 680 def _parse_primary_key( 681 self, wrapped_optional: bool = False, in_props: bool = False 682 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 683 return super()._parse_primary_key( 684 wrapped_optional=wrapped_optional or in_props, in_props=in_props 685 ) 686 687 def _parse_on_property(self) -> t.Optional[exp.Expression]: 688 index = self._index 689 if self._match_text_seq("CLUSTER"): 690 this = self._parse_id_var() 691 if this: 692 return self.expression(exp.OnCluster, this=this) 693 else: 694 self._retreat(index) 695 return None 696 697 def _parse_index_constraint( 698 self, kind: t.Optional[str] = None 699 ) -> exp.IndexColumnConstraint: 700 # INDEX name1 expr TYPE type1(args) GRANULARITY value 701 this = self._parse_id_var() 702 expression = self._parse_assignment() 703 704 index_type = self._match_text_seq("TYPE") and ( 705 self._parse_function() or self._parse_var() 706 ) 707 708 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 709 710 return self.expression( 711 exp.IndexColumnConstraint, 712 this=this, 713 expression=expression, 714 index_type=index_type, 715 granularity=granularity, 716 ) 717 718 def _parse_partition(self) -> t.Optional[exp.Partition]: 719 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 720 if not self._match(TokenType.PARTITION): 721 return None 722 723 if self._match_text_seq("ID"): 724 # Corresponds to the PARTITION ID <string_value> syntax 725 expressions: t.List[exp.Expression] = [ 726 self.expression(exp.PartitionId, this=self._parse_string()) 727 ] 728 else: 729 expressions = self._parse_expressions() 730 731 return self.expression(exp.Partition, expressions=expressions) 732 733 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 734 partition = self._parse_partition() 735 736 if not partition or not self._match(TokenType.FROM): 737 return None 738 739 return self.expression( 740 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 741 ) 742 743 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 744 if not self._match_text_seq("PROJECTION"): 745 return None 746 747 return self.expression( 748 exp.ProjectionDef, 749 this=self._parse_id_var(), 750 expression=self._parse_wrapped(self._parse_statement), 751 ) 752 753 def _parse_constraint(self) -> t.Optional[exp.Expression]: 754 return super()._parse_constraint() or self._parse_projection_def() 755 756 class Generator(generator.Generator): 757 QUERY_HINTS = False 758 STRUCT_DELIMITER = ("(", ")") 759 NVL2_SUPPORTED = False 760 TABLESAMPLE_REQUIRES_PARENS = False 761 TABLESAMPLE_SIZE_IS_ROWS = False 762 TABLESAMPLE_KEYWORDS = "SAMPLE" 763 LAST_DAY_SUPPORTS_DATE_PART = False 764 CAN_IMPLEMENT_ARRAY_ANY = True 765 SUPPORTS_TO_NUMBER = False 766 JOIN_HINTS = False 767 TABLE_HINTS = False 768 GROUPINGS_SEP = "" 769 SET_OP_MODIFIERS = False 770 SUPPORTS_TABLE_ALIAS_COLUMNS = False 771 VALUES_AS_TABLE = False 772 773 STRING_TYPE_MAPPING = { 774 exp.DataType.Type.CHAR: "String", 775 exp.DataType.Type.LONGBLOB: "String", 776 exp.DataType.Type.LONGTEXT: "String", 777 exp.DataType.Type.MEDIUMBLOB: "String", 778 exp.DataType.Type.MEDIUMTEXT: "String", 779 exp.DataType.Type.TINYBLOB: "String", 780 exp.DataType.Type.TINYTEXT: "String", 781 exp.DataType.Type.TEXT: "String", 782 exp.DataType.Type.VARBINARY: "String", 783 exp.DataType.Type.VARCHAR: "String", 784 } 785 786 SUPPORTED_JSON_PATH_PARTS = { 787 exp.JSONPathKey, 788 exp.JSONPathRoot, 789 exp.JSONPathSubscript, 790 } 791 792 TYPE_MAPPING = { 793 **generator.Generator.TYPE_MAPPING, 794 **STRING_TYPE_MAPPING, 795 exp.DataType.Type.ARRAY: "Array", 796 exp.DataType.Type.BIGINT: "Int64", 797 exp.DataType.Type.DATE32: "Date32", 798 exp.DataType.Type.DATETIME: "DateTime", 799 exp.DataType.Type.DATETIME64: "DateTime64", 800 exp.DataType.Type.TIMESTAMP: "DateTime", 801 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 802 exp.DataType.Type.DOUBLE: "Float64", 803 exp.DataType.Type.ENUM: "Enum", 804 exp.DataType.Type.ENUM8: "Enum8", 805 exp.DataType.Type.ENUM16: "Enum16", 806 exp.DataType.Type.FIXEDSTRING: "FixedString", 807 exp.DataType.Type.FLOAT: "Float32", 808 exp.DataType.Type.INT: "Int32", 809 exp.DataType.Type.MEDIUMINT: "Int32", 810 exp.DataType.Type.INT128: "Int128", 811 exp.DataType.Type.INT256: "Int256", 812 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 813 exp.DataType.Type.MAP: "Map", 814 exp.DataType.Type.NESTED: "Nested", 815 exp.DataType.Type.NULLABLE: "Nullable", 816 exp.DataType.Type.SMALLINT: "Int16", 817 exp.DataType.Type.STRUCT: "Tuple", 818 exp.DataType.Type.TINYINT: "Int8", 819 exp.DataType.Type.UBIGINT: "UInt64", 820 exp.DataType.Type.UINT: "UInt32", 821 exp.DataType.Type.UINT128: "UInt128", 822 exp.DataType.Type.UINT256: "UInt256", 823 exp.DataType.Type.USMALLINT: "UInt16", 824 exp.DataType.Type.UTINYINT: "UInt8", 825 exp.DataType.Type.IPV4: "IPv4", 826 exp.DataType.Type.IPV6: "IPv6", 827 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 828 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 829 } 830 831 TRANSFORMS = { 832 **generator.Generator.TRANSFORMS, 833 exp.AnyValue: rename_func("any"), 834 exp.ApproxDistinct: rename_func("uniq"), 835 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 836 exp.ArraySize: rename_func("LENGTH"), 837 exp.ArraySum: rename_func("arraySum"), 838 exp.ArgMax: arg_max_or_min_no_count("argMax"), 839 exp.ArgMin: arg_max_or_min_no_count("argMin"), 840 exp.Array: inline_array_sql, 841 exp.CastToStrType: rename_func("CAST"), 842 exp.CountIf: rename_func("countIf"), 843 exp.CompressColumnConstraint: lambda self, 844 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 845 exp.ComputedColumnConstraint: lambda self, 846 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 847 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 848 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 849 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 850 exp.DateStrToDate: rename_func("toDate"), 851 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 852 exp.Explode: rename_func("arrayJoin"), 853 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 854 exp.IsNan: rename_func("isNaN"), 855 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 856 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 857 exp.JSONPathKey: json_path_key_only_name, 858 exp.JSONPathRoot: lambda *_: "", 859 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 860 exp.Nullif: rename_func("nullIf"), 861 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 862 exp.Pivot: no_pivot_sql, 863 exp.Quantile: _quantile_sql, 864 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 865 exp.Rand: rename_func("randCanonical"), 866 exp.StartsWith: rename_func("startsWith"), 867 exp.StrPosition: lambda self, e: self.func( 868 "position", e.this, e.args.get("substr"), e.args.get("position") 869 ), 870 exp.TimeToStr: lambda self, e: self.func( 871 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone") 872 ), 873 exp.TimeStrToTime: _timestrtotime_sql, 874 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 875 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 876 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 877 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 878 exp.MD5Digest: rename_func("MD5"), 879 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 880 exp.SHA: rename_func("SHA1"), 881 exp.SHA2: sha256_sql, 882 exp.UnixToTime: _unix_to_time_sql, 883 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 884 exp.Trim: trim_sql, 885 exp.Variance: rename_func("varSamp"), 886 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 887 exp.Stddev: rename_func("stddevSamp"), 888 } 889 890 PROPERTIES_LOCATION = { 891 **generator.Generator.PROPERTIES_LOCATION, 892 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 893 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 894 exp.OnCluster: exp.Properties.Location.POST_NAME, 895 } 896 897 # There's no list in docs, but it can be found in Clickhouse code 898 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 899 ON_CLUSTER_TARGETS = { 900 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 901 "DATABASE", 902 "TABLE", 903 "VIEW", 904 "DICTIONARY", 905 "INDEX", 906 "FUNCTION", 907 "NAMED COLLECTION", 908 } 909 910 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 911 NON_NULLABLE_TYPES = { 912 exp.DataType.Type.ARRAY, 913 exp.DataType.Type.MAP, 914 exp.DataType.Type.NULLABLE, 915 exp.DataType.Type.STRUCT, 916 } 917 918 def strtodate_sql(self, expression: exp.StrToDate) -> str: 919 strtodate_sql = self.function_fallback_sql(expression) 920 921 if not isinstance(expression.parent, exp.Cast): 922 # StrToDate returns DATEs in other dialects (eg. postgres), so 923 # this branch aims to improve the transpilation to clickhouse 924 return f"CAST({strtodate_sql} AS DATE)" 925 926 return strtodate_sql 927 928 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 929 this = expression.this 930 931 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 932 return self.sql(this) 933 934 return super().cast_sql(expression, safe_prefix=safe_prefix) 935 936 def trycast_sql(self, expression: exp.TryCast) -> str: 937 dtype = expression.to 938 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 939 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 940 dtype.set("nullable", True) 941 942 return super().cast_sql(expression) 943 944 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 945 this = self.json_path_part(expression.this) 946 return str(int(this) + 1) if is_int(this) else this 947 948 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 949 return f"AS {self.sql(expression, 'this')}" 950 951 def _any_to_has( 952 self, 953 expression: exp.EQ | exp.NEQ, 954 default: t.Callable[[t.Any], str], 955 prefix: str = "", 956 ) -> str: 957 if isinstance(expression.left, exp.Any): 958 arr = expression.left 959 this = expression.right 960 elif isinstance(expression.right, exp.Any): 961 arr = expression.right 962 this = expression.left 963 else: 964 return default(expression) 965 966 return prefix + self.func("has", arr.this.unnest(), this) 967 968 def eq_sql(self, expression: exp.EQ) -> str: 969 return self._any_to_has(expression, super().eq_sql) 970 971 def neq_sql(self, expression: exp.NEQ) -> str: 972 return self._any_to_has(expression, super().neq_sql, "NOT ") 973 974 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 975 # Manually add a flag to make the search case-insensitive 976 regex = self.func("CONCAT", "'(?i)'", expression.expression) 977 return self.func("match", expression.this, regex) 978 979 def datatype_sql(self, expression: exp.DataType) -> str: 980 # String is the standard ClickHouse type, every other variant is just an alias. 981 # Additionally, any supplied length parameter will be ignored. 982 # 983 # https://clickhouse.com/docs/en/sql-reference/data-types/string 984 if expression.this in self.STRING_TYPE_MAPPING: 985 dtype = "String" 986 else: 987 dtype = super().datatype_sql(expression) 988 989 # This section changes the type to `Nullable(...)` if the following conditions hold: 990 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 991 # and change their semantics 992 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 993 # constraint: "Type of Map key must be a type, that can be represented by integer or 994 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 995 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 996 parent = expression.parent 997 if ( 998 expression.args.get("nullable") is not False 999 and not ( 1000 isinstance(parent, exp.DataType) 1001 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1002 and expression.index in (None, 0) 1003 ) 1004 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1005 ): 1006 dtype = f"Nullable({dtype})" 1007 1008 return dtype 1009 1010 def cte_sql(self, expression: exp.CTE) -> str: 1011 if expression.args.get("scalar"): 1012 this = self.sql(expression, "this") 1013 alias = self.sql(expression, "alias") 1014 return f"{this} AS {alias}" 1015 1016 return super().cte_sql(expression) 1017 1018 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1019 return super().after_limit_modifiers(expression) + [ 1020 ( 1021 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1022 if expression.args.get("settings") 1023 else "" 1024 ), 1025 ( 1026 self.seg("FORMAT ") + self.sql(expression, "format") 1027 if expression.args.get("format") 1028 else "" 1029 ), 1030 ] 1031 1032 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1033 params = self.expressions(expression, key="params", flat=True) 1034 return self.func(expression.name, *expression.expressions) + f"({params})" 1035 1036 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1037 return self.func(expression.name, *expression.expressions) 1038 1039 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1040 return self.anonymousaggfunc_sql(expression) 1041 1042 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1043 return self.parameterizedagg_sql(expression) 1044 1045 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1046 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1047 1048 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1049 return f"ON CLUSTER {self.sql(expression, 'this')}" 1050 1051 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1052 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1053 exp.Properties.Location.POST_NAME 1054 ): 1055 this_name = self.sql( 1056 expression.this if isinstance(expression.this, exp.Schema) else expression, 1057 "this", 1058 ) 1059 this_properties = " ".join( 1060 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1061 ) 1062 this_schema = self.schema_columns_sql(expression.this) 1063 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1064 1065 return super().createable_sql(expression, locations) 1066 1067 def create_sql(self, expression: exp.Create) -> str: 1068 # The comment property comes last in CTAS statements, i.e. after the query 1069 query = expression.expression 1070 if isinstance(query, exp.Query): 1071 comment_prop = expression.find(exp.SchemaCommentProperty) 1072 if comment_prop: 1073 comment_prop.pop() 1074 query.replace(exp.paren(query)) 1075 else: 1076 comment_prop = None 1077 1078 create_sql = super().create_sql(expression) 1079 1080 comment_sql = self.sql(comment_prop) 1081 comment_sql = f" {comment_sql}" if comment_sql else "" 1082 1083 return f"{create_sql}{comment_sql}" 1084 1085 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1086 this = self.indent(self.sql(expression, "this")) 1087 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1088 1089 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1090 this = self.sql(expression, "this") 1091 this = f" {this}" if this else "" 1092 expr = self.sql(expression, "expression") 1093 expr = f" {expr}" if expr else "" 1094 index_type = self.sql(expression, "index_type") 1095 index_type = f" TYPE {index_type}" if index_type else "" 1096 granularity = self.sql(expression, "granularity") 1097 granularity = f" GRANULARITY {granularity}" if granularity else "" 1098 1099 return f"INDEX{this}{expr}{index_type}{granularity}" 1100 1101 def partition_sql(self, expression: exp.Partition) -> str: 1102 return f"PARTITION {self.expressions(expression, flat=True)}" 1103 1104 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1105 return f"ID {self.sql(expression.this)}" 1106 1107 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1108 return ( 1109 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1110 ) 1111 1112 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1113 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 RANGE_PARSERS = { 408 **parser.Parser.RANGE_PARSERS, 409 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 410 and self._parse_in(this, is_global=True), 411 } 412 413 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 414 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 415 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 416 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 417 418 JOIN_KINDS = { 419 *parser.Parser.JOIN_KINDS, 420 TokenType.ANY, 421 TokenType.ASOF, 422 TokenType.ARRAY, 423 } 424 425 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 426 TokenType.ANY, 427 TokenType.ARRAY, 428 TokenType.FINAL, 429 TokenType.FORMAT, 430 TokenType.SETTINGS, 431 } 432 433 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 434 TokenType.FORMAT, 435 } 436 437 LOG_DEFAULTS_TO_LN = True 438 439 QUERY_MODIFIER_PARSERS = { 440 **parser.Parser.QUERY_MODIFIER_PARSERS, 441 TokenType.SETTINGS: lambda self: ( 442 "settings", 443 self._advance() or self._parse_csv(self._parse_assignment), 444 ), 445 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 446 } 447 448 CONSTRAINT_PARSERS = { 449 **parser.Parser.CONSTRAINT_PARSERS, 450 "INDEX": lambda self: self._parse_index_constraint(), 451 "CODEC": lambda self: self._parse_compress(), 452 } 453 454 ALTER_PARSERS = { 455 **parser.Parser.ALTER_PARSERS, 456 "REPLACE": lambda self: self._parse_alter_table_replace(), 457 } 458 459 SCHEMA_UNNAMED_CONSTRAINTS = { 460 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 461 "INDEX", 462 } 463 464 PLACEHOLDER_PARSERS = { 465 **parser.Parser.PLACEHOLDER_PARSERS, 466 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 467 } 468 469 def _parse_types( 470 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 471 ) -> t.Optional[exp.Expression]: 472 dtype = super()._parse_types( 473 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 474 ) 475 if isinstance(dtype, exp.DataType): 476 # Mark every type as non-nullable which is ClickHouse's default. This marker 477 # helps us transpile types from other dialects to ClickHouse, so that we can 478 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 479 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 480 # `Nullable` type constructor 481 dtype.set("nullable", False) 482 483 return dtype 484 485 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 486 index = self._index 487 this = self._parse_bitwise() 488 if self._match(TokenType.FROM): 489 self._retreat(index) 490 return super()._parse_extract() 491 492 # We return Anonymous here because extract and regexpExtract have different semantics, 493 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 494 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 495 # 496 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 497 self._match(TokenType.COMMA) 498 return self.expression( 499 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 500 ) 501 502 def _parse_assignment(self) -> t.Optional[exp.Expression]: 503 this = super()._parse_assignment() 504 505 if self._match(TokenType.PLACEHOLDER): 506 return self.expression( 507 exp.If, 508 this=this, 509 true=self._parse_assignment(), 510 false=self._match(TokenType.COLON) and self._parse_assignment(), 511 ) 512 513 return this 514 515 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 516 """ 517 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 518 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 519 """ 520 this = self._parse_id_var() 521 self._match(TokenType.COLON) 522 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 523 self._match_text_seq("IDENTIFIER") and "Identifier" 524 ) 525 526 if not kind: 527 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 528 elif not self._match(TokenType.R_BRACE): 529 self.raise_error("Expecting }") 530 531 return self.expression(exp.Placeholder, this=this, kind=kind) 532 533 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 534 this = super()._parse_in(this) 535 this.set("is_global", is_global) 536 return this 537 538 def _parse_table( 539 self, 540 schema: bool = False, 541 joins: bool = False, 542 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 543 parse_bracket: bool = False, 544 is_db_reference: bool = False, 545 parse_partition: bool = False, 546 ) -> t.Optional[exp.Expression]: 547 this = super()._parse_table( 548 schema=schema, 549 joins=joins, 550 alias_tokens=alias_tokens, 551 parse_bracket=parse_bracket, 552 is_db_reference=is_db_reference, 553 ) 554 555 if self._match(TokenType.FINAL): 556 this = self.expression(exp.Final, this=this) 557 558 return this 559 560 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 561 return super()._parse_position(haystack_first=True) 562 563 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 564 def _parse_cte(self) -> exp.CTE: 565 # WITH <identifier> AS <subquery expression> 566 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 567 568 if not cte: 569 # WITH <expression> AS <identifier> 570 cte = self.expression( 571 exp.CTE, 572 this=self._parse_assignment(), 573 alias=self._parse_table_alias(), 574 scalar=True, 575 ) 576 577 return cte 578 579 def _parse_join_parts( 580 self, 581 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 582 is_global = self._match(TokenType.GLOBAL) and self._prev 583 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 584 585 if kind_pre: 586 kind = self._match_set(self.JOIN_KINDS) and self._prev 587 side = self._match_set(self.JOIN_SIDES) and self._prev 588 return is_global, side, kind 589 590 return ( 591 is_global, 592 self._match_set(self.JOIN_SIDES) and self._prev, 593 self._match_set(self.JOIN_KINDS) and self._prev, 594 ) 595 596 def _parse_join( 597 self, skip_join_token: bool = False, parse_bracket: bool = False 598 ) -> t.Optional[exp.Join]: 599 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 600 if join: 601 join.set("global", join.args.pop("method", None)) 602 603 return join 604 605 def _parse_function( 606 self, 607 functions: t.Optional[t.Dict[str, t.Callable]] = None, 608 anonymous: bool = False, 609 optional_parens: bool = True, 610 any_token: bool = False, 611 ) -> t.Optional[exp.Expression]: 612 expr = super()._parse_function( 613 functions=functions, 614 anonymous=anonymous, 615 optional_parens=optional_parens, 616 any_token=any_token, 617 ) 618 619 func = expr.this if isinstance(expr, exp.Window) else expr 620 621 # Aggregate functions can be split in 2 parts: <func_name><suffix> 622 parts = ( 623 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 624 ) 625 626 if parts: 627 params = self._parse_func_params(func) 628 629 kwargs = { 630 "this": func.this, 631 "expressions": func.expressions, 632 } 633 if parts[1]: 634 kwargs["parts"] = parts 635 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 636 else: 637 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 638 639 kwargs["exp_class"] = exp_class 640 if params: 641 kwargs["params"] = params 642 643 func = self.expression(**kwargs) 644 645 if isinstance(expr, exp.Window): 646 # The window's func was parsed as Anonymous in base parser, fix its 647 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 648 expr.set("this", func) 649 elif params: 650 # Params have blocked super()._parse_function() from parsing the following window 651 # (if that exists) as they're standing between the function call and the window spec 652 expr = self._parse_window(func) 653 else: 654 expr = func 655 656 return expr 657 658 def _parse_func_params( 659 self, this: t.Optional[exp.Func] = None 660 ) -> t.Optional[t.List[exp.Expression]]: 661 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 662 return self._parse_csv(self._parse_lambda) 663 664 if self._match(TokenType.L_PAREN): 665 params = self._parse_csv(self._parse_lambda) 666 self._match_r_paren(this) 667 return params 668 669 return None 670 671 def _parse_quantile(self) -> exp.Quantile: 672 this = self._parse_lambda() 673 params = self._parse_func_params() 674 if params: 675 return self.expression(exp.Quantile, this=params[0], quantile=this) 676 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 677 678 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 679 return super()._parse_wrapped_id_vars(optional=True) 680 681 def _parse_primary_key( 682 self, wrapped_optional: bool = False, in_props: bool = False 683 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 684 return super()._parse_primary_key( 685 wrapped_optional=wrapped_optional or in_props, in_props=in_props 686 ) 687 688 def _parse_on_property(self) -> t.Optional[exp.Expression]: 689 index = self._index 690 if self._match_text_seq("CLUSTER"): 691 this = self._parse_id_var() 692 if this: 693 return self.expression(exp.OnCluster, this=this) 694 else: 695 self._retreat(index) 696 return None 697 698 def _parse_index_constraint( 699 self, kind: t.Optional[str] = None 700 ) -> exp.IndexColumnConstraint: 701 # INDEX name1 expr TYPE type1(args) GRANULARITY value 702 this = self._parse_id_var() 703 expression = self._parse_assignment() 704 705 index_type = self._match_text_seq("TYPE") and ( 706 self._parse_function() or self._parse_var() 707 ) 708 709 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 710 711 return self.expression( 712 exp.IndexColumnConstraint, 713 this=this, 714 expression=expression, 715 index_type=index_type, 716 granularity=granularity, 717 ) 718 719 def _parse_partition(self) -> t.Optional[exp.Partition]: 720 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 721 if not self._match(TokenType.PARTITION): 722 return None 723 724 if self._match_text_seq("ID"): 725 # Corresponds to the PARTITION ID <string_value> syntax 726 expressions: t.List[exp.Expression] = [ 727 self.expression(exp.PartitionId, this=self._parse_string()) 728 ] 729 else: 730 expressions = self._parse_expressions() 731 732 return self.expression(exp.Partition, expressions=expressions) 733 734 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 735 partition = self._parse_partition() 736 737 if not partition or not self._match(TokenType.FROM): 738 return None 739 740 return self.expression( 741 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 742 ) 743 744 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 745 if not self._match_text_seq("PROJECTION"): 746 return None 747 748 return self.expression( 749 exp.ProjectionDef, 750 this=self._parse_id_var(), 751 expression=self._parse_wrapped(self._parse_statement), 752 ) 753 754 def _parse_constraint(self) -> t.Optional[exp.Expression]: 755 return super()._parse_constraint() or self._parse_projection_def() 756 757 class Generator(generator.Generator): 758 QUERY_HINTS = False 759 STRUCT_DELIMITER = ("(", ")") 760 NVL2_SUPPORTED = False 761 TABLESAMPLE_REQUIRES_PARENS = False 762 TABLESAMPLE_SIZE_IS_ROWS = False 763 TABLESAMPLE_KEYWORDS = "SAMPLE" 764 LAST_DAY_SUPPORTS_DATE_PART = False 765 CAN_IMPLEMENT_ARRAY_ANY = True 766 SUPPORTS_TO_NUMBER = False 767 JOIN_HINTS = False 768 TABLE_HINTS = False 769 GROUPINGS_SEP = "" 770 SET_OP_MODIFIERS = False 771 SUPPORTS_TABLE_ALIAS_COLUMNS = False 772 VALUES_AS_TABLE = False 773 774 STRING_TYPE_MAPPING = { 775 exp.DataType.Type.CHAR: "String", 776 exp.DataType.Type.LONGBLOB: "String", 777 exp.DataType.Type.LONGTEXT: "String", 778 exp.DataType.Type.MEDIUMBLOB: "String", 779 exp.DataType.Type.MEDIUMTEXT: "String", 780 exp.DataType.Type.TINYBLOB: "String", 781 exp.DataType.Type.TINYTEXT: "String", 782 exp.DataType.Type.TEXT: "String", 783 exp.DataType.Type.VARBINARY: "String", 784 exp.DataType.Type.VARCHAR: "String", 785 } 786 787 SUPPORTED_JSON_PATH_PARTS = { 788 exp.JSONPathKey, 789 exp.JSONPathRoot, 790 exp.JSONPathSubscript, 791 } 792 793 TYPE_MAPPING = { 794 **generator.Generator.TYPE_MAPPING, 795 **STRING_TYPE_MAPPING, 796 exp.DataType.Type.ARRAY: "Array", 797 exp.DataType.Type.BIGINT: "Int64", 798 exp.DataType.Type.DATE32: "Date32", 799 exp.DataType.Type.DATETIME: "DateTime", 800 exp.DataType.Type.DATETIME64: "DateTime64", 801 exp.DataType.Type.TIMESTAMP: "DateTime", 802 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 803 exp.DataType.Type.DOUBLE: "Float64", 804 exp.DataType.Type.ENUM: "Enum", 805 exp.DataType.Type.ENUM8: "Enum8", 806 exp.DataType.Type.ENUM16: "Enum16", 807 exp.DataType.Type.FIXEDSTRING: "FixedString", 808 exp.DataType.Type.FLOAT: "Float32", 809 exp.DataType.Type.INT: "Int32", 810 exp.DataType.Type.MEDIUMINT: "Int32", 811 exp.DataType.Type.INT128: "Int128", 812 exp.DataType.Type.INT256: "Int256", 813 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 814 exp.DataType.Type.MAP: "Map", 815 exp.DataType.Type.NESTED: "Nested", 816 exp.DataType.Type.NULLABLE: "Nullable", 817 exp.DataType.Type.SMALLINT: "Int16", 818 exp.DataType.Type.STRUCT: "Tuple", 819 exp.DataType.Type.TINYINT: "Int8", 820 exp.DataType.Type.UBIGINT: "UInt64", 821 exp.DataType.Type.UINT: "UInt32", 822 exp.DataType.Type.UINT128: "UInt128", 823 exp.DataType.Type.UINT256: "UInt256", 824 exp.DataType.Type.USMALLINT: "UInt16", 825 exp.DataType.Type.UTINYINT: "UInt8", 826 exp.DataType.Type.IPV4: "IPv4", 827 exp.DataType.Type.IPV6: "IPv6", 828 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 829 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 830 } 831 832 TRANSFORMS = { 833 **generator.Generator.TRANSFORMS, 834 exp.AnyValue: rename_func("any"), 835 exp.ApproxDistinct: rename_func("uniq"), 836 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 837 exp.ArraySize: rename_func("LENGTH"), 838 exp.ArraySum: rename_func("arraySum"), 839 exp.ArgMax: arg_max_or_min_no_count("argMax"), 840 exp.ArgMin: arg_max_or_min_no_count("argMin"), 841 exp.Array: inline_array_sql, 842 exp.CastToStrType: rename_func("CAST"), 843 exp.CountIf: rename_func("countIf"), 844 exp.CompressColumnConstraint: lambda self, 845 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 846 exp.ComputedColumnConstraint: lambda self, 847 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 848 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 849 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 850 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 851 exp.DateStrToDate: rename_func("toDate"), 852 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 853 exp.Explode: rename_func("arrayJoin"), 854 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 855 exp.IsNan: rename_func("isNaN"), 856 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 857 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 858 exp.JSONPathKey: json_path_key_only_name, 859 exp.JSONPathRoot: lambda *_: "", 860 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 861 exp.Nullif: rename_func("nullIf"), 862 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 863 exp.Pivot: no_pivot_sql, 864 exp.Quantile: _quantile_sql, 865 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 866 exp.Rand: rename_func("randCanonical"), 867 exp.StartsWith: rename_func("startsWith"), 868 exp.StrPosition: lambda self, e: self.func( 869 "position", e.this, e.args.get("substr"), e.args.get("position") 870 ), 871 exp.TimeToStr: lambda self, e: self.func( 872 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone") 873 ), 874 exp.TimeStrToTime: _timestrtotime_sql, 875 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 876 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 877 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 878 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 879 exp.MD5Digest: rename_func("MD5"), 880 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 881 exp.SHA: rename_func("SHA1"), 882 exp.SHA2: sha256_sql, 883 exp.UnixToTime: _unix_to_time_sql, 884 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 885 exp.Trim: trim_sql, 886 exp.Variance: rename_func("varSamp"), 887 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 888 exp.Stddev: rename_func("stddevSamp"), 889 } 890 891 PROPERTIES_LOCATION = { 892 **generator.Generator.PROPERTIES_LOCATION, 893 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 894 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 895 exp.OnCluster: exp.Properties.Location.POST_NAME, 896 } 897 898 # There's no list in docs, but it can be found in Clickhouse code 899 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 900 ON_CLUSTER_TARGETS = { 901 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 902 "DATABASE", 903 "TABLE", 904 "VIEW", 905 "DICTIONARY", 906 "INDEX", 907 "FUNCTION", 908 "NAMED COLLECTION", 909 } 910 911 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 912 NON_NULLABLE_TYPES = { 913 exp.DataType.Type.ARRAY, 914 exp.DataType.Type.MAP, 915 exp.DataType.Type.NULLABLE, 916 exp.DataType.Type.STRUCT, 917 } 918 919 def strtodate_sql(self, expression: exp.StrToDate) -> str: 920 strtodate_sql = self.function_fallback_sql(expression) 921 922 if not isinstance(expression.parent, exp.Cast): 923 # StrToDate returns DATEs in other dialects (eg. postgres), so 924 # this branch aims to improve the transpilation to clickhouse 925 return f"CAST({strtodate_sql} AS DATE)" 926 927 return strtodate_sql 928 929 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 930 this = expression.this 931 932 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 933 return self.sql(this) 934 935 return super().cast_sql(expression, safe_prefix=safe_prefix) 936 937 def trycast_sql(self, expression: exp.TryCast) -> str: 938 dtype = expression.to 939 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 940 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 941 dtype.set("nullable", True) 942 943 return super().cast_sql(expression) 944 945 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 946 this = self.json_path_part(expression.this) 947 return str(int(this) + 1) if is_int(this) else this 948 949 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 950 return f"AS {self.sql(expression, 'this')}" 951 952 def _any_to_has( 953 self, 954 expression: exp.EQ | exp.NEQ, 955 default: t.Callable[[t.Any], str], 956 prefix: str = "", 957 ) -> str: 958 if isinstance(expression.left, exp.Any): 959 arr = expression.left 960 this = expression.right 961 elif isinstance(expression.right, exp.Any): 962 arr = expression.right 963 this = expression.left 964 else: 965 return default(expression) 966 967 return prefix + self.func("has", arr.this.unnest(), this) 968 969 def eq_sql(self, expression: exp.EQ) -> str: 970 return self._any_to_has(expression, super().eq_sql) 971 972 def neq_sql(self, expression: exp.NEQ) -> str: 973 return self._any_to_has(expression, super().neq_sql, "NOT ") 974 975 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 976 # Manually add a flag to make the search case-insensitive 977 regex = self.func("CONCAT", "'(?i)'", expression.expression) 978 return self.func("match", expression.this, regex) 979 980 def datatype_sql(self, expression: exp.DataType) -> str: 981 # String is the standard ClickHouse type, every other variant is just an alias. 982 # Additionally, any supplied length parameter will be ignored. 983 # 984 # https://clickhouse.com/docs/en/sql-reference/data-types/string 985 if expression.this in self.STRING_TYPE_MAPPING: 986 dtype = "String" 987 else: 988 dtype = super().datatype_sql(expression) 989 990 # This section changes the type to `Nullable(...)` if the following conditions hold: 991 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 992 # and change their semantics 993 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 994 # constraint: "Type of Map key must be a type, that can be represented by integer or 995 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 996 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 997 parent = expression.parent 998 if ( 999 expression.args.get("nullable") is not False 1000 and not ( 1001 isinstance(parent, exp.DataType) 1002 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1003 and expression.index in (None, 0) 1004 ) 1005 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1006 ): 1007 dtype = f"Nullable({dtype})" 1008 1009 return dtype 1010 1011 def cte_sql(self, expression: exp.CTE) -> str: 1012 if expression.args.get("scalar"): 1013 this = self.sql(expression, "this") 1014 alias = self.sql(expression, "alias") 1015 return f"{this} AS {alias}" 1016 1017 return super().cte_sql(expression) 1018 1019 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1020 return super().after_limit_modifiers(expression) + [ 1021 ( 1022 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1023 if expression.args.get("settings") 1024 else "" 1025 ), 1026 ( 1027 self.seg("FORMAT ") + self.sql(expression, "format") 1028 if expression.args.get("format") 1029 else "" 1030 ), 1031 ] 1032 1033 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1034 params = self.expressions(expression, key="params", flat=True) 1035 return self.func(expression.name, *expression.expressions) + f"({params})" 1036 1037 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1038 return self.func(expression.name, *expression.expressions) 1039 1040 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1041 return self.anonymousaggfunc_sql(expression) 1042 1043 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1044 return self.parameterizedagg_sql(expression) 1045 1046 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1047 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1048 1049 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1050 return f"ON CLUSTER {self.sql(expression, 'this')}" 1051 1052 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1053 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1054 exp.Properties.Location.POST_NAME 1055 ): 1056 this_name = self.sql( 1057 expression.this if isinstance(expression.this, exp.Schema) else expression, 1058 "this", 1059 ) 1060 this_properties = " ".join( 1061 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1062 ) 1063 this_schema = self.schema_columns_sql(expression.this) 1064 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1065 1066 return super().createable_sql(expression, locations) 1067 1068 def create_sql(self, expression: exp.Create) -> str: 1069 # The comment property comes last in CTAS statements, i.e. after the query 1070 query = expression.expression 1071 if isinstance(query, exp.Query): 1072 comment_prop = expression.find(exp.SchemaCommentProperty) 1073 if comment_prop: 1074 comment_prop.pop() 1075 query.replace(exp.paren(query)) 1076 else: 1077 comment_prop = None 1078 1079 create_sql = super().create_sql(expression) 1080 1081 comment_sql = self.sql(comment_prop) 1082 comment_sql = f" {comment_sql}" if comment_sql else "" 1083 1084 return f"{create_sql}{comment_sql}" 1085 1086 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1087 this = self.indent(self.sql(expression, "this")) 1088 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1089 1090 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1091 this = self.sql(expression, "this") 1092 this = f" {this}" if this else "" 1093 expr = self.sql(expression, "expression") 1094 expr = f" {expr}" if expr else "" 1095 index_type = self.sql(expression, "index_type") 1096 index_type = f" TYPE {index_type}" if index_type else "" 1097 granularity = self.sql(expression, "granularity") 1098 granularity = f" GRANULARITY {granularity}" if granularity else "" 1099 1100 return f"INDEX{this}{expr}{index_type}{granularity}" 1101 1102 def partition_sql(self, expression: exp.Partition) -> str: 1103 return f"PARTITION {self.expressions(expression, flat=True)}" 1104 1105 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1106 return f"ID {self.sql(expression.this)}" 1107 1108 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1109 return ( 1110 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1111 ) 1112 1113 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1114 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 RANGE_PARSERS = { 408 **parser.Parser.RANGE_PARSERS, 409 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 410 and self._parse_in(this, is_global=True), 411 } 412 413 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 414 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 415 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 416 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 417 418 JOIN_KINDS = { 419 *parser.Parser.JOIN_KINDS, 420 TokenType.ANY, 421 TokenType.ASOF, 422 TokenType.ARRAY, 423 } 424 425 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 426 TokenType.ANY, 427 TokenType.ARRAY, 428 TokenType.FINAL, 429 TokenType.FORMAT, 430 TokenType.SETTINGS, 431 } 432 433 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 434 TokenType.FORMAT, 435 } 436 437 LOG_DEFAULTS_TO_LN = True 438 439 QUERY_MODIFIER_PARSERS = { 440 **parser.Parser.QUERY_MODIFIER_PARSERS, 441 TokenType.SETTINGS: lambda self: ( 442 "settings", 443 self._advance() or self._parse_csv(self._parse_assignment), 444 ), 445 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 446 } 447 448 CONSTRAINT_PARSERS = { 449 **parser.Parser.CONSTRAINT_PARSERS, 450 "INDEX": lambda self: self._parse_index_constraint(), 451 "CODEC": lambda self: self._parse_compress(), 452 } 453 454 ALTER_PARSERS = { 455 **parser.Parser.ALTER_PARSERS, 456 "REPLACE": lambda self: self._parse_alter_table_replace(), 457 } 458 459 SCHEMA_UNNAMED_CONSTRAINTS = { 460 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 461 "INDEX", 462 } 463 464 PLACEHOLDER_PARSERS = { 465 **parser.Parser.PLACEHOLDER_PARSERS, 466 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 467 } 468 469 def _parse_types( 470 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 471 ) -> t.Optional[exp.Expression]: 472 dtype = super()._parse_types( 473 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 474 ) 475 if isinstance(dtype, exp.DataType): 476 # Mark every type as non-nullable which is ClickHouse's default. This marker 477 # helps us transpile types from other dialects to ClickHouse, so that we can 478 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 479 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 480 # `Nullable` type constructor 481 dtype.set("nullable", False) 482 483 return dtype 484 485 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 486 index = self._index 487 this = self._parse_bitwise() 488 if self._match(TokenType.FROM): 489 self._retreat(index) 490 return super()._parse_extract() 491 492 # We return Anonymous here because extract and regexpExtract have different semantics, 493 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 494 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 495 # 496 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 497 self._match(TokenType.COMMA) 498 return self.expression( 499 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 500 ) 501 502 def _parse_assignment(self) -> t.Optional[exp.Expression]: 503 this = super()._parse_assignment() 504 505 if self._match(TokenType.PLACEHOLDER): 506 return self.expression( 507 exp.If, 508 this=this, 509 true=self._parse_assignment(), 510 false=self._match(TokenType.COLON) and self._parse_assignment(), 511 ) 512 513 return this 514 515 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 516 """ 517 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 518 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 519 """ 520 this = self._parse_id_var() 521 self._match(TokenType.COLON) 522 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 523 self._match_text_seq("IDENTIFIER") and "Identifier" 524 ) 525 526 if not kind: 527 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 528 elif not self._match(TokenType.R_BRACE): 529 self.raise_error("Expecting }") 530 531 return self.expression(exp.Placeholder, this=this, kind=kind) 532 533 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 534 this = super()._parse_in(this) 535 this.set("is_global", is_global) 536 return this 537 538 def _parse_table( 539 self, 540 schema: bool = False, 541 joins: bool = False, 542 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 543 parse_bracket: bool = False, 544 is_db_reference: bool = False, 545 parse_partition: bool = False, 546 ) -> t.Optional[exp.Expression]: 547 this = super()._parse_table( 548 schema=schema, 549 joins=joins, 550 alias_tokens=alias_tokens, 551 parse_bracket=parse_bracket, 552 is_db_reference=is_db_reference, 553 ) 554 555 if self._match(TokenType.FINAL): 556 this = self.expression(exp.Final, this=this) 557 558 return this 559 560 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 561 return super()._parse_position(haystack_first=True) 562 563 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 564 def _parse_cte(self) -> exp.CTE: 565 # WITH <identifier> AS <subquery expression> 566 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 567 568 if not cte: 569 # WITH <expression> AS <identifier> 570 cte = self.expression( 571 exp.CTE, 572 this=self._parse_assignment(), 573 alias=self._parse_table_alias(), 574 scalar=True, 575 ) 576 577 return cte 578 579 def _parse_join_parts( 580 self, 581 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 582 is_global = self._match(TokenType.GLOBAL) and self._prev 583 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 584 585 if kind_pre: 586 kind = self._match_set(self.JOIN_KINDS) and self._prev 587 side = self._match_set(self.JOIN_SIDES) and self._prev 588 return is_global, side, kind 589 590 return ( 591 is_global, 592 self._match_set(self.JOIN_SIDES) and self._prev, 593 self._match_set(self.JOIN_KINDS) and self._prev, 594 ) 595 596 def _parse_join( 597 self, skip_join_token: bool = False, parse_bracket: bool = False 598 ) -> t.Optional[exp.Join]: 599 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 600 if join: 601 join.set("global", join.args.pop("method", None)) 602 603 return join 604 605 def _parse_function( 606 self, 607 functions: t.Optional[t.Dict[str, t.Callable]] = None, 608 anonymous: bool = False, 609 optional_parens: bool = True, 610 any_token: bool = False, 611 ) -> t.Optional[exp.Expression]: 612 expr = super()._parse_function( 613 functions=functions, 614 anonymous=anonymous, 615 optional_parens=optional_parens, 616 any_token=any_token, 617 ) 618 619 func = expr.this if isinstance(expr, exp.Window) else expr 620 621 # Aggregate functions can be split in 2 parts: <func_name><suffix> 622 parts = ( 623 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 624 ) 625 626 if parts: 627 params = self._parse_func_params(func) 628 629 kwargs = { 630 "this": func.this, 631 "expressions": func.expressions, 632 } 633 if parts[1]: 634 kwargs["parts"] = parts 635 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 636 else: 637 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 638 639 kwargs["exp_class"] = exp_class 640 if params: 641 kwargs["params"] = params 642 643 func = self.expression(**kwargs) 644 645 if isinstance(expr, exp.Window): 646 # The window's func was parsed as Anonymous in base parser, fix its 647 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 648 expr.set("this", func) 649 elif params: 650 # Params have blocked super()._parse_function() from parsing the following window 651 # (if that exists) as they're standing between the function call and the window spec 652 expr = self._parse_window(func) 653 else: 654 expr = func 655 656 return expr 657 658 def _parse_func_params( 659 self, this: t.Optional[exp.Func] = None 660 ) -> t.Optional[t.List[exp.Expression]]: 661 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 662 return self._parse_csv(self._parse_lambda) 663 664 if self._match(TokenType.L_PAREN): 665 params = self._parse_csv(self._parse_lambda) 666 self._match_r_paren(this) 667 return params 668 669 return None 670 671 def _parse_quantile(self) -> exp.Quantile: 672 this = self._parse_lambda() 673 params = self._parse_func_params() 674 if params: 675 return self.expression(exp.Quantile, this=params[0], quantile=this) 676 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 677 678 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 679 return super()._parse_wrapped_id_vars(optional=True) 680 681 def _parse_primary_key( 682 self, wrapped_optional: bool = False, in_props: bool = False 683 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 684 return super()._parse_primary_key( 685 wrapped_optional=wrapped_optional or in_props, in_props=in_props 686 ) 687 688 def _parse_on_property(self) -> t.Optional[exp.Expression]: 689 index = self._index 690 if self._match_text_seq("CLUSTER"): 691 this = self._parse_id_var() 692 if this: 693 return self.expression(exp.OnCluster, this=this) 694 else: 695 self._retreat(index) 696 return None 697 698 def _parse_index_constraint( 699 self, kind: t.Optional[str] = None 700 ) -> exp.IndexColumnConstraint: 701 # INDEX name1 expr TYPE type1(args) GRANULARITY value 702 this = self._parse_id_var() 703 expression = self._parse_assignment() 704 705 index_type = self._match_text_seq("TYPE") and ( 706 self._parse_function() or self._parse_var() 707 ) 708 709 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 710 711 return self.expression( 712 exp.IndexColumnConstraint, 713 this=this, 714 expression=expression, 715 index_type=index_type, 716 granularity=granularity, 717 ) 718 719 def _parse_partition(self) -> t.Optional[exp.Partition]: 720 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 721 if not self._match(TokenType.PARTITION): 722 return None 723 724 if self._match_text_seq("ID"): 725 # Corresponds to the PARTITION ID <string_value> syntax 726 expressions: t.List[exp.Expression] = [ 727 self.expression(exp.PartitionId, this=self._parse_string()) 728 ] 729 else: 730 expressions = self._parse_expressions() 731 732 return self.expression(exp.Partition, expressions=expressions) 733 734 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 735 partition = self._parse_partition() 736 737 if not partition or not self._match(TokenType.FROM): 738 return None 739 740 return self.expression( 741 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 742 ) 743 744 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 745 if not self._match_text_seq("PROJECTION"): 746 return None 747 748 return self.expression( 749 exp.ProjectionDef, 750 this=self._parse_id_var(), 751 expression=self._parse_wrapped(self._parse_statement), 752 ) 753 754 def _parse_constraint(self) -> t.Optional[exp.Expression]: 755 return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
757 class Generator(generator.Generator): 758 QUERY_HINTS = False 759 STRUCT_DELIMITER = ("(", ")") 760 NVL2_SUPPORTED = False 761 TABLESAMPLE_REQUIRES_PARENS = False 762 TABLESAMPLE_SIZE_IS_ROWS = False 763 TABLESAMPLE_KEYWORDS = "SAMPLE" 764 LAST_DAY_SUPPORTS_DATE_PART = False 765 CAN_IMPLEMENT_ARRAY_ANY = True 766 SUPPORTS_TO_NUMBER = False 767 JOIN_HINTS = False 768 TABLE_HINTS = False 769 GROUPINGS_SEP = "" 770 SET_OP_MODIFIERS = False 771 SUPPORTS_TABLE_ALIAS_COLUMNS = False 772 VALUES_AS_TABLE = False 773 774 STRING_TYPE_MAPPING = { 775 exp.DataType.Type.CHAR: "String", 776 exp.DataType.Type.LONGBLOB: "String", 777 exp.DataType.Type.LONGTEXT: "String", 778 exp.DataType.Type.MEDIUMBLOB: "String", 779 exp.DataType.Type.MEDIUMTEXT: "String", 780 exp.DataType.Type.TINYBLOB: "String", 781 exp.DataType.Type.TINYTEXT: "String", 782 exp.DataType.Type.TEXT: "String", 783 exp.DataType.Type.VARBINARY: "String", 784 exp.DataType.Type.VARCHAR: "String", 785 } 786 787 SUPPORTED_JSON_PATH_PARTS = { 788 exp.JSONPathKey, 789 exp.JSONPathRoot, 790 exp.JSONPathSubscript, 791 } 792 793 TYPE_MAPPING = { 794 **generator.Generator.TYPE_MAPPING, 795 **STRING_TYPE_MAPPING, 796 exp.DataType.Type.ARRAY: "Array", 797 exp.DataType.Type.BIGINT: "Int64", 798 exp.DataType.Type.DATE32: "Date32", 799 exp.DataType.Type.DATETIME: "DateTime", 800 exp.DataType.Type.DATETIME64: "DateTime64", 801 exp.DataType.Type.TIMESTAMP: "DateTime", 802 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 803 exp.DataType.Type.DOUBLE: "Float64", 804 exp.DataType.Type.ENUM: "Enum", 805 exp.DataType.Type.ENUM8: "Enum8", 806 exp.DataType.Type.ENUM16: "Enum16", 807 exp.DataType.Type.FIXEDSTRING: "FixedString", 808 exp.DataType.Type.FLOAT: "Float32", 809 exp.DataType.Type.INT: "Int32", 810 exp.DataType.Type.MEDIUMINT: "Int32", 811 exp.DataType.Type.INT128: "Int128", 812 exp.DataType.Type.INT256: "Int256", 813 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 814 exp.DataType.Type.MAP: "Map", 815 exp.DataType.Type.NESTED: "Nested", 816 exp.DataType.Type.NULLABLE: "Nullable", 817 exp.DataType.Type.SMALLINT: "Int16", 818 exp.DataType.Type.STRUCT: "Tuple", 819 exp.DataType.Type.TINYINT: "Int8", 820 exp.DataType.Type.UBIGINT: "UInt64", 821 exp.DataType.Type.UINT: "UInt32", 822 exp.DataType.Type.UINT128: "UInt128", 823 exp.DataType.Type.UINT256: "UInt256", 824 exp.DataType.Type.USMALLINT: "UInt16", 825 exp.DataType.Type.UTINYINT: "UInt8", 826 exp.DataType.Type.IPV4: "IPv4", 827 exp.DataType.Type.IPV6: "IPv6", 828 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 829 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 830 } 831 832 TRANSFORMS = { 833 **generator.Generator.TRANSFORMS, 834 exp.AnyValue: rename_func("any"), 835 exp.ApproxDistinct: rename_func("uniq"), 836 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 837 exp.ArraySize: rename_func("LENGTH"), 838 exp.ArraySum: rename_func("arraySum"), 839 exp.ArgMax: arg_max_or_min_no_count("argMax"), 840 exp.ArgMin: arg_max_or_min_no_count("argMin"), 841 exp.Array: inline_array_sql, 842 exp.CastToStrType: rename_func("CAST"), 843 exp.CountIf: rename_func("countIf"), 844 exp.CompressColumnConstraint: lambda self, 845 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 846 exp.ComputedColumnConstraint: lambda self, 847 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 848 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 849 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 850 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 851 exp.DateStrToDate: rename_func("toDate"), 852 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 853 exp.Explode: rename_func("arrayJoin"), 854 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 855 exp.IsNan: rename_func("isNaN"), 856 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 857 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 858 exp.JSONPathKey: json_path_key_only_name, 859 exp.JSONPathRoot: lambda *_: "", 860 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 861 exp.Nullif: rename_func("nullIf"), 862 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 863 exp.Pivot: no_pivot_sql, 864 exp.Quantile: _quantile_sql, 865 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 866 exp.Rand: rename_func("randCanonical"), 867 exp.StartsWith: rename_func("startsWith"), 868 exp.StrPosition: lambda self, e: self.func( 869 "position", e.this, e.args.get("substr"), e.args.get("position") 870 ), 871 exp.TimeToStr: lambda self, e: self.func( 872 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone") 873 ), 874 exp.TimeStrToTime: _timestrtotime_sql, 875 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 876 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 877 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 878 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 879 exp.MD5Digest: rename_func("MD5"), 880 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 881 exp.SHA: rename_func("SHA1"), 882 exp.SHA2: sha256_sql, 883 exp.UnixToTime: _unix_to_time_sql, 884 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 885 exp.Trim: trim_sql, 886 exp.Variance: rename_func("varSamp"), 887 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 888 exp.Stddev: rename_func("stddevSamp"), 889 } 890 891 PROPERTIES_LOCATION = { 892 **generator.Generator.PROPERTIES_LOCATION, 893 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 894 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 895 exp.OnCluster: exp.Properties.Location.POST_NAME, 896 } 897 898 # There's no list in docs, but it can be found in Clickhouse code 899 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 900 ON_CLUSTER_TARGETS = { 901 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 902 "DATABASE", 903 "TABLE", 904 "VIEW", 905 "DICTIONARY", 906 "INDEX", 907 "FUNCTION", 908 "NAMED COLLECTION", 909 } 910 911 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 912 NON_NULLABLE_TYPES = { 913 exp.DataType.Type.ARRAY, 914 exp.DataType.Type.MAP, 915 exp.DataType.Type.NULLABLE, 916 exp.DataType.Type.STRUCT, 917 } 918 919 def strtodate_sql(self, expression: exp.StrToDate) -> str: 920 strtodate_sql = self.function_fallback_sql(expression) 921 922 if not isinstance(expression.parent, exp.Cast): 923 # StrToDate returns DATEs in other dialects (eg. postgres), so 924 # this branch aims to improve the transpilation to clickhouse 925 return f"CAST({strtodate_sql} AS DATE)" 926 927 return strtodate_sql 928 929 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 930 this = expression.this 931 932 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 933 return self.sql(this) 934 935 return super().cast_sql(expression, safe_prefix=safe_prefix) 936 937 def trycast_sql(self, expression: exp.TryCast) -> str: 938 dtype = expression.to 939 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 940 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 941 dtype.set("nullable", True) 942 943 return super().cast_sql(expression) 944 945 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 946 this = self.json_path_part(expression.this) 947 return str(int(this) + 1) if is_int(this) else this 948 949 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 950 return f"AS {self.sql(expression, 'this')}" 951 952 def _any_to_has( 953 self, 954 expression: exp.EQ | exp.NEQ, 955 default: t.Callable[[t.Any], str], 956 prefix: str = "", 957 ) -> str: 958 if isinstance(expression.left, exp.Any): 959 arr = expression.left 960 this = expression.right 961 elif isinstance(expression.right, exp.Any): 962 arr = expression.right 963 this = expression.left 964 else: 965 return default(expression) 966 967 return prefix + self.func("has", arr.this.unnest(), this) 968 969 def eq_sql(self, expression: exp.EQ) -> str: 970 return self._any_to_has(expression, super().eq_sql) 971 972 def neq_sql(self, expression: exp.NEQ) -> str: 973 return self._any_to_has(expression, super().neq_sql, "NOT ") 974 975 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 976 # Manually add a flag to make the search case-insensitive 977 regex = self.func("CONCAT", "'(?i)'", expression.expression) 978 return self.func("match", expression.this, regex) 979 980 def datatype_sql(self, expression: exp.DataType) -> str: 981 # String is the standard ClickHouse type, every other variant is just an alias. 982 # Additionally, any supplied length parameter will be ignored. 983 # 984 # https://clickhouse.com/docs/en/sql-reference/data-types/string 985 if expression.this in self.STRING_TYPE_MAPPING: 986 dtype = "String" 987 else: 988 dtype = super().datatype_sql(expression) 989 990 # This section changes the type to `Nullable(...)` if the following conditions hold: 991 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 992 # and change their semantics 993 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 994 # constraint: "Type of Map key must be a type, that can be represented by integer or 995 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 996 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 997 parent = expression.parent 998 if ( 999 expression.args.get("nullable") is not False 1000 and not ( 1001 isinstance(parent, exp.DataType) 1002 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1003 and expression.index in (None, 0) 1004 ) 1005 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1006 ): 1007 dtype = f"Nullable({dtype})" 1008 1009 return dtype 1010 1011 def cte_sql(self, expression: exp.CTE) -> str: 1012 if expression.args.get("scalar"): 1013 this = self.sql(expression, "this") 1014 alias = self.sql(expression, "alias") 1015 return f"{this} AS {alias}" 1016 1017 return super().cte_sql(expression) 1018 1019 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1020 return super().after_limit_modifiers(expression) + [ 1021 ( 1022 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1023 if expression.args.get("settings") 1024 else "" 1025 ), 1026 ( 1027 self.seg("FORMAT ") + self.sql(expression, "format") 1028 if expression.args.get("format") 1029 else "" 1030 ), 1031 ] 1032 1033 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1034 params = self.expressions(expression, key="params", flat=True) 1035 return self.func(expression.name, *expression.expressions) + f"({params})" 1036 1037 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1038 return self.func(expression.name, *expression.expressions) 1039 1040 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1041 return self.anonymousaggfunc_sql(expression) 1042 1043 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1044 return self.parameterizedagg_sql(expression) 1045 1046 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1047 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1048 1049 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1050 return f"ON CLUSTER {self.sql(expression, 'this')}" 1051 1052 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1053 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1054 exp.Properties.Location.POST_NAME 1055 ): 1056 this_name = self.sql( 1057 expression.this if isinstance(expression.this, exp.Schema) else expression, 1058 "this", 1059 ) 1060 this_properties = " ".join( 1061 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1062 ) 1063 this_schema = self.schema_columns_sql(expression.this) 1064 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1065 1066 return super().createable_sql(expression, locations) 1067 1068 def create_sql(self, expression: exp.Create) -> str: 1069 # The comment property comes last in CTAS statements, i.e. after the query 1070 query = expression.expression 1071 if isinstance(query, exp.Query): 1072 comment_prop = expression.find(exp.SchemaCommentProperty) 1073 if comment_prop: 1074 comment_prop.pop() 1075 query.replace(exp.paren(query)) 1076 else: 1077 comment_prop = None 1078 1079 create_sql = super().create_sql(expression) 1080 1081 comment_sql = self.sql(comment_prop) 1082 comment_sql = f" {comment_sql}" if comment_sql else "" 1083 1084 return f"{create_sql}{comment_sql}" 1085 1086 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1087 this = self.indent(self.sql(expression, "this")) 1088 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1089 1090 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1091 this = self.sql(expression, "this") 1092 this = f" {this}" if this else "" 1093 expr = self.sql(expression, "expression") 1094 expr = f" {expr}" if expr else "" 1095 index_type = self.sql(expression, "index_type") 1096 index_type = f" TYPE {index_type}" if index_type else "" 1097 granularity = self.sql(expression, "granularity") 1098 granularity = f" GRANULARITY {granularity}" if granularity else "" 1099 1100 return f"INDEX{this}{expr}{index_type}{granularity}" 1101 1102 def partition_sql(self, expression: exp.Partition) -> str: 1103 return f"PARTITION {self.expressions(expression, flat=True)}" 1104 1105 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1106 return f"ID {self.sql(expression.this)}" 1107 1108 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1109 return ( 1110 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1111 ) 1112 1113 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1114 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHEREclause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
919 def strtodate_sql(self, expression: exp.StrToDate) -> str: 920 strtodate_sql = self.function_fallback_sql(expression) 921 922 if not isinstance(expression.parent, exp.Cast): 923 # StrToDate returns DATEs in other dialects (eg. postgres), so 924 # this branch aims to improve the transpilation to clickhouse 925 return f"CAST({strtodate_sql} AS DATE)" 926 927 return strtodate_sql
929 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 930 this = expression.this 931 932 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 933 return self.sql(this) 934 935 return super().cast_sql(expression, safe_prefix=safe_prefix)
937 def trycast_sql(self, expression: exp.TryCast) -> str: 938 dtype = expression.to 939 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 940 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 941 dtype.set("nullable", True) 942 943 return super().cast_sql(expression)
980 def datatype_sql(self, expression: exp.DataType) -> str: 981 # String is the standard ClickHouse type, every other variant is just an alias. 982 # Additionally, any supplied length parameter will be ignored. 983 # 984 # https://clickhouse.com/docs/en/sql-reference/data-types/string 985 if expression.this in self.STRING_TYPE_MAPPING: 986 dtype = "String" 987 else: 988 dtype = super().datatype_sql(expression) 989 990 # This section changes the type to `Nullable(...)` if the following conditions hold: 991 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 992 # and change their semantics 993 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 994 # constraint: "Type of Map key must be a type, that can be represented by integer or 995 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 996 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 997 parent = expression.parent 998 if ( 999 expression.args.get("nullable") is not False 1000 and not ( 1001 isinstance(parent, exp.DataType) 1002 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1003 and expression.index in (None, 0) 1004 ) 1005 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1006 ): 1007 dtype = f"Nullable({dtype})" 1008 1009 return dtype
1019 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1020 return super().after_limit_modifiers(expression) + [ 1021 ( 1022 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1023 if expression.args.get("settings") 1024 else "" 1025 ), 1026 ( 1027 self.seg("FORMAT ") + self.sql(expression, "format") 1028 if expression.args.get("format") 1029 else "" 1030 ), 1031 ]
1052 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1053 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1054 exp.Properties.Location.POST_NAME 1055 ): 1056 this_name = self.sql( 1057 expression.this if isinstance(expression.this, exp.Schema) else expression, 1058 "this", 1059 ) 1060 this_properties = " ".join( 1061 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1062 ) 1063 this_schema = self.schema_columns_sql(expression.this) 1064 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1065 1066 return super().createable_sql(expression, locations)
1068 def create_sql(self, expression: exp.Create) -> str: 1069 # The comment property comes last in CTAS statements, i.e. after the query 1070 query = expression.expression 1071 if isinstance(query, exp.Query): 1072 comment_prop = expression.find(exp.SchemaCommentProperty) 1073 if comment_prop: 1074 comment_prop.pop() 1075 query.replace(exp.paren(query)) 1076 else: 1077 comment_prop = None 1078 1079 create_sql = super().create_sql(expression) 1080 1081 comment_sql = self.sql(comment_prop) 1082 comment_sql = f" {comment_sql}" if comment_sql else "" 1083 1084 return f"{create_sql}{comment_sql}"
1090 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1091 this = self.sql(expression, "this") 1092 this = f" {this}" if this else "" 1093 expr = self.sql(expression, "expression") 1094 expr = f" {expr}" if expr else "" 1095 index_type = self.sql(expression, "index_type") 1096 index_type = f" TYPE {index_type}" if index_type else "" 1097 granularity = self.sql(expression, "granularity") 1098 granularity = f" GRANULARITY {granularity}" if granularity else "" 1099 1100 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_NULLABLE_TYPES
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql