sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 arg_max_or_min_no_count, 10 build_date_delta, 11 build_formatted_time, 12 inline_array_sql, 13 json_extract_segments, 14 json_path_key_only_name, 15 no_pivot_sql, 16 build_json_extract_path, 17 rename_func, 18 sha256_sql, 19 var_map_sql, 20 timestamptrunc_sql, 21 unit_to_var, 22) 23from sqlglot.generator import Generator 24from sqlglot.helper import is_int, seq_get 25from sqlglot.tokens import Token, TokenType 26 27DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 28 29 30def _build_date_format(args: t.List) -> exp.TimeToStr: 31 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 32 33 timezone = seq_get(args, 2) 34 if timezone: 35 expr.set("timezone", timezone) 36 37 return expr 38 39 40def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 41 scale = expression.args.get("scale") 42 timestamp = expression.this 43 44 if scale in (None, exp.UnixToTime.SECONDS): 45 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 46 if scale == exp.UnixToTime.MILLIS: 47 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MICROS: 49 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.NANOS: 51 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 53 return self.func( 54 "fromUnixTimestamp", 55 exp.cast( 56 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 57 ), 58 ) 59 60 61def _lower_func(sql: str) -> str: 62 index = sql.index("(") 63 return sql[:index].lower() + sql[index:] 64 65 66def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> 
str: 67 quantile = expression.args["quantile"] 68 args = f"({self.sql(expression, 'this')})" 69 70 if isinstance(quantile, exp.Array): 71 func = self.func("quantiles", *quantile) 72 else: 73 func = self.func("quantile", quantile) 74 75 return func + args 76 77 78def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 79 if len(args) == 1: 80 return exp.CountIf(this=seq_get(args, 0)) 81 82 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 83 84 85def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 86 if len(args) == 3: 87 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 88 89 strtodate = exp.StrToDate.from_arg_list(args) 90 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 91 92 93def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 94 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 95 if not expression.unit: 96 return rename_func(name)(self, expression) 97 98 return self.func( 99 name, 100 unit_to_var(expression), 101 expression.expression, 102 expression.this, 103 ) 104 105 return _delta_sql 106 107 108class ClickHouse(Dialect): 109 NORMALIZE_FUNCTIONS: bool | str = False 110 NULL_ORDERING = "nulls_are_last" 111 SUPPORTS_USER_DEFINED_TYPES = False 112 SAFE_DIVISION = True 113 LOG_BASE_FIRST: t.Optional[bool] = None 114 FORCE_EARLY_ALIAS_REF_EXPANSION = True 115 116 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 117 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 118 119 UNESCAPED_SEQUENCES = { 120 "\\0": "\0", 121 } 122 123 class Tokenizer(tokens.Tokenizer): 124 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 125 IDENTIFIERS = ['"', "`"] 126 STRING_ESCAPES = ["'", "\\"] 127 BIT_STRINGS = [("0b", "")] 128 HEX_STRINGS = [("0x", ""), ("0X", "")] 129 HEREDOC_STRINGS = ["$"] 130 131 KEYWORDS = { 132 **tokens.Tokenizer.KEYWORDS, 133 "ATTACH": TokenType.COMMAND, 134 "DATE32": 
TokenType.DATE32, 135 "DATETIME64": TokenType.DATETIME64, 136 "DICTIONARY": TokenType.DICTIONARY, 137 "ENUM8": TokenType.ENUM8, 138 "ENUM16": TokenType.ENUM16, 139 "FINAL": TokenType.FINAL, 140 "FIXEDSTRING": TokenType.FIXEDSTRING, 141 "FLOAT32": TokenType.FLOAT, 142 "FLOAT64": TokenType.DOUBLE, 143 "GLOBAL": TokenType.GLOBAL, 144 "INT256": TokenType.INT256, 145 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 146 "MAP": TokenType.MAP, 147 "NESTED": TokenType.NESTED, 148 "SAMPLE": TokenType.TABLE_SAMPLE, 149 "TUPLE": TokenType.STRUCT, 150 "UINT128": TokenType.UINT128, 151 "UINT16": TokenType.USMALLINT, 152 "UINT256": TokenType.UINT256, 153 "UINT32": TokenType.UINT, 154 "UINT64": TokenType.UBIGINT, 155 "UINT8": TokenType.UTINYINT, 156 "IPV4": TokenType.IPV4, 157 "IPV6": TokenType.IPV6, 158 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 159 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 160 "SYSTEM": TokenType.COMMAND, 161 "PREWHERE": TokenType.PREWHERE, 162 } 163 KEYWORDS.pop("/*+") 164 165 SINGLE_TOKENS = { 166 **tokens.Tokenizer.SINGLE_TOKENS, 167 "$": TokenType.HEREDOC_STRING, 168 } 169 170 class Parser(parser.Parser): 171 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 172 # * select x from t1 union all select x from t2 limit 1; 173 # * select x from t1 union all (select x from t2 limit 1); 174 MODIFIERS_ATTACHED_TO_SET_OP = False 175 INTERVAL_SPANS = False 176 177 FUNCTIONS = { 178 **parser.Parser.FUNCTIONS, 179 "ANY": exp.AnyValue.from_arg_list, 180 "ARRAYSUM": exp.ArraySum.from_arg_list, 181 "COUNTIF": _build_count_if, 182 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 183 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 184 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 185 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 186 "DATE_FORMAT": _build_date_format, 187 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 188 "DATESUB": 
build_date_delta(exp.DateSub, default_unit=None), 189 "FORMATDATETIME": _build_date_format, 190 "JSONEXTRACTSTRING": build_json_extract_path( 191 exp.JSONExtractScalar, zero_based_indexing=False 192 ), 193 "MAP": parser.build_var_map, 194 "MATCH": exp.RegexpLike.from_arg_list, 195 "RANDCANONICAL": exp.Rand.from_arg_list, 196 "STR_TO_DATE": _build_str_to_date, 197 "TUPLE": exp.Struct.from_arg_list, 198 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 199 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 200 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 201 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 202 "UNIQ": exp.ApproxDistinct.from_arg_list, 203 "XOR": lambda args: exp.Xor(expressions=args), 204 "MD5": exp.MD5Digest.from_arg_list, 205 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 206 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 207 } 208 209 AGG_FUNCTIONS = { 210 "count", 211 "min", 212 "max", 213 "sum", 214 "avg", 215 "any", 216 "stddevPop", 217 "stddevSamp", 218 "varPop", 219 "varSamp", 220 "corr", 221 "covarPop", 222 "covarSamp", 223 "entropy", 224 "exponentialMovingAverage", 225 "intervalLengthSum", 226 "kolmogorovSmirnovTest", 227 "mannWhitneyUTest", 228 "median", 229 "rankCorr", 230 "sumKahan", 231 "studentTTest", 232 "welchTTest", 233 "anyHeavy", 234 "anyLast", 235 "boundingRatio", 236 "first_value", 237 "last_value", 238 "argMin", 239 "argMax", 240 "avgWeighted", 241 "topK", 242 "topKWeighted", 243 "deltaSum", 244 "deltaSumTimestamp", 245 "groupArray", 246 "groupArrayLast", 247 "groupUniqArray", 248 "groupArrayInsertAt", 249 "groupArrayMovingAvg", 250 "groupArrayMovingSum", 251 "groupArraySample", 252 "groupBitAnd", 253 "groupBitOr", 254 "groupBitXor", 255 "groupBitmap", 256 "groupBitmapAnd", 257 "groupBitmapOr", 258 "groupBitmapXor", 259 "sumWithOverflow", 260 "sumMap", 261 
"minMap", 262 "maxMap", 263 "skewSamp", 264 "skewPop", 265 "kurtSamp", 266 "kurtPop", 267 "uniq", 268 "uniqExact", 269 "uniqCombined", 270 "uniqCombined64", 271 "uniqHLL12", 272 "uniqTheta", 273 "quantile", 274 "quantiles", 275 "quantileExact", 276 "quantilesExact", 277 "quantileExactLow", 278 "quantilesExactLow", 279 "quantileExactHigh", 280 "quantilesExactHigh", 281 "quantileExactWeighted", 282 "quantilesExactWeighted", 283 "quantileTiming", 284 "quantilesTiming", 285 "quantileTimingWeighted", 286 "quantilesTimingWeighted", 287 "quantileDeterministic", 288 "quantilesDeterministic", 289 "quantileTDigest", 290 "quantilesTDigest", 291 "quantileTDigestWeighted", 292 "quantilesTDigestWeighted", 293 "quantileBFloat16", 294 "quantilesBFloat16", 295 "quantileBFloat16Weighted", 296 "quantilesBFloat16Weighted", 297 "simpleLinearRegression", 298 "stochasticLinearRegression", 299 "stochasticLogisticRegression", 300 "categoricalInformationValue", 301 "contingency", 302 "cramersV", 303 "cramersVBiasCorrected", 304 "theilsU", 305 "maxIntersections", 306 "maxIntersectionsPosition", 307 "meanZTest", 308 "quantileInterpolatedWeighted", 309 "quantilesInterpolatedWeighted", 310 "quantileGK", 311 "quantilesGK", 312 "sparkBar", 313 "sumCount", 314 "largestTriangleThreeBuckets", 315 "histogram", 316 "sequenceMatch", 317 "sequenceCount", 318 "windowFunnel", 319 "retention", 320 "uniqUpTo", 321 "sequenceNextNode", 322 "exponentialTimeDecayedAvg", 323 } 324 325 AGG_FUNCTIONS_SUFFIXES = [ 326 "If", 327 "Array", 328 "ArrayIf", 329 "Map", 330 "SimpleState", 331 "State", 332 "Merge", 333 "MergeState", 334 "ForEach", 335 "Distinct", 336 "OrDefault", 337 "OrNull", 338 "Resample", 339 "ArgMin", 340 "ArgMax", 341 ] 342 343 FUNC_TOKENS = { 344 *parser.Parser.FUNC_TOKENS, 345 TokenType.SET, 346 } 347 348 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 349 350 ID_VAR_TOKENS = { 351 *parser.Parser.ID_VAR_TOKENS, 352 TokenType.LIKE, 353 } 354 355 AGG_FUNC_MAPPING = ( 356 lambda 
functions, suffixes: { 357 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 358 } 359 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 360 361 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 362 363 FUNCTION_PARSERS = { 364 **parser.Parser.FUNCTION_PARSERS, 365 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 366 "QUANTILE": lambda self: self._parse_quantile(), 367 } 368 369 FUNCTION_PARSERS.pop("MATCH") 370 371 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 372 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 373 374 RANGE_PARSERS = { 375 **parser.Parser.RANGE_PARSERS, 376 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 377 and self._parse_in(this, is_global=True), 378 } 379 380 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 381 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 382 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 383 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 384 385 JOIN_KINDS = { 386 *parser.Parser.JOIN_KINDS, 387 TokenType.ANY, 388 TokenType.ASOF, 389 TokenType.ARRAY, 390 } 391 392 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 393 TokenType.ANY, 394 TokenType.ARRAY, 395 TokenType.FINAL, 396 TokenType.FORMAT, 397 TokenType.SETTINGS, 398 } 399 400 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 401 TokenType.FORMAT, 402 } 403 404 LOG_DEFAULTS_TO_LN = True 405 406 QUERY_MODIFIER_PARSERS = { 407 **parser.Parser.QUERY_MODIFIER_PARSERS, 408 TokenType.SETTINGS: lambda self: ( 409 "settings", 410 self._advance() or self._parse_csv(self._parse_assignment), 411 ), 412 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 413 } 414 415 CONSTRAINT_PARSERS = { 416 **parser.Parser.CONSTRAINT_PARSERS, 417 "INDEX": lambda self: self._parse_index_constraint(), 418 "CODEC": lambda self: self._parse_compress(), 
419 } 420 421 ALTER_PARSERS = { 422 **parser.Parser.ALTER_PARSERS, 423 "REPLACE": lambda self: self._parse_alter_table_replace(), 424 } 425 426 SCHEMA_UNNAMED_CONSTRAINTS = { 427 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 428 "INDEX", 429 } 430 431 def _parse_types( 432 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 433 ) -> t.Optional[exp.Expression]: 434 dtype = super()._parse_types( 435 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 436 ) 437 if isinstance(dtype, exp.DataType): 438 # Mark every type as non-nullable which is ClickHouse's default. This marker 439 # helps us transpile types from other dialects to ClickHouse, so that we can 440 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 441 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 442 # `Nullable` type constructor 443 dtype.set("nullable", False) 444 445 return dtype 446 447 def _parse_create(self) -> exp.Create | exp.Command: 448 create = super()._parse_create() 449 450 # DATABASE in ClickHouse is the same as SCHEMA in other dialects 451 if isinstance(create, exp.Create) and create.kind == "DATABASE": 452 create.set("kind", "SCHEMA") 453 454 return create 455 456 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 457 index = self._index 458 this = self._parse_bitwise() 459 if self._match(TokenType.FROM): 460 self._retreat(index) 461 return super()._parse_extract() 462 463 # We return Anonymous here because extract and regexpExtract have different semantics, 464 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 465 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 466 # 467 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
468 self._match(TokenType.COMMA) 469 return self.expression( 470 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 471 ) 472 473 def _parse_assignment(self) -> t.Optional[exp.Expression]: 474 this = super()._parse_assignment() 475 476 if self._match(TokenType.PLACEHOLDER): 477 return self.expression( 478 exp.If, 479 this=this, 480 true=self._parse_assignment(), 481 false=self._match(TokenType.COLON) and self._parse_assignment(), 482 ) 483 484 return this 485 486 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 487 """ 488 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 489 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 490 """ 491 if not self._match(TokenType.L_BRACE): 492 return None 493 494 this = self._parse_id_var() 495 self._match(TokenType.COLON) 496 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 497 self._match_text_seq("IDENTIFIER") and "Identifier" 498 ) 499 500 if not kind: 501 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 502 elif not self._match(TokenType.R_BRACE): 503 self.raise_error("Expecting }") 504 505 return self.expression(exp.Placeholder, this=this, kind=kind) 506 507 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 508 this = super()._parse_in(this) 509 this.set("is_global", is_global) 510 return this 511 512 def _parse_table( 513 self, 514 schema: bool = False, 515 joins: bool = False, 516 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 517 parse_bracket: bool = False, 518 is_db_reference: bool = False, 519 parse_partition: bool = False, 520 ) -> t.Optional[exp.Expression]: 521 this = super()._parse_table( 522 schema=schema, 523 joins=joins, 524 alias_tokens=alias_tokens, 525 parse_bracket=parse_bracket, 526 is_db_reference=is_db_reference, 527 ) 528 529 if self._match(TokenType.FINAL): 530 this = self.expression(exp.Final, 
this=this) 531 532 return this 533 534 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 535 return super()._parse_position(haystack_first=True) 536 537 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 538 def _parse_cte(self) -> exp.CTE: 539 # WITH <identifier> AS <subquery expression> 540 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 541 542 if not cte: 543 # WITH <expression> AS <identifier> 544 cte = self.expression( 545 exp.CTE, 546 this=self._parse_assignment(), 547 alias=self._parse_table_alias(), 548 scalar=True, 549 ) 550 551 return cte 552 553 def _parse_join_parts( 554 self, 555 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 556 is_global = self._match(TokenType.GLOBAL) and self._prev 557 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 558 559 if kind_pre: 560 kind = self._match_set(self.JOIN_KINDS) and self._prev 561 side = self._match_set(self.JOIN_SIDES) and self._prev 562 return is_global, side, kind 563 564 return ( 565 is_global, 566 self._match_set(self.JOIN_SIDES) and self._prev, 567 self._match_set(self.JOIN_KINDS) and self._prev, 568 ) 569 570 def _parse_join( 571 self, skip_join_token: bool = False, parse_bracket: bool = False 572 ) -> t.Optional[exp.Join]: 573 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 574 if join: 575 join.set("global", join.args.pop("method", None)) 576 577 return join 578 579 def _parse_function( 580 self, 581 functions: t.Optional[t.Dict[str, t.Callable]] = None, 582 anonymous: bool = False, 583 optional_parens: bool = True, 584 any_token: bool = False, 585 ) -> t.Optional[exp.Expression]: 586 expr = super()._parse_function( 587 functions=functions, 588 anonymous=anonymous, 589 optional_parens=optional_parens, 590 any_token=any_token, 591 ) 592 593 func = expr.this if isinstance(expr, exp.Window) else expr 594 595 # Aggregate functions can be split in 2 parts: 
<func_name><suffix> 596 parts = ( 597 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 598 ) 599 600 if parts: 601 params = self._parse_func_params(func) 602 603 kwargs = { 604 "this": func.this, 605 "expressions": func.expressions, 606 } 607 if parts[1]: 608 kwargs["parts"] = parts 609 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 610 else: 611 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 612 613 kwargs["exp_class"] = exp_class 614 if params: 615 kwargs["params"] = params 616 617 func = self.expression(**kwargs) 618 619 if isinstance(expr, exp.Window): 620 # The window's func was parsed as Anonymous in base parser, fix its 621 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 622 expr.set("this", func) 623 elif params: 624 # Params have blocked super()._parse_function() from parsing the following window 625 # (if that exists) as they're standing between the function call and the window spec 626 expr = self._parse_window(func) 627 else: 628 expr = func 629 630 return expr 631 632 def _parse_func_params( 633 self, this: t.Optional[exp.Func] = None 634 ) -> t.Optional[t.List[exp.Expression]]: 635 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 636 return self._parse_csv(self._parse_lambda) 637 638 if self._match(TokenType.L_PAREN): 639 params = self._parse_csv(self._parse_lambda) 640 self._match_r_paren(this) 641 return params 642 643 return None 644 645 def _parse_quantile(self) -> exp.Quantile: 646 this = self._parse_lambda() 647 params = self._parse_func_params() 648 if params: 649 return self.expression(exp.Quantile, this=params[0], quantile=this) 650 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 651 652 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 653 return super()._parse_wrapped_id_vars(optional=True) 654 655 def _parse_primary_key( 656 self, wrapped_optional: bool = 
False, in_props: bool = False 657 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 658 return super()._parse_primary_key( 659 wrapped_optional=wrapped_optional or in_props, in_props=in_props 660 ) 661 662 def _parse_on_property(self) -> t.Optional[exp.Expression]: 663 index = self._index 664 if self._match_text_seq("CLUSTER"): 665 this = self._parse_id_var() 666 if this: 667 return self.expression(exp.OnCluster, this=this) 668 else: 669 self._retreat(index) 670 return None 671 672 def _parse_index_constraint( 673 self, kind: t.Optional[str] = None 674 ) -> exp.IndexColumnConstraint: 675 # INDEX name1 expr TYPE type1(args) GRANULARITY value 676 this = self._parse_id_var() 677 expression = self._parse_assignment() 678 679 index_type = self._match_text_seq("TYPE") and ( 680 self._parse_function() or self._parse_var() 681 ) 682 683 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 684 685 return self.expression( 686 exp.IndexColumnConstraint, 687 this=this, 688 expression=expression, 689 index_type=index_type, 690 granularity=granularity, 691 ) 692 693 def _parse_partition(self) -> t.Optional[exp.Partition]: 694 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 695 if not self._match(TokenType.PARTITION): 696 return None 697 698 if self._match_text_seq("ID"): 699 # Corresponds to the PARTITION ID <string_value> syntax 700 expressions: t.List[exp.Expression] = [ 701 self.expression(exp.PartitionId, this=self._parse_string()) 702 ] 703 else: 704 expressions = self._parse_expressions() 705 706 return self.expression(exp.Partition, expressions=expressions) 707 708 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 709 partition = self._parse_partition() 710 711 if not partition or not self._match(TokenType.FROM): 712 return None 713 714 return self.expression( 715 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 716 ) 717 718 def 
_parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 719 if not self._match_text_seq("PROJECTION"): 720 return None 721 722 return self.expression( 723 exp.ProjectionDef, 724 this=self._parse_id_var(), 725 expression=self._parse_wrapped(self._parse_statement), 726 ) 727 728 def _parse_constraint(self) -> t.Optional[exp.Expression]: 729 return super()._parse_constraint() or self._parse_projection_def() 730 731 class Generator(generator.Generator): 732 QUERY_HINTS = False 733 STRUCT_DELIMITER = ("(", ")") 734 NVL2_SUPPORTED = False 735 TABLESAMPLE_REQUIRES_PARENS = False 736 TABLESAMPLE_SIZE_IS_ROWS = False 737 TABLESAMPLE_KEYWORDS = "SAMPLE" 738 LAST_DAY_SUPPORTS_DATE_PART = False 739 CAN_IMPLEMENT_ARRAY_ANY = True 740 SUPPORTS_TO_NUMBER = False 741 JOIN_HINTS = False 742 TABLE_HINTS = False 743 EXPLICIT_SET_OP = True 744 GROUPINGS_SEP = "" 745 SET_OP_MODIFIERS = False 746 SUPPORTS_TABLE_ALIAS_COLUMNS = False 747 VALUES_AS_TABLE = False 748 749 STRING_TYPE_MAPPING = { 750 exp.DataType.Type.CHAR: "String", 751 exp.DataType.Type.LONGBLOB: "String", 752 exp.DataType.Type.LONGTEXT: "String", 753 exp.DataType.Type.MEDIUMBLOB: "String", 754 exp.DataType.Type.MEDIUMTEXT: "String", 755 exp.DataType.Type.TINYBLOB: "String", 756 exp.DataType.Type.TINYTEXT: "String", 757 exp.DataType.Type.TEXT: "String", 758 exp.DataType.Type.VARBINARY: "String", 759 exp.DataType.Type.VARCHAR: "String", 760 } 761 762 SUPPORTED_JSON_PATH_PARTS = { 763 exp.JSONPathKey, 764 exp.JSONPathRoot, 765 exp.JSONPathSubscript, 766 } 767 768 TYPE_MAPPING = { 769 **generator.Generator.TYPE_MAPPING, 770 **STRING_TYPE_MAPPING, 771 exp.DataType.Type.ARRAY: "Array", 772 exp.DataType.Type.BIGINT: "Int64", 773 exp.DataType.Type.DATE32: "Date32", 774 exp.DataType.Type.DATETIME64: "DateTime64", 775 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 776 exp.DataType.Type.DOUBLE: "Float64", 777 exp.DataType.Type.ENUM: "Enum", 778 exp.DataType.Type.ENUM8: "Enum8", 779 exp.DataType.Type.ENUM16: "Enum16", 780 
exp.DataType.Type.FIXEDSTRING: "FixedString", 781 exp.DataType.Type.FLOAT: "Float32", 782 exp.DataType.Type.INT: "Int32", 783 exp.DataType.Type.MEDIUMINT: "Int32", 784 exp.DataType.Type.INT128: "Int128", 785 exp.DataType.Type.INT256: "Int256", 786 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 787 exp.DataType.Type.MAP: "Map", 788 exp.DataType.Type.NESTED: "Nested", 789 exp.DataType.Type.NULLABLE: "Nullable", 790 exp.DataType.Type.SMALLINT: "Int16", 791 exp.DataType.Type.STRUCT: "Tuple", 792 exp.DataType.Type.TINYINT: "Int8", 793 exp.DataType.Type.UBIGINT: "UInt64", 794 exp.DataType.Type.UINT: "UInt32", 795 exp.DataType.Type.UINT128: "UInt128", 796 exp.DataType.Type.UINT256: "UInt256", 797 exp.DataType.Type.USMALLINT: "UInt16", 798 exp.DataType.Type.UTINYINT: "UInt8", 799 exp.DataType.Type.IPV4: "IPv4", 800 exp.DataType.Type.IPV6: "IPv6", 801 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 802 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 803 } 804 805 TRANSFORMS = { 806 **generator.Generator.TRANSFORMS, 807 exp.AnyValue: rename_func("any"), 808 exp.ApproxDistinct: rename_func("uniq"), 809 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 810 exp.ArraySize: rename_func("LENGTH"), 811 exp.ArraySum: rename_func("arraySum"), 812 exp.ArgMax: arg_max_or_min_no_count("argMax"), 813 exp.ArgMin: arg_max_or_min_no_count("argMin"), 814 exp.Array: inline_array_sql, 815 exp.CastToStrType: rename_func("CAST"), 816 exp.CountIf: rename_func("countIf"), 817 exp.CompressColumnConstraint: lambda self, 818 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 819 exp.ComputedColumnConstraint: lambda self, 820 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 821 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 822 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 823 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 824 exp.DateStrToDate: rename_func("toDate"), 825 
exp.DateSub: _datetime_delta_sql("DATE_SUB"), 826 exp.Explode: rename_func("arrayJoin"), 827 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 828 exp.IsNan: rename_func("isNaN"), 829 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 830 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 831 exp.JSONPathKey: json_path_key_only_name, 832 exp.JSONPathRoot: lambda *_: "", 833 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 834 exp.Nullif: rename_func("nullIf"), 835 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 836 exp.Pivot: no_pivot_sql, 837 exp.Quantile: _quantile_sql, 838 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 839 exp.Rand: rename_func("randCanonical"), 840 exp.StartsWith: rename_func("startsWith"), 841 exp.StrPosition: lambda self, e: self.func( 842 "position", e.this, e.args.get("substr"), e.args.get("position") 843 ), 844 exp.TimeToStr: lambda self, e: self.func( 845 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone") 846 ), 847 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 848 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 849 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 850 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 851 exp.MD5Digest: rename_func("MD5"), 852 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 853 exp.SHA: rename_func("SHA1"), 854 exp.SHA2: sha256_sql, 855 exp.UnixToTime: _unix_to_time_sql, 856 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 857 exp.Variance: rename_func("varSamp"), 858 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 859 exp.Stddev: rename_func("stddevSamp"), 860 } 861 862 PROPERTIES_LOCATION = { 863 **generator.Generator.PROPERTIES_LOCATION, 864 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 865 
exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 866 exp.OnCluster: exp.Properties.Location.POST_NAME, 867 } 868 869 # There's no list in docs, but it can be found in Clickhouse code 870 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 871 ON_CLUSTER_TARGETS = { 872 "DATABASE", 873 "TABLE", 874 "VIEW", 875 "DICTIONARY", 876 "INDEX", 877 "FUNCTION", 878 "NAMED COLLECTION", 879 } 880 881 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 882 NON_NULLABLE_TYPES = { 883 exp.DataType.Type.ARRAY, 884 exp.DataType.Type.MAP, 885 exp.DataType.Type.NULLABLE, 886 exp.DataType.Type.STRUCT, 887 } 888 889 def strtodate_sql(self, expression: exp.StrToDate) -> str: 890 strtodate_sql = self.function_fallback_sql(expression) 891 892 if not isinstance(expression.parent, exp.Cast): 893 # StrToDate returns DATEs in other dialects (eg. postgres), so 894 # this branch aims to improve the transpilation to clickhouse 895 return f"CAST({strtodate_sql} AS DATE)" 896 897 return strtodate_sql 898 899 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 900 this = expression.this 901 902 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 903 return self.sql(this) 904 905 return super().cast_sql(expression, safe_prefix=safe_prefix) 906 907 def trycast_sql(self, expression: exp.TryCast) -> str: 908 dtype = expression.to 909 if not dtype.is_type(*self.NON_NULLABLE_TYPES): 910 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 911 dtype.set("nullable", True) 912 913 return super().cast_sql(expression) 914 915 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 916 this = self.json_path_part(expression.this) 917 return str(int(this) + 1) if is_int(this) else this 918 919 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 920 return f"AS {self.sql(expression, 'this')}" 921 922 def _any_to_has( 923 self, 924 expression: exp.EQ | exp.NEQ, 
925 default: t.Callable[[t.Any], str], 926 prefix: str = "", 927 ) -> str: 928 if isinstance(expression.left, exp.Any): 929 arr = expression.left 930 this = expression.right 931 elif isinstance(expression.right, exp.Any): 932 arr = expression.right 933 this = expression.left 934 else: 935 return default(expression) 936 937 return prefix + self.func("has", arr.this.unnest(), this) 938 939 def eq_sql(self, expression: exp.EQ) -> str: 940 return self._any_to_has(expression, super().eq_sql) 941 942 def neq_sql(self, expression: exp.NEQ) -> str: 943 return self._any_to_has(expression, super().neq_sql, "NOT ") 944 945 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 946 # Manually add a flag to make the search case-insensitive 947 regex = self.func("CONCAT", "'(?i)'", expression.expression) 948 return self.func("match", expression.this, regex) 949 950 def datatype_sql(self, expression: exp.DataType) -> str: 951 # String is the standard ClickHouse type, every other variant is just an alias. 952 # Additionally, any supplied length parameter will be ignored. 953 # 954 # https://clickhouse.com/docs/en/sql-reference/data-types/string 955 if expression.this in self.STRING_TYPE_MAPPING: 956 dtype = "String" 957 else: 958 dtype = super().datatype_sql(expression) 959 960 # This section changes the type to `Nullable(...)` if the following conditions hold: 961 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 962 # and change their semantics 963 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 964 # constraint: "Type of Map key must be a type, that can be represented by integer or 965 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 966 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 967 parent = expression.parent 968 if ( 969 expression.args.get("nullable") is not False 970 and not ( 971 isinstance(parent, exp.DataType) 972 and parent.is_type(exp.DataType.Type.MAP) 973 and expression.index in (None, 0) 974 ) 975 and not expression.is_type(*self.NON_NULLABLE_TYPES) 976 ): 977 dtype = f"Nullable({dtype})" 978 979 return dtype 980 981 def cte_sql(self, expression: exp.CTE) -> str: 982 if expression.args.get("scalar"): 983 this = self.sql(expression, "this") 984 alias = self.sql(expression, "alias") 985 return f"{this} AS {alias}" 986 987 return super().cte_sql(expression) 988 989 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 990 return super().after_limit_modifiers(expression) + [ 991 ( 992 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 993 if expression.args.get("settings") 994 else "" 995 ), 996 ( 997 self.seg("FORMAT ") + self.sql(expression, "format") 998 if expression.args.get("format") 999 else "" 1000 ), 1001 ] 1002 1003 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1004 params = self.expressions(expression, key="params", flat=True) 1005 return self.func(expression.name, *expression.expressions) + f"({params})" 1006 1007 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1008 return self.func(expression.name, *expression.expressions) 1009 1010 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1011 return self.anonymousaggfunc_sql(expression) 1012 1013 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1014 return self.parameterizedagg_sql(expression) 1015 1016 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1017 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1018 1019 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1020 return f"ON CLUSTER {self.sql(expression, 'this')}" 1021 1022 def 
createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1023 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1024 exp.Properties.Location.POST_NAME 1025 ): 1026 this_name = self.sql( 1027 expression.this if isinstance(expression.this, exp.Schema) else expression, 1028 "this", 1029 ) 1030 this_properties = " ".join( 1031 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1032 ) 1033 this_schema = self.schema_columns_sql(expression.this) 1034 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1035 1036 return super().createable_sql(expression, locations) 1037 1038 def create_sql(self, expression: exp.Create) -> str: 1039 # The comment property comes last in CTAS statements, i.e. after the query 1040 query = expression.expression 1041 if isinstance(query, exp.Query): 1042 comment_prop = expression.find(exp.SchemaCommentProperty) 1043 if comment_prop: 1044 comment_prop.pop() 1045 query.replace(exp.paren(query)) 1046 else: 1047 comment_prop = None 1048 1049 # ClickHouse only has DATABASEs and objects under them, eg. 
TABLEs, VIEWs, etc 1050 if expression.kind == "SCHEMA": 1051 expression.set("kind", "DATABASE") 1052 1053 create_sql = super().create_sql(expression) 1054 1055 comment_sql = self.sql(comment_prop) 1056 comment_sql = f" {comment_sql}" if comment_sql else "" 1057 1058 return f"{create_sql}{comment_sql}" 1059 1060 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1061 this = self.indent(self.sql(expression, "this")) 1062 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1063 1064 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1065 this = self.sql(expression, "this") 1066 this = f" {this}" if this else "" 1067 expr = self.sql(expression, "expression") 1068 expr = f" {expr}" if expr else "" 1069 index_type = self.sql(expression, "index_type") 1070 index_type = f" TYPE {index_type}" if index_type else "" 1071 granularity = self.sql(expression, "granularity") 1072 granularity = f" GRANULARITY {granularity}" if granularity else "" 1073 1074 return f"INDEX{this}{expr}{index_type}{granularity}" 1075 1076 def partition_sql(self, expression: exp.Partition) -> str: 1077 return f"PARTITION {self.expressions(expression, flat=True)}" 1078 1079 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1080 return f"ID {self.sql(expression.this)}" 1081 1082 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1083 return ( 1084 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1085 ) 1086 1087 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1088 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
class ClickHouse(Dialect):
    """sqlglot dialect definition for ClickHouse.

    Bundles the three dialect components: a ``Tokenizer`` (ClickHouse-specific
    keywords and literal syntaxes), a ``Parser`` (ClickHouse function names,
    aggregate-combinator handling, query modifiers such as SETTINGS/FORMAT),
    and a ``Generator`` (type names like ``Int64``/``Nullable(...)`` and
    ClickHouse-flavored SQL rendering).
    """

    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer with ClickHouse literal forms (0x/0b literals, $ heredocs)
        and ClickHouse-only keywords (FINAL, PREWHERE, sized int types, ...)."""

        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        # ClickHouse has no Oracle-style hint comments, so "/*+" must not start one.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        """Parser overrides for ClickHouse syntax (query parameters, scalar CTEs,
        aggregate-function combinators, GLOBAL IN/JOIN, SETTINGS/FORMAT, ...)."""

        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        # Base names of ClickHouse aggregate functions; combined with the
        # combinator suffixes below to build AGG_FUNC_MAPPING.
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        # Aggregate-function combinator suffixes, e.g. sumIf, countDistinct.
        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        # Maps every "<func><suffix>" (including the bare name, via the "" suffix)
        # to its (base name, suffix) pair, e.g. "sumIf" -> ("sum", "If").
        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        # ClickHouse's match() is an ordinary function (see FUNCTIONS above),
        # not the special MATCH ... AGAINST construct.
        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """Parse a type, tagging it as non-nullable (ClickHouse's default)."""
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType):
                # Mark every type as non-nullable which is ClickHouse's default. This marker
                # helps us transpile types from other dialects to ClickHouse, so that we can
                # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
                # is a `NULL` value in `x`, the former would fail in ClickHouse without the
                # `Nullable` type constructor
                dtype.set("nullable", False)

            return dtype

        def _parse_create(self) -> exp.Create | exp.Command:
            create = super()._parse_create()

            # DATABASE in ClickHouse is the same as SCHEMA in other dialects
            if isinstance(create, exp.Create) and create.kind == "DATABASE":
                create.set("kind", "SCHEMA")

            return create

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            # ClickHouse supports the C-style ternary `cond ? a : b`; the `?` was
            # tokenized as PLACEHOLDER (see COLUMN_OPERATORS note above).
            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            # `is_global` marks ClickHouse's distributed `GLOBAL IN` variant.
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            # A trailing FINAL forces merge-on-read for *MergeTree tables.
            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position() takes the haystack first.
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Parse (GLOBAL?, side, kind) — ClickHouse also allows the kind
            (ANY/ASOF/...) to come before the side."""
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                # The GLOBAL token was captured as the join "method"; rename it.
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a function call, recognizing ClickHouse aggregate combinators
            and parameterized aggregates like quantile(0.5)(x)."""
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs: t.Dict[str, t.Any] = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            # Handles the second argument list of `f(params)(args)` calls.
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            # quantile(x) with no level defaults to the median (0.5).
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            # ON CLUSTER <id>; backtracks if no identifier follows CLUSTER.
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            # ALTER TABLE ... REPLACE PARTITION <p> FROM <table>
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            # PROJECTION <name> (<select statement>)
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        """SQL generator producing ClickHouse syntax (Nullable(...) wrapping,
        SETTINGS/FORMAT modifiers, parameterized aggregates, ON CLUSTER, ...)."""

        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        # Types that all collapse to ClickHouse's single String type.
        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.NULLABLE,
            exp.DataType.Type.STRUCT,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # Avoid a redundant CAST when a StrToDate is already being cast to datetime.
            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            # Deliberately emits a plain CAST (the Nullable target provides the
            # TRY_CAST-like behavior), not ClickHouse's accurateCastOrNull.
            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            # ClickHouse JSON functions use 1-based indexing.
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            """Rewrite `x = ANY(arr)` / `x <> ANY(arr)` as (NOT) has(arr, x);
            falls back to `default` when neither side is an ANY."""
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            if (
                expression.args.get("nullable") is not False
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            # Scalar CTEs use ClickHouse's `WITH <expr> AS <name>` form.
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            # SETTINGS and FORMAT trail the query, after LIMIT.
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            # Emits the two-argument-list form: name(exprs)(params).
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            # Query parameter: {name: Type}
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            # Place POST_NAME properties (i.e. ON CLUSTER) between the object
            # name and its column schema, as ClickHouse requires.
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            # ClickHouse only has DATABASEs and objects under them, eg. TABLEs, VIEWs, etc
            if expression.kind == "SCHEMA":
                expression.set("kind", "DATABASE")

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (e.g. a newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """ClickHouse-specific tokenizer settings: comment styles, identifier
    quoting, literal prefixes, and the dialect's keyword/type vocabulary."""

    COMMENTS = ["--", "#", "#!", ("/*", "*/")]
    IDENTIFIERS = ['"', "`"]
    STRING_ESCAPES = ["'", "\\"]
    BIT_STRINGS = [("0b", "")]
    HEX_STRINGS = [("0x", ""), ("0X", "")]
    HEREDOC_STRINGS = ["$"]

    # Dialect keyword table; entries kept alphabetized for easier scanning
    # (dict-literal order does not affect lookup behavior).
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
        "ATTACH": TokenType.COMMAND,
        "DATE32": TokenType.DATE32,
        "DATETIME64": TokenType.DATETIME64,
        "DICTIONARY": TokenType.DICTIONARY,
        "ENUM16": TokenType.ENUM16,
        "ENUM8": TokenType.ENUM8,
        "FINAL": TokenType.FINAL,
        "FIXEDSTRING": TokenType.FIXEDSTRING,
        "FLOAT32": TokenType.FLOAT,
        "FLOAT64": TokenType.DOUBLE,
        "GLOBAL": TokenType.GLOBAL,
        "INT256": TokenType.INT256,
        "IPV4": TokenType.IPV4,
        "IPV6": TokenType.IPV6,
        "LOWCARDINALITY": TokenType.LOWCARDINALITY,
        "MAP": TokenType.MAP,
        "NESTED": TokenType.NESTED,
        "PREWHERE": TokenType.PREWHERE,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
        "SYSTEM": TokenType.COMMAND,
        "TUPLE": TokenType.STRUCT,
        "UINT128": TokenType.UINT128,
        "UINT16": TokenType.USMALLINT,
        "UINT256": TokenType.UINT256,
        "UINT32": TokenType.UINT,
        "UINT64": TokenType.UBIGINT,
        "UINT8": TokenType.UTINYINT,
    }
    # Drop the base tokenizer's special `/*+` opener — presumably the optimizer
    # hint prefix, which ClickHouse does not use (TODO confirm against base class)
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for the ClickHouse SQL dialect."""

    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    # Function-name -> builder overrides for ClickHouse-specific spellings.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "STR_TO_DATE": _build_str_to_date,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Base names of ClickHouse aggregate functions; combined with
    # AGG_FUNCTIONS_SUFFIXES below (see AGG_FUNC_MAPPING) so that combinator
    # forms such as sumIf or uniqArray are recognized during parsing.
    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    # Aggregate-function combinator suffixes (e.g. sum + "If" -> sumIf).
    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    # SELECT is not reserved in ClickHouse, so it may appear as an identifier.
    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.LIKE,
    }

    # Cartesian product of AGG_FUNCTIONS x (AGG_FUNCTIONS_SUFFIXES + [""]),
    # mapping each combined name to its (base, suffix) pair. Built via an
    # immediately-invoked lambda so class-body names stay in scope.
    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    # MATCH is a plain function in ClickHouse, not special syntax.
    FUNCTION_PARSERS.pop("MATCH")

    # ANY is an aggregate function here, not a subquery predicate keyword.
    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    # These keywords would otherwise be consumed as table aliases.
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a type, marking it non-nullable (ClickHouse's default)."""
        dtype = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if isinstance(dtype, exp.DataType):
            # Mark every type as non-nullable which is ClickHouse's default. This marker
            # helps us transpile types from other dialects to ClickHouse, so that we can
            # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
            # is a `NULL` value in `x`, the former would fail in ClickHouse without the
            # `Nullable` type constructor
            dtype.set("nullable", False)

        return dtype

    def _parse_create(self) -> exp.Create | exp.Command:
        create = super()._parse_create()

        # DATABASE in ClickHouse is the same as SCHEMA in other dialects
        if isinstance(create, exp.Create) and create.kind == "DATABASE":
            create.set("kind", "SCHEMA")

        return create

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT, distinguishing the standard `EXTRACT(part FROM expr)`
        form from ClickHouse's two-argument regex `extract(haystack, pattern)`."""
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
        )

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        # After a normal assignment, a `?` introduces the ternary `cond ? a : b`.
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        # `is_global` records a preceding GLOBAL keyword (GLOBAL IN).
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table reference, plus ClickHouse's trailing FINAL modifier."""
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # ClickHouse's position() always takes the haystack first.
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Parse (GLOBAL, side, kind); ClickHouse allows the kind (e.g. ANY/ASOF)
        to appear before the side as well as after it."""
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, re-typing combinator/parameterized aggregates
        (e.g. sumIf(...), quantile(0.5)(x)) into the ClickHouse-specific nodes."""
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        """Parse the second parenthesized list of a parameterized aggregate,
        i.e. the `(p1, p2)` in `func(args)(p1, p2)`, if present."""
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        # quantile(level)(expr); without an explicit level the median is used.
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        # Parentheses around the identifier list are always optional here.
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # In table properties, PRIMARY KEY may be given without parentheses.
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON CLUSTER <id>; backtrack if no identifier follows."""
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... REPLACE PARTITION <part> FROM <table>."""
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        """Parse PROJECTION <name> (<statement>)."""
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """SQL generator for the ClickHouse dialect."""

    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    VALUES_AS_TABLE = False

    # Types that are all rendered as ClickHouse's canonical String type;
    # see datatype_sql below.
    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # Abstract type -> ClickHouse type-name spelling.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    # Expression-node -> SQL-rendering overrides for ClickHouse spellings.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.NULLABLE,
        exp.DataType.Type.STRUCT,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        """Render StrToDate, casting the result to DATE unless a cast wraps it."""
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        # CAST(StrToDate(...) AS DATETIME) collapses to just the inner call.
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        # ClickHouse JSON path subscripts are 1-based, so shift integer indices.
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        """Rewrite `x = ANY(arr)` / `x <> ANY(arr)` as `[NOT ]has(arr, x)`;
        fall back to `default` when neither side is an ANY expression."""
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)
expression: exp.EQ) -> str: 941 return self._any_to_has(expression, super().eq_sql) 942 943 def neq_sql(self, expression: exp.NEQ) -> str: 944 return self._any_to_has(expression, super().neq_sql, "NOT ") 945 946 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 947 # Manually add a flag to make the search case-insensitive 948 regex = self.func("CONCAT", "'(?i)'", expression.expression) 949 return self.func("match", expression.this, regex) 950 951 def datatype_sql(self, expression: exp.DataType) -> str: 952 # String is the standard ClickHouse type, every other variant is just an alias. 953 # Additionally, any supplied length parameter will be ignored. 954 # 955 # https://clickhouse.com/docs/en/sql-reference/data-types/string 956 if expression.this in self.STRING_TYPE_MAPPING: 957 dtype = "String" 958 else: 959 dtype = super().datatype_sql(expression) 960 961 # This section changes the type to `Nullable(...)` if the following conditions hold: 962 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 963 # and change their semantics 964 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 965 # constraint: "Type of Map key must be a type, that can be represented by integer or 966 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 967 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 968 parent = expression.parent 969 if ( 970 expression.args.get("nullable") is not False 971 and not ( 972 isinstance(parent, exp.DataType) 973 and parent.is_type(exp.DataType.Type.MAP) 974 and expression.index in (None, 0) 975 ) 976 and not expression.is_type(*self.NON_NULLABLE_TYPES) 977 ): 978 dtype = f"Nullable({dtype})" 979 980 return dtype 981 982 def cte_sql(self, expression: exp.CTE) -> str: 983 if expression.args.get("scalar"): 984 this = self.sql(expression, "this") 985 alias = self.sql(expression, "alias") 986 return f"{this} AS {alias}" 987 988 return super().cte_sql(expression) 989 990 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 991 return super().after_limit_modifiers(expression) + [ 992 ( 993 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 994 if expression.args.get("settings") 995 else "" 996 ), 997 ( 998 self.seg("FORMAT ") + self.sql(expression, "format") 999 if expression.args.get("format") 1000 else "" 1001 ), 1002 ] 1003 1004 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1005 params = self.expressions(expression, key="params", flat=True) 1006 return self.func(expression.name, *expression.expressions) + f"({params})" 1007 1008 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1009 return self.func(expression.name, *expression.expressions) 1010 1011 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1012 return self.anonymousaggfunc_sql(expression) 1013 1014 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1015 return self.parameterizedagg_sql(expression) 1016 1017 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1018 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1019 1020 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1021 return f"ON CLUSTER {self.sql(expression, 'this')}" 1022 1023 def 
createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1024 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1025 exp.Properties.Location.POST_NAME 1026 ): 1027 this_name = self.sql( 1028 expression.this if isinstance(expression.this, exp.Schema) else expression, 1029 "this", 1030 ) 1031 this_properties = " ".join( 1032 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1033 ) 1034 this_schema = self.schema_columns_sql(expression.this) 1035 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1036 1037 return super().createable_sql(expression, locations) 1038 1039 def create_sql(self, expression: exp.Create) -> str: 1040 # The comment property comes last in CTAS statements, i.e. after the query 1041 query = expression.expression 1042 if isinstance(query, exp.Query): 1043 comment_prop = expression.find(exp.SchemaCommentProperty) 1044 if comment_prop: 1045 comment_prop.pop() 1046 query.replace(exp.paren(query)) 1047 else: 1048 comment_prop = None 1049 1050 # ClickHouse only has DATABASEs and objects under them, eg. 
TABLEs, VIEWs, etc 1051 if expression.kind == "SCHEMA": 1052 expression.set("kind", "DATABASE") 1053 1054 create_sql = super().create_sql(expression) 1055 1056 comment_sql = self.sql(comment_prop) 1057 comment_sql = f" {comment_sql}" if comment_sql else "" 1058 1059 return f"{create_sql}{comment_sql}" 1060 1061 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1062 this = self.indent(self.sql(expression, "this")) 1063 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1064 1065 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1066 this = self.sql(expression, "this") 1067 this = f" {this}" if this else "" 1068 expr = self.sql(expression, "expression") 1069 expr = f" {expr}" if expr else "" 1070 index_type = self.sql(expression, "index_type") 1071 index_type = f" TYPE {index_type}" if index_type else "" 1072 granularity = self.sql(expression, "granularity") 1073 granularity = f" GRANULARITY {granularity}" if granularity else "" 1074 1075 return f"INDEX{this}{expr}{index_type}{granularity}" 1076 1077 def partition_sql(self, expression: exp.Partition) -> str: 1078 return f"PARTITION {self.expressions(expression, flat=True)}" 1079 1080 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1081 return f"ID {self.sql(expression.this)}" 1082 1083 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1084 return ( 1085 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1086 ) 1087 1088 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1089 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
890 def strtodate_sql(self, expression: exp.StrToDate) -> str: 891 strtodate_sql = self.function_fallback_sql(expression) 892 893 if not isinstance(expression.parent, exp.Cast): 894 # StrToDate returns DATEs in other dialects (eg. postgres), so 895 # this branch aims to improve the transpilation to clickhouse 896 return f"CAST({strtodate_sql} AS DATE)" 897 898 return strtodate_sql
900 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 901 this = expression.this 902 903 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 904 return self.sql(this) 905 906 return super().cast_sql(expression, safe_prefix=safe_prefix)
951 def datatype_sql(self, expression: exp.DataType) -> str: 952 # String is the standard ClickHouse type, every other variant is just an alias. 953 # Additionally, any supplied length parameter will be ignored. 954 # 955 # https://clickhouse.com/docs/en/sql-reference/data-types/string 956 if expression.this in self.STRING_TYPE_MAPPING: 957 dtype = "String" 958 else: 959 dtype = super().datatype_sql(expression) 960 961 # This section changes the type to `Nullable(...)` if the following conditions hold: 962 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 963 # and change their semantics 964 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 965 # constraint: "Type of Map key must be a type, that can be represented by integer or 966 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 967 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 968 parent = expression.parent 969 if ( 970 expression.args.get("nullable") is not False 971 and not ( 972 isinstance(parent, exp.DataType) 973 and parent.is_type(exp.DataType.Type.MAP) 974 and expression.index in (None, 0) 975 ) 976 and not expression.is_type(*self.NON_NULLABLE_TYPES) 977 ): 978 dtype = f"Nullable({dtype})" 979 980 return dtype
990 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 991 return super().after_limit_modifiers(expression) + [ 992 ( 993 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 994 if expression.args.get("settings") 995 else "" 996 ), 997 ( 998 self.seg("FORMAT ") + self.sql(expression, "format") 999 if expression.args.get("format") 1000 else "" 1001 ), 1002 ]
1023 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1024 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1025 exp.Properties.Location.POST_NAME 1026 ): 1027 this_name = self.sql( 1028 expression.this if isinstance(expression.this, exp.Schema) else expression, 1029 "this", 1030 ) 1031 this_properties = " ".join( 1032 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1033 ) 1034 this_schema = self.schema_columns_sql(expression.this) 1035 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1036 1037 return super().createable_sql(expression, locations)
1039 def create_sql(self, expression: exp.Create) -> str: 1040 # The comment property comes last in CTAS statements, i.e. after the query 1041 query = expression.expression 1042 if isinstance(query, exp.Query): 1043 comment_prop = expression.find(exp.SchemaCommentProperty) 1044 if comment_prop: 1045 comment_prop.pop() 1046 query.replace(exp.paren(query)) 1047 else: 1048 comment_prop = None 1049 1050 # ClickHouse only has DATABASEs and objects under them, eg. TABLEs, VIEWs, etc 1051 if expression.kind == "SCHEMA": 1052 expression.set("kind", "DATABASE") 1053 1054 create_sql = super().create_sql(expression) 1055 1056 comment_sql = self.sql(comment_prop) 1057 comment_sql = f" {comment_sql}" if comment_sql else "" 1058 1059 return f"{create_sql}{comment_sql}"
1065 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1066 this = self.sql(expression, "this") 1067 this = f" {this}" if this else "" 1068 expr = self.sql(expression, "expression") 1069 expr = f" {expr}" if expr else "" 1070 index_type = self.sql(expression, "index_type") 1071 index_type = f" TYPE {index_type}" if index_type else "" 1072 granularity = self.sql(expression, "granularity") 1073 granularity = f" GRANULARITY {granularity}" if granularity else "" 1074 1075 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_NULLABLE_TYPES
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql