sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator, unsupported_args
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd
]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
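# Illustrative round-trips for the helpers above (a sketch; assumes sqlglot is
# importable, outputs are indicative):
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT countIf(x > 1) FROM t", read="clickhouse", write="clickhouse")[0]
#   'SELECT countIf(x > 1) FROM t'
#   >>> sqlglot.transpile("SELECT quantile(0.5)(x) FROM t", read="clickhouse", write="clickhouse")[0]
#   'SELECT quantile(0.5)(x) FROM t'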
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # ClickHouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step in removing it is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In Python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds are present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return a literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into
        # '2020-01-01 12:13:14'. This is because ClickHouse encodes the timezone as a data type
        # parameter and throws an error if it's part of the timestamp string.
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
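# Worked example of the normalization above (values are illustrative):
#   '2020-01-01 12:13:14.25-08:00'
#     -> split into date/time '2020-01-01 12:13:14' and fractional+offset '25-08:00'
#     -> fractional part padded: '25'.ljust(6, "0") == '250000'
#     -> reassembled as '2020-01-01 12:13:14.250000-08:00', which fromisoformat() accepts
#     -> tzinfo dropped, yielding the literal '2020-01-01 12:13:14.250000',
#        which is then cast to a non-nullable DateTime64(6[, <zone>])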
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground; the following two queries appear to do the same thing:
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
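        # Illustrative effect of the FUNCTIONS overrides above (a sketch; output indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT uniq(x) FROM t", read="clickhouse", write="duckdb")[0]
        #   'SELECT APPROX_COUNT_DISTINCT(x) FROM t'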
"groupArrayMovingSum", 322 "groupArraySample", 323 "groupBitAnd", 324 "groupBitOr", 325 "groupBitXor", 326 "groupBitmap", 327 "groupBitmapAnd", 328 "groupBitmapOr", 329 "groupBitmapXor", 330 "sumWithOverflow", 331 "sumMap", 332 "minMap", 333 "maxMap", 334 "skewSamp", 335 "skewPop", 336 "kurtSamp", 337 "kurtPop", 338 "uniq", 339 "uniqExact", 340 "uniqCombined", 341 "uniqCombined64", 342 "uniqHLL12", 343 "uniqTheta", 344 "quantile", 345 "quantiles", 346 "quantileExact", 347 "quantilesExact", 348 "quantileExactLow", 349 "quantilesExactLow", 350 "quantileExactHigh", 351 "quantilesExactHigh", 352 "quantileExactWeighted", 353 "quantilesExactWeighted", 354 "quantileTiming", 355 "quantilesTiming", 356 "quantileTimingWeighted", 357 "quantilesTimingWeighted", 358 "quantileDeterministic", 359 "quantilesDeterministic", 360 "quantileTDigest", 361 "quantilesTDigest", 362 "quantileTDigestWeighted", 363 "quantilesTDigestWeighted", 364 "quantileBFloat16", 365 "quantilesBFloat16", 366 "quantileBFloat16Weighted", 367 "quantilesBFloat16Weighted", 368 "simpleLinearRegression", 369 "stochasticLinearRegression", 370 "stochasticLogisticRegression", 371 "categoricalInformationValue", 372 "contingency", 373 "cramersV", 374 "cramersVBiasCorrected", 375 "theilsU", 376 "maxIntersections", 377 "maxIntersectionsPosition", 378 "meanZTest", 379 "quantileInterpolatedWeighted", 380 "quantilesInterpolatedWeighted", 381 "quantileGK", 382 "quantilesGK", 383 "sparkBar", 384 "sumCount", 385 "largestTriangleThreeBuckets", 386 "histogram", 387 "sequenceMatch", 388 "sequenceCount", 389 "windowFunnel", 390 "retention", 391 "uniqUpTo", 392 "sequenceNextNode", 393 "exponentialTimeDecayedAvg", 394 } 395 396 AGG_FUNCTIONS_SUFFIXES = [ 397 "If", 398 "Array", 399 "ArrayIf", 400 "Map", 401 "SimpleState", 402 "State", 403 "Merge", 404 "MergeState", 405 "ForEach", 406 "Distinct", 407 "OrDefault", 408 "OrNull", 409 "Resample", 410 "ArgMin", 411 "ArgMax", 412 ] 413 414 FUNC_TOKENS = { 415 *parser.Parser.FUNC_TOKENS, 416 TokenType.SET, 417 } 418 419 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 420 421 ID_VAR_TOKENS = { 422 *parser.Parser.ID_VAR_TOKENS, 423 TokenType.LIKE, 424 } 425 426 AGG_FUNC_MAPPING = ( 427 lambda functions, suffixes: { 428 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 429 } 430 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 431 432 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 433 434 FUNCTION_PARSERS = { 435 **parser.Parser.FUNCTION_PARSERS, 436 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 437 "QUANTILE": lambda self: self._parse_quantile(), 438 "MEDIAN": lambda self: self._parse_quantile(), 439 "COLUMNS": lambda self: self._parse_columns(), 440 } 441 442 FUNCTION_PARSERS.pop("MATCH") 443 444 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 445 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 446 447 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 448 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 449 450 RANGE_PARSERS = { 451 **parser.Parser.RANGE_PARSERS, 452 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 453 and self._parse_in(this, is_global=True), 454 } 455 456 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 457 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds
        # to the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op
        # simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable, which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
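        # The TEXT -> Nullable(String) round-trip described in the comment above
        # (a sketch; output indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0]
        #   'SELECT CAST(x AS Nullable(String))'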
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )
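        # ClickHouse's scalar CTE form parsed above (a sketch; output indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("WITH 1 AS x SELECT x", read="clickhouse", write="clickhouse")[0]
        #   'WITH 1 AS x SELECT x'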
        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in the base parser; fix its
                    # type to be the ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following
                    # window (if one exists), as they stand between the function call and the
                    # window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)

            return None
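        # Parameterized aggregates assembled above (a sketch; output indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT quantiles(0.5, 0.9)(x) FROM t", read="clickhouse", write="clickhouse")[0]
        #   'SELECT quantiles(0.5, 0.9)(x) FROM t'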
        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
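        # The ALTER ... REPLACE PARTITION form parsed above (a sketch; output indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("ALTER TABLE t REPLACE PARTITION ID '1' FROM t2", read="clickhouse", write="clickhouse")[0]
        #   "ALTER TABLE t REPLACE PARTITION ID '1' FROM t2"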
However, "SELECT <expr> apply" is a valid alias 818 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 819 return this 820 821 return super()._parse_alias(this=this, explicit=explicit) 822 823 def _parse_expression(self) -> t.Optional[exp.Expression]: 824 this = super()._parse_expression() 825 826 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 827 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 828 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 829 self._match(TokenType.R_PAREN) 830 831 return this 832 833 def _parse_columns(self) -> exp.Expression: 834 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 835 836 while self._next and self._match_text_seq(")", "APPLY", "("): 837 self._match(TokenType.R_PAREN) 838 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 839 return this 840 841 class Generator(generator.Generator): 842 QUERY_HINTS = False 843 STRUCT_DELIMITER = ("(", ")") 844 NVL2_SUPPORTED = False 845 TABLESAMPLE_REQUIRES_PARENS = False 846 TABLESAMPLE_SIZE_IS_ROWS = False 847 TABLESAMPLE_KEYWORDS = "SAMPLE" 848 LAST_DAY_SUPPORTS_DATE_PART = False 849 CAN_IMPLEMENT_ARRAY_ANY = True 850 SUPPORTS_TO_NUMBER = False 851 JOIN_HINTS = False 852 TABLE_HINTS = False 853 GROUPINGS_SEP = "" 854 SET_OP_MODIFIERS = False 855 SUPPORTS_TABLE_ALIAS_COLUMNS = False 856 VALUES_AS_TABLE = False 857 ARRAY_SIZE_NAME = "LENGTH" 858 859 STRING_TYPE_MAPPING = { 860 exp.DataType.Type.CHAR: "String", 861 exp.DataType.Type.LONGBLOB: "String", 862 exp.DataType.Type.LONGTEXT: "String", 863 exp.DataType.Type.MEDIUMBLOB: "String", 864 exp.DataType.Type.MEDIUMTEXT: "String", 865 exp.DataType.Type.TINYBLOB: "String", 866 exp.DataType.Type.TINYTEXT: "String", 867 exp.DataType.Type.TEXT: "String", 868 exp.DataType.Type.VARBINARY: "String", 869 exp.DataType.Type.VARCHAR: "String", 870 } 871 872 SUPPORTED_JSON_PATH_PARTS = { 873 exp.JSONPathKey, 874 exp.JSONPathRoot, 875 exp.JSONPathSubscript, 876 } 877 878 TYPE_MAPPING = { 879 **generator.Generator.TYPE_MAPPING, 880 **STRING_TYPE_MAPPING, 881 exp.DataType.Type.ARRAY: "Array", 882 exp.DataType.Type.BOOLEAN: "Bool", 883 exp.DataType.Type.BIGINT: "Int64", 884 exp.DataType.Type.DATE32: "Date32", 885 exp.DataType.Type.DATETIME: "DateTime", 886 exp.DataType.Type.DATETIME64: "DateTime64", 887 exp.DataType.Type.DECIMAL: "Decimal", 888 exp.DataType.Type.DECIMAL32: "Decimal32", 889 exp.DataType.Type.DECIMAL64: "Decimal64", 890 exp.DataType.Type.DECIMAL128: "Decimal128", 891 exp.DataType.Type.DECIMAL256: "Decimal256", 892 exp.DataType.Type.TIMESTAMP: "DateTime", 893 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 894 exp.DataType.Type.DOUBLE: "Float64", 895 exp.DataType.Type.ENUM: "Enum", 896 exp.DataType.Type.ENUM8: "Enum8", 897 exp.DataType.Type.ENUM16: "Enum16", 898 exp.DataType.Type.FIXEDSTRING: "FixedString", 899 exp.DataType.Type.FLOAT: "Float32", 900 exp.DataType.Type.INT: "Int32", 901 exp.DataType.Type.MEDIUMINT: "Int32", 902 exp.DataType.Type.INT128: "Int128", 903 exp.DataType.Type.INT256: "Int256", 904 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 905 exp.DataType.Type.MAP: "Map", 906 exp.DataType.Type.NESTED: "Nested", 907 exp.DataType.Type.SMALLINT: "Int16", 908 exp.DataType.Type.STRUCT: "Tuple", 909 exp.DataType.Type.TINYINT: "Int8", 910 exp.DataType.Type.UBIGINT: "UInt64", 911 exp.DataType.Type.UINT: "UInt32", 912 exp.DataType.Type.UINT128: "UInt128", 913 exp.DataType.Type.UINT256: "UInt256", 914 exp.DataType.Type.USMALLINT: "UInt16", 915 
exp.DataType.Type.UTINYINT: "UInt8", 916 exp.DataType.Type.IPV4: "IPv4", 917 exp.DataType.Type.IPV6: "IPv6", 918 exp.DataType.Type.POINT: "Point", 919 exp.DataType.Type.RING: "Ring", 920 exp.DataType.Type.LINESTRING: "LineString", 921 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 922 exp.DataType.Type.POLYGON: "Polygon", 923 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 924 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 925 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 926 } 927 928 TRANSFORMS = { 929 **generator.Generator.TRANSFORMS, 930 exp.AnyValue: rename_func("any"), 931 exp.ApproxDistinct: rename_func("uniq"), 932 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 933 exp.ArraySum: rename_func("arraySum"), 934 exp.ArgMax: arg_max_or_min_no_count("argMax"), 935 exp.ArgMin: arg_max_or_min_no_count("argMin"), 936 exp.Array: inline_array_sql, 937 exp.CastToStrType: rename_func("CAST"), 938 exp.CountIf: rename_func("countIf"), 939 exp.CompressColumnConstraint: lambda self, 940 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 941 exp.ComputedColumnConstraint: lambda self, 942 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 943 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 944 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 945 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 946 exp.DateStrToDate: rename_func("toDate"), 947 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 948 exp.Explode: rename_func("arrayJoin"), 949 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 950 exp.IsNan: rename_func("isNaN"), 951 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 952 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONPathKey: json_path_key_only_name, 954 exp.JSONPathRoot: lambda *_: "", 955 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 956 exp.Median: rename_func("median"), 957 exp.Nullif: rename_func("nullIf"), 958 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 959 exp.Pivot: no_pivot_sql, 960 exp.Quantile: _quantile_sql, 961 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 962 exp.Rand: rename_func("randCanonical"), 963 exp.StartsWith: rename_func("startsWith"), 964 exp.StrPosition: lambda self, e: self.func( 965 "position", e.this, e.args.get("substr"), e.args.get("position") 966 ), 967 exp.TimeToStr: lambda self, e: self.func( 968 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 969 ), 970 exp.TimeStrToTime: _timestrtotime_sql, 971 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 972 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 973 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 974 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 975 exp.MD5Digest: rename_func("MD5"), 976 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 977 exp.SHA: rename_func("SHA1"), 978 exp.SHA2: sha256_sql, 979 exp.UnixToTime: _unix_to_time_sql, 980 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 981 exp.Trim: trim_sql, 982 exp.Variance: rename_func("varSamp"), 983 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 984 exp.Stddev: rename_func("stddevSamp"), 985 exp.Chr: rename_func("CHAR"), 986 exp.Lag: lambda self, e: self.func( 987 "lagInFrame", e.this, e.args.get("offset"), 
e.args.get("default") 988 ), 989 exp.Lead: lambda self, e: self.func( 990 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 991 ), 992 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 993 rename_func("editDistance") 994 ), 995 } 996 997 PROPERTIES_LOCATION = { 998 **generator.Generator.PROPERTIES_LOCATION, 999 exp.OnCluster: exp.Properties.Location.POST_NAME, 1000 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1001 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1002 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1003 } 1004 1005 # There's no list in docs, but it can be found in Clickhouse code 1006 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1007 ON_CLUSTER_TARGETS = { 1008 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1009 "DATABASE", 1010 "TABLE", 1011 "VIEW", 1012 "DICTIONARY", 1013 "INDEX", 1014 "FUNCTION", 1015 "NAMED COLLECTION", 1016 } 1017 1018 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1019 NON_NULLABLE_TYPES = { 1020 exp.DataType.Type.ARRAY, 1021 exp.DataType.Type.MAP, 1022 exp.DataType.Type.STRUCT, 1023 exp.DataType.Type.POINT, 1024 exp.DataType.Type.RING, 1025 exp.DataType.Type.LINESTRING, 1026 exp.DataType.Type.MULTILINESTRING, 1027 exp.DataType.Type.POLYGON, 1028 exp.DataType.Type.MULTIPOLYGON, 1029 } 1030 1031 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1032 strtodate_sql = self.function_fallback_sql(expression) 1033 1034 if not isinstance(expression.parent, exp.Cast): 1035 # StrToDate returns DATEs in other dialects (eg. postgres), so 1036 # this branch aims to improve the transpilation to clickhouse 1037 return f"CAST({strtodate_sql} AS DATE)" 1038 1039 return strtodate_sql 1040 1041 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1042 this = expression.this 1043 1044 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1045 return self.sql(this) 1046 1047 return super().cast_sql(expression, safe_prefix=safe_prefix) 1048 1049 def trycast_sql(self, expression: exp.TryCast) -> str: 1050 dtype = expression.to 1051 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1052 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1053 dtype.set("nullable", True) 1054 1055 return super().cast_sql(expression) 1056 1057 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1058 this = self.json_path_part(expression.this) 1059 return str(int(this) + 1) if is_int(this) else this 1060 1061 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1062 return f"AS {self.sql(expression, 'this')}" 1063 1064 def _any_to_has( 1065 self, 1066 expression: exp.EQ | exp.NEQ, 1067 default: t.Callable[[t.Any], str], 1068 prefix: str = "", 1069 ) -> str: 1070 if isinstance(expression.left, exp.Any): 1071 arr = expression.left 1072 this = expression.right 1073 elif isinstance(expression.right, exp.Any): 1074 arr = expression.right 1075 this = expression.left 1076 else: 1077 return default(expression) 1078 1079 return prefix + self.func("has", arr.this.unnest(), this) 1080 1081 def eq_sql(self, expression: exp.EQ) -> str: 1082 return self._any_to_has(expression, super().eq_sql) 1083 1084 def neq_sql(self, expression: exp.NEQ) -> str: 1085 return self._any_to_has(expression, super().neq_sql, "NOT ") 1086 1087 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1088 # Manually add a flag to make the 
        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with
            #   `Nullable` and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"
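        # Placeholders and SETTINGS rendered above (a sketch; outputs indicative):
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT {abc: UInt32}", read="clickhouse", write="clickhouse")[0]
        #   'SELECT {abc: UInt32}'
        #   >>> sqlglot.transpile("SELECT * FROM t SETTINGS max_threads = 1", read="clickhouse", write="clickhouse")[0]
        #   'SELECT * FROM t SETTINGS max_threads = 1'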
".join( 1174 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1175 ) 1176 this_schema = self.schema_columns_sql(expression.this) 1177 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1178 1179 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1180 1181 return super().createable_sql(expression, locations) 1182 1183 def create_sql(self, expression: exp.Create) -> str: 1184 # The comment property comes last in CTAS statements, i.e. after the query 1185 query = expression.expression 1186 if isinstance(query, exp.Query): 1187 comment_prop = expression.find(exp.SchemaCommentProperty) 1188 if comment_prop: 1189 comment_prop.pop() 1190 query.replace(exp.paren(query)) 1191 else: 1192 comment_prop = None 1193 1194 create_sql = super().create_sql(expression) 1195 1196 comment_sql = self.sql(comment_prop) 1197 comment_sql = f" {comment_sql}" if comment_sql else "" 1198 1199 return f"{create_sql}{comment_sql}" 1200 1201 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1202 this = self.indent(self.sql(expression, "this")) 1203 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1204 1205 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1206 this = self.sql(expression, "this") 1207 this = f" {this}" if this else "" 1208 expr = self.sql(expression, "expression") 1209 expr = f" {expr}" if expr else "" 1210 index_type = self.sql(expression, "index_type") 1211 index_type = f" TYPE {index_type}" if index_type else "" 1212 granularity = self.sql(expression, "granularity") 1213 granularity = f" GRANULARITY {granularity}" if granularity else "" 1214 1215 return f"INDEX{this}{expr}{index_type}{granularity}" 1216 1217 def partition_sql(self, expression: exp.Partition) -> str: 1218 return f"PARTITION {self.expressions(expression, flat=True)}" 1219 1220 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1221 return f"ID {self.sql(expression.this)}" 1222 1223 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1224 return ( 1225 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1226 ) 1227 1228 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1229 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1230 1231 def is_sql(self, expression: exp.Is) -> str: 1232 is_sql = super().is_sql(expression) 1233 1234 if isinstance(expression.parent, exp.Not): 1235 # value IS NOT NULL -> NOT (value IS NULL) 1236 is_sql = self.wrap(is_sql) 1237 1238 return is_sql
However, "SELECT <expr> apply" is a valid alias 819 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 820 return this 821 822 return super()._parse_alias(this=this, explicit=explicit) 823 824 def _parse_expression(self) -> t.Optional[exp.Expression]: 825 this = super()._parse_expression() 826 827 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 828 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 829 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 830 self._match(TokenType.R_PAREN) 831 832 return this 833 834 def _parse_columns(self) -> exp.Expression: 835 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 836 837 while self._next and self._match_text_seq(")", "APPLY", "("): 838 self._match(TokenType.R_PAREN) 839 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 840 return this 841 842 class Generator(generator.Generator): 843 QUERY_HINTS = False 844 STRUCT_DELIMITER = ("(", ")") 845 NVL2_SUPPORTED = False 846 TABLESAMPLE_REQUIRES_PARENS = False 847 TABLESAMPLE_SIZE_IS_ROWS = False 848 TABLESAMPLE_KEYWORDS = "SAMPLE" 849 LAST_DAY_SUPPORTS_DATE_PART = False 850 CAN_IMPLEMENT_ARRAY_ANY = True 851 SUPPORTS_TO_NUMBER = False 852 JOIN_HINTS = False 853 TABLE_HINTS = False 854 GROUPINGS_SEP = "" 855 SET_OP_MODIFIERS = False 856 SUPPORTS_TABLE_ALIAS_COLUMNS = False 857 VALUES_AS_TABLE = False 858 ARRAY_SIZE_NAME = "LENGTH" 859 860 STRING_TYPE_MAPPING = { 861 exp.DataType.Type.CHAR: "String", 862 exp.DataType.Type.LONGBLOB: "String", 863 exp.DataType.Type.LONGTEXT: "String", 864 exp.DataType.Type.MEDIUMBLOB: "String", 865 exp.DataType.Type.MEDIUMTEXT: "String", 866 exp.DataType.Type.TINYBLOB: "String", 867 exp.DataType.Type.TINYTEXT: "String", 868 exp.DataType.Type.TEXT: "String", 869 exp.DataType.Type.VARBINARY: "String", 870 exp.DataType.Type.VARCHAR: "String", 871 } 872 873 SUPPORTED_JSON_PATH_PARTS = { 874 exp.JSONPathKey, 875 exp.JSONPathRoot, 876 exp.JSONPathSubscript, 877 } 878 879 TYPE_MAPPING = { 880 **generator.Generator.TYPE_MAPPING, 881 **STRING_TYPE_MAPPING, 882 exp.DataType.Type.ARRAY: "Array", 883 exp.DataType.Type.BOOLEAN: "Bool", 884 exp.DataType.Type.BIGINT: "Int64", 885 exp.DataType.Type.DATE32: "Date32", 886 exp.DataType.Type.DATETIME: "DateTime", 887 exp.DataType.Type.DATETIME64: "DateTime64", 888 exp.DataType.Type.DECIMAL: "Decimal", 889 exp.DataType.Type.DECIMAL32: "Decimal32", 890 exp.DataType.Type.DECIMAL64: "Decimal64", 891 exp.DataType.Type.DECIMAL128: "Decimal128", 892 exp.DataType.Type.DECIMAL256: "Decimal256", 893 exp.DataType.Type.TIMESTAMP: "DateTime", 894 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 895 exp.DataType.Type.DOUBLE: "Float64", 896 exp.DataType.Type.ENUM: "Enum", 897 exp.DataType.Type.ENUM8: "Enum8", 898 exp.DataType.Type.ENUM16: "Enum16", 899 exp.DataType.Type.FIXEDSTRING: "FixedString", 900 exp.DataType.Type.FLOAT: "Float32", 901 exp.DataType.Type.INT: "Int32", 902 exp.DataType.Type.MEDIUMINT: "Int32", 903 exp.DataType.Type.INT128: "Int128", 904 exp.DataType.Type.INT256: "Int256", 905 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 906 exp.DataType.Type.MAP: "Map", 907 exp.DataType.Type.NESTED: "Nested", 908 exp.DataType.Type.SMALLINT: "Int16", 909 exp.DataType.Type.STRUCT: "Tuple", 910 exp.DataType.Type.TINYINT: "Int8", 911 exp.DataType.Type.UBIGINT: "UInt64", 912 exp.DataType.Type.UINT: "UInt32", 913 exp.DataType.Type.UINT128: "UInt128", 914 exp.DataType.Type.UINT256: "UInt256", 915 exp.DataType.Type.USMALLINT: "UInt16", 916 
exp.DataType.Type.UTINYINT: "UInt8", 917 exp.DataType.Type.IPV4: "IPv4", 918 exp.DataType.Type.IPV6: "IPv6", 919 exp.DataType.Type.POINT: "Point", 920 exp.DataType.Type.RING: "Ring", 921 exp.DataType.Type.LINESTRING: "LineString", 922 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 923 exp.DataType.Type.POLYGON: "Polygon", 924 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 925 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 926 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 927 } 928 929 TRANSFORMS = { 930 **generator.Generator.TRANSFORMS, 931 exp.AnyValue: rename_func("any"), 932 exp.ApproxDistinct: rename_func("uniq"), 933 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 934 exp.ArraySum: rename_func("arraySum"), 935 exp.ArgMax: arg_max_or_min_no_count("argMax"), 936 exp.ArgMin: arg_max_or_min_no_count("argMin"), 937 exp.Array: inline_array_sql, 938 exp.CastToStrType: rename_func("CAST"), 939 exp.CountIf: rename_func("countIf"), 940 exp.CompressColumnConstraint: lambda self, 941 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 942 exp.ComputedColumnConstraint: lambda self, 943 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 944 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 945 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 946 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 947 exp.DateStrToDate: rename_func("toDate"), 948 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 949 exp.Explode: rename_func("arrayJoin"), 950 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 951 exp.IsNan: rename_func("isNaN"), 952 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 954 exp.JSONPathKey: json_path_key_only_name, 955 exp.JSONPathRoot: lambda *_: "", 956 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 957 exp.Median: rename_func("median"), 958 exp.Nullif: rename_func("nullIf"), 959 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 960 exp.Pivot: no_pivot_sql, 961 exp.Quantile: _quantile_sql, 962 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 963 exp.Rand: rename_func("randCanonical"), 964 exp.StartsWith: rename_func("startsWith"), 965 exp.StrPosition: lambda self, e: self.func( 966 "position", e.this, e.args.get("substr"), e.args.get("position") 967 ), 968 exp.TimeToStr: lambda self, e: self.func( 969 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 970 ), 971 exp.TimeStrToTime: _timestrtotime_sql, 972 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 973 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 974 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 975 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 976 exp.MD5Digest: rename_func("MD5"), 977 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 978 exp.SHA: rename_func("SHA1"), 979 exp.SHA2: sha256_sql, 980 exp.UnixToTime: _unix_to_time_sql, 981 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 982 exp.Trim: trim_sql, 983 exp.Variance: rename_func("varSamp"), 984 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 985 exp.Stddev: rename_func("stddevSamp"), 986 exp.Chr: rename_func("CHAR"), 987 exp.Lag: lambda self, e: self.func( 988 "lagInFrame", e.this, e.args.get("offset"), 
e.args.get("default") 989 ), 990 exp.Lead: lambda self, e: self.func( 991 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 992 ), 993 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 994 rename_func("editDistance") 995 ), 996 } 997 998 PROPERTIES_LOCATION = { 999 **generator.Generator.PROPERTIES_LOCATION, 1000 exp.OnCluster: exp.Properties.Location.POST_NAME, 1001 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1002 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1003 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1004 } 1005 1006 # There's no list in docs, but it can be found in Clickhouse code 1007 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1008 ON_CLUSTER_TARGETS = { 1009 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1010 "DATABASE", 1011 "TABLE", 1012 "VIEW", 1013 "DICTIONARY", 1014 "INDEX", 1015 "FUNCTION", 1016 "NAMED COLLECTION", 1017 } 1018 1019 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1020 NON_NULLABLE_TYPES = { 1021 exp.DataType.Type.ARRAY, 1022 exp.DataType.Type.MAP, 1023 exp.DataType.Type.STRUCT, 1024 exp.DataType.Type.POINT, 1025 exp.DataType.Type.RING, 1026 exp.DataType.Type.LINESTRING, 1027 exp.DataType.Type.MULTILINESTRING, 1028 exp.DataType.Type.POLYGON, 1029 exp.DataType.Type.MULTIPOLYGON, 1030 } 1031 1032 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1033 strtodate_sql = self.function_fallback_sql(expression) 1034 1035 if not isinstance(expression.parent, exp.Cast): 1036 # StrToDate returns DATEs in other dialects (eg. postgres), so 1037 # this branch aims to improve the transpilation to clickhouse 1038 return f"CAST({strtodate_sql} AS DATE)" 1039 1040 return strtodate_sql 1041 1042 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1043 this = expression.this 1044 1045 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1046 return self.sql(this) 1047 1048 return super().cast_sql(expression, safe_prefix=safe_prefix) 1049 1050 def trycast_sql(self, expression: exp.TryCast) -> str: 1051 dtype = expression.to 1052 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1053 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1054 dtype.set("nullable", True) 1055 1056 return super().cast_sql(expression) 1057 1058 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1059 this = self.json_path_part(expression.this) 1060 return str(int(this) + 1) if is_int(this) else this 1061 1062 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1063 return f"AS {self.sql(expression, 'this')}" 1064 1065 def _any_to_has( 1066 self, 1067 expression: exp.EQ | exp.NEQ, 1068 default: t.Callable[[t.Any], str], 1069 prefix: str = "", 1070 ) -> str: 1071 if isinstance(expression.left, exp.Any): 1072 arr = expression.left 1073 this = expression.right 1074 elif isinstance(expression.right, exp.Any): 1075 arr = expression.right 1076 this = expression.left 1077 else: 1078 return default(expression) 1079 1080 return prefix + self.func("has", arr.this.unnest(), this) 1081 1082 def eq_sql(self, expression: exp.EQ) -> str: 1083 return self._any_to_has(expression, super().eq_sql) 1084 1085 def neq_sql(self, expression: exp.NEQ) -> str: 1086 return self._any_to_has(expression, super().neq_sql, "NOT ") 1087 1088 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1089 # Manually add a flag to make the 
search case-insensitive 1090 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1091 return self.func("match", expression.this, regex) 1092 1093 def datatype_sql(self, expression: exp.DataType) -> str: 1094 # String is the standard ClickHouse type, every other variant is just an alias. 1095 # Additionally, any supplied length parameter will be ignored. 1096 # 1097 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1098 if expression.this in self.STRING_TYPE_MAPPING: 1099 dtype = "String" 1100 else: 1101 dtype = super().datatype_sql(expression) 1102 1103 # This section changes the type to `Nullable(...)` if the following conditions hold: 1104 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1105 # and change their semantics 1106 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1107 # constraint: "Type of Map key must be a type, that can be represented by integer or 1108 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1109 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1110 parent = expression.parent 1111 nullable = expression.args.get("nullable") 1112 if nullable is True or ( 1113 nullable is None 1114 and not ( 1115 isinstance(parent, exp.DataType) 1116 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1117 and expression.index in (None, 0) 1118 ) 1119 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1120 ): 1121 dtype = f"Nullable({dtype})" 1122 1123 return dtype 1124 1125 def cte_sql(self, expression: exp.CTE) -> str: 1126 if expression.args.get("scalar"): 1127 this = self.sql(expression, "this") 1128 alias = self.sql(expression, "alias") 1129 return f"{this} AS {alias}" 1130 1131 return super().cte_sql(expression) 1132 1133 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1134 return super().after_limit_modifiers(expression) + [ 1135 ( 1136 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1137 if expression.args.get("settings") 1138 else "" 1139 ), 1140 ( 1141 self.seg("FORMAT ") + self.sql(expression, "format") 1142 if expression.args.get("format") 1143 else "" 1144 ), 1145 ] 1146 1147 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1148 params = self.expressions(expression, key="params", flat=True) 1149 return self.func(expression.name, *expression.expressions) + f"({params})" 1150 1151 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1152 return self.func(expression.name, *expression.expressions) 1153 1154 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1155 return self.anonymousaggfunc_sql(expression) 1156 1157 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1158 return self.parameterizedagg_sql(expression) 1159 1160 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1161 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1162 1163 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1164 return f"ON CLUSTER {self.sql(expression, 'this')}" 1165 1166 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1167 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1168 exp.Properties.Location.POST_NAME 1169 ): 1170 this_name = self.sql( 1171 expression.this if isinstance(expression.this, exp.Schema) else expression, 1172 "this", 1173 ) 1174 this_properties = " 
".join( 1175 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1176 ) 1177 this_schema = self.schema_columns_sql(expression.this) 1178 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1179 1180 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1181 1182 return super().createable_sql(expression, locations) 1183 1184 def create_sql(self, expression: exp.Create) -> str: 1185 # The comment property comes last in CTAS statements, i.e. after the query 1186 query = expression.expression 1187 if isinstance(query, exp.Query): 1188 comment_prop = expression.find(exp.SchemaCommentProperty) 1189 if comment_prop: 1190 comment_prop.pop() 1191 query.replace(exp.paren(query)) 1192 else: 1193 comment_prop = None 1194 1195 create_sql = super().create_sql(expression) 1196 1197 comment_sql = self.sql(comment_prop) 1198 comment_sql = f" {comment_sql}" if comment_sql else "" 1199 1200 return f"{create_sql}{comment_sql}" 1201 1202 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1203 this = self.indent(self.sql(expression, "this")) 1204 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1205 1206 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1207 this = self.sql(expression, "this") 1208 this = f" {this}" if this else "" 1209 expr = self.sql(expression, "expression") 1210 expr = f" {expr}" if expr else "" 1211 index_type = self.sql(expression, "index_type") 1212 index_type = f" TYPE {index_type}" if index_type else "" 1213 granularity = self.sql(expression, "granularity") 1214 granularity = f" GRANULARITY {granularity}" if granularity else "" 1215 1216 return f"INDEX{this}{expr}{index_type}{granularity}" 1217 1218 def partition_sql(self, expression: exp.Partition) -> str: 1219 return f"PARTITION {self.expressions(expression, flat=True)}" 1220 1221 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1222 return f"ID {self.sql(expression.this)}" 1223 1224 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1225 return ( 1226 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1227 ) 1228 1229 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1230 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1231 1232 def is_sql(self, expression: exp.Is) -> str: 1233 is_sql = super().is_sql(expression) 1234 1235 if isinstance(expression.parent, exp.Not): 1236 # value IS NOT NULL -> NOT (value IS NULL) 1237 is_sql = self.wrap(is_sql) 1238 1239 return is_sql
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
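As a hedged illustration of the two settings above, the ClickHouse dialect can be inspected and exercised directly. This is a minimal sketch against sqlglot's public API; the attribute values and output shown in comments reflect this dialect as documented here and may vary by sqlglot version.

```python
import sqlglot
from sqlglot.dialects.clickhouse import ClickHouse

# ClickHouse function names are case-sensitive, so the dialect is expected to
# disable normalization and keep names like countIf spelled as written.
print(ClickHouse.NORMALIZE_FUNCTIONS)  # False in this dialect
print(ClickHouse.NULL_ORDERING)        # "nulls_are_last" in this dialect

# Round-tripping through the dialect preserves the mixed-case function name.
print(sqlglot.transpile("SELECT countIf(x > 1) FROM t", read="clickhouse")[0])
```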
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
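A related, hedged sketch: this does not exercise LOG_BASE_FIRST itself but the neighbouring one-argument case, where the ClickHouse parser treats LOG(x) as a natural logarithm (see LOG_DEFAULTS_TO_LN in the Parser below). The exact rendered output depends on the sqlglot version.

```python
import sqlglot

# One-argument LOG in ClickHouse is the natural log, so transpiling to a
# dialect that distinguishes LOG/LN is expected to yield LN(x).
print(sqlglot.transpile("SELECT LOG(x) FROM t", read="clickhouse", write="postgres")[0])

# Two-argument LOG keeps its base argument; where the base lands in the output
# is what LOG_BASE_FIRST controls per dialect.
print(sqlglot.transpile("SELECT LOG(2, x) FROM t", read="clickhouse", write="postgres")[0])
```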
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (
      SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
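A hedged sketch of how this surfaces through the optimizer's qualify step (sqlglot.optimizer.qualify.qualify is public API; the exact qualified output is version-dependent and shown here only for illustration):

```python
import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Under ClickHouse semantics the alias should be forwarded across the query,
# i.e. the WHERE/GROUP BY/HAVING references resolve to "id" (see note above).
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))
```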
Specifies the strategy according to which identifiers should be normalized.
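A minimal, hedged sketch of what a case-sensitive strategy means in practice, assuming the dialect's NORMALIZATION_STRATEGY is case-sensitive (as ClickHouse's is):

```python
from sqlglot import exp
from sqlglot.dialects.clickhouse import ClickHouse

# ClickHouse treats identifiers case-sensitively, so normalization is expected
# to leave the casing of an unquoted identifier untouched.
ident = exp.to_identifier("MyTable")
print(ClickHouse().normalize_identifier(ident).name)  # "MyTable"
```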
Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
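A hedged sketch of that mapping in action (the output shape may vary by sqlglot version):

```python
import sqlglot

# CREATE SCHEMA has no direct ClickHouse equivalent, so the creatable kind is
# expected to be rewritten via CREATABLE_KIND_MAPPING.
print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
# expected roughly: CREATE DATABASE foo
```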
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
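A hedged sketch: ClickHouse wants UNION's DISTINCT/ALL spelled out, so transpiled set operations are expected to carry an explicit modifier rather than rely on a server-side default. The exact output depends on the sqlglot version.

```python
import sqlglot

# A bare UNION from another dialect implies DISTINCT there; the ClickHouse
# output is expected to make the modifier explicit.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])
```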
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
185 class Tokenizer(tokens.Tokenizer): 186 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 187 IDENTIFIERS = ['"', "`"] 188 IDENTIFIER_ESCAPES = ["\\"] 189 STRING_ESCAPES = ["'", "\\"] 190 BIT_STRINGS = [("0b", "")] 191 HEX_STRINGS = [("0x", ""), ("0X", "")] 192 HEREDOC_STRINGS = ["$"] 193 194 KEYWORDS = { 195 **tokens.Tokenizer.KEYWORDS, 196 "ATTACH": TokenType.COMMAND, 197 "DATE32": TokenType.DATE32, 198 "DATETIME64": TokenType.DATETIME64, 199 "DICTIONARY": TokenType.DICTIONARY, 200 "ENUM8": TokenType.ENUM8, 201 "ENUM16": TokenType.ENUM16, 202 "FINAL": TokenType.FINAL, 203 "FIXEDSTRING": TokenType.FIXEDSTRING, 204 "FLOAT32": TokenType.FLOAT, 205 "FLOAT64": TokenType.DOUBLE, 206 "GLOBAL": TokenType.GLOBAL, 207 "INT256": TokenType.INT256, 208 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 209 "MAP": TokenType.MAP, 210 "NESTED": TokenType.NESTED, 211 "SAMPLE": TokenType.TABLE_SAMPLE, 212 "TUPLE": TokenType.STRUCT, 213 "UINT128": TokenType.UINT128, 214 "UINT16": TokenType.USMALLINT, 215 "UINT256": TokenType.UINT256, 216 "UINT32": TokenType.UINT, 217 "UINT64": TokenType.UBIGINT, 218 "UINT8": TokenType.UTINYINT, 219 "IPV4": TokenType.IPV4, 220 "IPV6": TokenType.IPV6, 221 "POINT": TokenType.POINT, 222 "RING": TokenType.RING, 223 "LINESTRING": TokenType.LINESTRING, 224 "MULTILINESTRING": TokenType.MULTILINESTRING, 225 "POLYGON": TokenType.POLYGON, 226 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 227 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 228 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 229 "SYSTEM": TokenType.COMMAND, 230 "PREWHERE": TokenType.PREWHERE, 231 } 232 KEYWORDS.pop("/*+") 233 234 SINGLE_TOKENS = { 235 **tokens.Tokenizer.SINGLE_TOKENS, 236 "$": TokenType.HEREDOC_STRING, 237 }
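A hedged sketch exercising the tokenizer settings above ('#' line comments, 0x/0b literal prefixes) through sqlglot's top-level tokenize helper:

```python
import sqlglot

# '#' starts a line comment and 0x/0b prefixes produce hex/bit string tokens
# in the ClickHouse tokenizer.
tokens = sqlglot.tokenize("SELECT 0x1F, 0b101 # trailing comment", read="clickhouse")
print([(token.token_type.name, token.text) for token in tokens])
```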
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
239 class Parser(parser.Parser): 240 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 241 # * select x from t1 union all select x from t2 limit 1; 242 # * select x from t1 union all (select x from t2 limit 1); 243 MODIFIERS_ATTACHED_TO_SET_OP = False 244 INTERVAL_SPANS = False 245 246 FUNCTIONS = { 247 **parser.Parser.FUNCTIONS, 248 "ANY": exp.AnyValue.from_arg_list, 249 "ARRAYSUM": exp.ArraySum.from_arg_list, 250 "COUNTIF": _build_count_if, 251 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATE_FORMAT": _build_date_format, 256 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 257 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 258 "FORMATDATETIME": _build_date_format, 259 "JSONEXTRACTSTRING": build_json_extract_path( 260 exp.JSONExtractScalar, zero_based_indexing=False 261 ), 262 "MAP": parser.build_var_map, 263 "MATCH": exp.RegexpLike.from_arg_list, 264 "RANDCANONICAL": exp.Rand.from_arg_list, 265 "STR_TO_DATE": _build_str_to_date, 266 "TUPLE": exp.Struct.from_arg_list, 267 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "UNIQ": exp.ApproxDistinct.from_arg_list, 272 "XOR": lambda args: exp.Xor(expressions=args), 273 "MD5": exp.MD5Digest.from_arg_list, 274 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 275 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 276 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 277 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 278 } 279 FUNCTIONS.pop("TRANSFORM") 280 281 AGG_FUNCTIONS = { 282 "count", 283 "min", 284 "max", 285 "sum", 286 "avg", 287 "any", 288 "stddevPop", 289 "stddevSamp", 290 "varPop", 291 "varSamp", 292 "corr", 293 "covarPop", 294 "covarSamp", 295 "entropy", 296 "exponentialMovingAverage", 297 "intervalLengthSum", 298 "kolmogorovSmirnovTest", 299 "mannWhitneyUTest", 300 "median", 301 "rankCorr", 302 "sumKahan", 303 "studentTTest", 304 "welchTTest", 305 "anyHeavy", 306 "anyLast", 307 "boundingRatio", 308 "first_value", 309 "last_value", 310 "argMin", 311 "argMax", 312 "avgWeighted", 313 "topK", 314 "topKWeighted", 315 "deltaSum", 316 "deltaSumTimestamp", 317 "groupArray", 318 "groupArrayLast", 319 "groupUniqArray", 320 "groupArrayInsertAt", 321 "groupArrayMovingAvg", 322 "groupArrayMovingSum", 323 "groupArraySample", 324 "groupBitAnd", 325 "groupBitOr", 326 "groupBitXor", 327 "groupBitmap", 328 "groupBitmapAnd", 329 "groupBitmapOr", 330 "groupBitmapXor", 331 "sumWithOverflow", 332 "sumMap", 333 "minMap", 334 "maxMap", 335 "skewSamp", 336 "skewPop", 337 "kurtSamp", 338 "kurtPop", 339 "uniq", 340 "uniqExact", 341 "uniqCombined", 342 "uniqCombined64", 343 "uniqHLL12", 344 "uniqTheta", 345 "quantile", 346 "quantiles", 347 "quantileExact", 348 "quantilesExact", 349 "quantileExactLow", 350 "quantilesExactLow", 351 "quantileExactHigh", 352 "quantilesExactHigh", 353 "quantileExactWeighted", 354 "quantilesExactWeighted", 355 "quantileTiming", 356 "quantilesTiming", 357 "quantileTimingWeighted", 358 "quantilesTimingWeighted", 359 
"quantileDeterministic", 360 "quantilesDeterministic", 361 "quantileTDigest", 362 "quantilesTDigest", 363 "quantileTDigestWeighted", 364 "quantilesTDigestWeighted", 365 "quantileBFloat16", 366 "quantilesBFloat16", 367 "quantileBFloat16Weighted", 368 "quantilesBFloat16Weighted", 369 "simpleLinearRegression", 370 "stochasticLinearRegression", 371 "stochasticLogisticRegression", 372 "categoricalInformationValue", 373 "contingency", 374 "cramersV", 375 "cramersVBiasCorrected", 376 "theilsU", 377 "maxIntersections", 378 "maxIntersectionsPosition", 379 "meanZTest", 380 "quantileInterpolatedWeighted", 381 "quantilesInterpolatedWeighted", 382 "quantileGK", 383 "quantilesGK", 384 "sparkBar", 385 "sumCount", 386 "largestTriangleThreeBuckets", 387 "histogram", 388 "sequenceMatch", 389 "sequenceCount", 390 "windowFunnel", 391 "retention", 392 "uniqUpTo", 393 "sequenceNextNode", 394 "exponentialTimeDecayedAvg", 395 } 396 397 AGG_FUNCTIONS_SUFFIXES = [ 398 "If", 399 "Array", 400 "ArrayIf", 401 "Map", 402 "SimpleState", 403 "State", 404 "Merge", 405 "MergeState", 406 "ForEach", 407 "Distinct", 408 "OrDefault", 409 "OrNull", 410 "Resample", 411 "ArgMin", 412 "ArgMax", 413 ] 414 415 FUNC_TOKENS = { 416 *parser.Parser.FUNC_TOKENS, 417 TokenType.SET, 418 } 419 420 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 421 422 ID_VAR_TOKENS = { 423 *parser.Parser.ID_VAR_TOKENS, 424 TokenType.LIKE, 425 } 426 427 AGG_FUNC_MAPPING = ( 428 lambda functions, suffixes: { 429 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 430 } 431 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 432 433 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 434 435 FUNCTION_PARSERS = { 436 **parser.Parser.FUNCTION_PARSERS, 437 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 438 "QUANTILE": lambda self: self._parse_quantile(), 439 "MEDIAN": lambda self: self._parse_quantile(), 440 "COLUMNS": lambda self: self._parse_columns(), 441 } 442 443 FUNCTION_PARSERS.pop("MATCH") 444 445 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 446 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 447 448 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 449 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 450 451 RANGE_PARSERS = { 452 **parser.Parser.RANGE_PARSERS, 453 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 454 and self._parse_in(this, is_global=True), 455 } 456 457 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 458 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
459 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 460 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 461 462 JOIN_KINDS = { 463 *parser.Parser.JOIN_KINDS, 464 TokenType.ANY, 465 TokenType.ASOF, 466 TokenType.ARRAY, 467 } 468 469 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 470 TokenType.ANY, 471 TokenType.ARRAY, 472 TokenType.FINAL, 473 TokenType.FORMAT, 474 TokenType.SETTINGS, 475 } 476 477 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 478 TokenType.FORMAT, 479 } 480 481 LOG_DEFAULTS_TO_LN = True 482 483 QUERY_MODIFIER_PARSERS = { 484 **parser.Parser.QUERY_MODIFIER_PARSERS, 485 TokenType.SETTINGS: lambda self: ( 486 "settings", 487 self._advance() or self._parse_csv(self._parse_assignment), 488 ), 489 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 490 } 491 492 CONSTRAINT_PARSERS = { 493 **parser.Parser.CONSTRAINT_PARSERS, 494 "INDEX": lambda self: self._parse_index_constraint(), 495 "CODEC": lambda self: self._parse_compress(), 496 } 497 498 ALTER_PARSERS = { 499 **parser.Parser.ALTER_PARSERS, 500 "REPLACE": lambda self: self._parse_alter_table_replace(), 501 } 502 503 SCHEMA_UNNAMED_CONSTRAINTS = { 504 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 505 "INDEX", 506 } 507 508 PLACEHOLDER_PARSERS = { 509 **parser.Parser.PLACEHOLDER_PARSERS, 510 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 511 } 512 513 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 514 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 515 return self._parse_lambda() 516 517 def _parse_types( 518 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 519 ) -> t.Optional[exp.Expression]: 520 dtype = super()._parse_types( 521 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 522 ) 523 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 524 # Mark every type as non-nullable which is ClickHouse's default, unless it's 525 # already marked as nullable. This marker helps us transpile types from other 526 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 527 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 528 # fail in ClickHouse without the `Nullable` type constructor. 529 dtype.set("nullable", False) 530 531 return dtype 532 533 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 534 index = self._index 535 this = self._parse_bitwise() 536 if self._match(TokenType.FROM): 537 self._retreat(index) 538 return super()._parse_extract() 539 540 # We return Anonymous here because extract and regexpExtract have different semantics, 541 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 542 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 543 # 544 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
545 self._match(TokenType.COMMA) 546 return self.expression( 547 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 548 ) 549 550 def _parse_assignment(self) -> t.Optional[exp.Expression]: 551 this = super()._parse_assignment() 552 553 if self._match(TokenType.PLACEHOLDER): 554 return self.expression( 555 exp.If, 556 this=this, 557 true=self._parse_assignment(), 558 false=self._match(TokenType.COLON) and self._parse_assignment(), 559 ) 560 561 return this 562 563 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 564 """ 565 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 566 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 567 """ 568 this = self._parse_id_var() 569 self._match(TokenType.COLON) 570 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 571 self._match_text_seq("IDENTIFIER") and "Identifier" 572 ) 573 574 if not kind: 575 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 576 elif not self._match(TokenType.R_BRACE): 577 self.raise_error("Expecting }") 578 579 return self.expression(exp.Placeholder, this=this, kind=kind) 580 581 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 582 this = super()._parse_in(this) 583 this.set("is_global", is_global) 584 return this 585 586 def _parse_table( 587 self, 588 schema: bool = False, 589 joins: bool = False, 590 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 591 parse_bracket: bool = False, 592 is_db_reference: bool = False, 593 parse_partition: bool = False, 594 ) -> t.Optional[exp.Expression]: 595 this = super()._parse_table( 596 schema=schema, 597 joins=joins, 598 alias_tokens=alias_tokens, 599 parse_bracket=parse_bracket, 600 is_db_reference=is_db_reference, 601 ) 602 603 if self._match(TokenType.FINAL): 604 this = self.expression(exp.Final, this=this) 605 606 return this 607 608 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 609 return super()._parse_position(haystack_first=True) 610 611 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 612 def _parse_cte(self) -> exp.CTE: 613 # WITH <identifier> AS <subquery expression> 614 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 615 616 if not cte: 617 # WITH <expression> AS <identifier> 618 cte = self.expression( 619 exp.CTE, 620 this=self._parse_assignment(), 621 alias=self._parse_table_alias(), 622 scalar=True, 623 ) 624 625 return cte 626 627 def _parse_join_parts( 628 self, 629 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 630 is_global = self._match(TokenType.GLOBAL) and self._prev 631 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 632 633 if kind_pre: 634 kind = self._match_set(self.JOIN_KINDS) and self._prev 635 side = self._match_set(self.JOIN_SIDES) and self._prev 636 return is_global, side, kind 637 638 return ( 639 is_global, 640 self._match_set(self.JOIN_SIDES) and self._prev, 641 self._match_set(self.JOIN_KINDS) and self._prev, 642 ) 643 644 def _parse_join( 645 self, skip_join_token: bool = False, parse_bracket: bool = False 646 ) -> t.Optional[exp.Join]: 647 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 648 if join: 649 join.set("global", join.args.pop("method", None)) 650 651 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 652 # 
https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 653 if join.kind == "ARRAY": 654 for table in join.find_all(exp.Table): 655 table.replace(table.to_column()) 656 657 return join 658 659 def _parse_function( 660 self, 661 functions: t.Optional[t.Dict[str, t.Callable]] = None, 662 anonymous: bool = False, 663 optional_parens: bool = True, 664 any_token: bool = False, 665 ) -> t.Optional[exp.Expression]: 666 expr = super()._parse_function( 667 functions=functions, 668 anonymous=anonymous, 669 optional_parens=optional_parens, 670 any_token=any_token, 671 ) 672 673 func = expr.this if isinstance(expr, exp.Window) else expr 674 675 # Aggregate functions can be split in 2 parts: <func_name><suffix> 676 parts = ( 677 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 678 ) 679 680 if parts: 681 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 682 params = self._parse_func_params(anon_func) 683 684 kwargs = { 685 "this": anon_func.this, 686 "expressions": anon_func.expressions, 687 } 688 if parts[1]: 689 kwargs["parts"] = parts 690 exp_class: t.Type[exp.Expression] = ( 691 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 692 ) 693 else: 694 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 695 696 kwargs["exp_class"] = exp_class 697 if params: 698 kwargs["params"] = params 699 700 func = self.expression(**kwargs) 701 702 if isinstance(expr, exp.Window): 703 # The window's func was parsed as Anonymous in base parser, fix its 704 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 705 expr.set("this", func) 706 elif params: 707 # Params have blocked super()._parse_function() from parsing the following window 708 # (if that exists) as they're standing between the function call and the window spec 709 expr = self._parse_window(func) 710 else: 711 expr = func 712 713 return expr 714 715 def _parse_func_params( 716 self, this: t.Optional[exp.Func] = None 717 ) -> t.Optional[t.List[exp.Expression]]: 718 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 719 return self._parse_csv(self._parse_lambda) 720 721 if self._match(TokenType.L_PAREN): 722 params = self._parse_csv(self._parse_lambda) 723 self._match_r_paren(this) 724 return params 725 726 return None 727 728 def _parse_quantile(self) -> exp.Quantile: 729 this = self._parse_lambda() 730 params = self._parse_func_params() 731 if params: 732 return self.expression(exp.Quantile, this=params[0], quantile=this) 733 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 734 735 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 736 return super()._parse_wrapped_id_vars(optional=True) 737 738 def _parse_primary_key( 739 self, wrapped_optional: bool = False, in_props: bool = False 740 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 741 return super()._parse_primary_key( 742 wrapped_optional=wrapped_optional or in_props, in_props=in_props 743 ) 744 745 def _parse_on_property(self) -> t.Optional[exp.Expression]: 746 index = self._index 747 if self._match_text_seq("CLUSTER"): 748 this = self._parse_id_var() 749 if this: 750 return self.expression(exp.OnCluster, this=this) 751 else: 752 self._retreat(index) 753 return None 754 755 def _parse_index_constraint( 756 self, kind: t.Optional[str] = None 757 ) -> exp.IndexColumnConstraint: 758 # INDEX name1 expr TYPE type1(args) GRANULARITY value 759 this = self._parse_id_var() 760 expression = self._parse_assignment() 761 762 
index_type = self._match_text_seq("TYPE") and ( 763 self._parse_function() or self._parse_var() 764 ) 765 766 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 767 768 return self.expression( 769 exp.IndexColumnConstraint, 770 this=this, 771 expression=expression, 772 index_type=index_type, 773 granularity=granularity, 774 ) 775 776 def _parse_partition(self) -> t.Optional[exp.Partition]: 777 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 778 if not self._match(TokenType.PARTITION): 779 return None 780 781 if self._match_text_seq("ID"): 782 # Corresponds to the PARTITION ID <string_value> syntax 783 expressions: t.List[exp.Expression] = [ 784 self.expression(exp.PartitionId, this=self._parse_string()) 785 ] 786 else: 787 expressions = self._parse_expressions() 788 789 return self.expression(exp.Partition, expressions=expressions) 790 791 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 792 partition = self._parse_partition() 793 794 if not partition or not self._match(TokenType.FROM): 795 return None 796 797 return self.expression( 798 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 799 ) 800 801 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 802 if not self._match_text_seq("PROJECTION"): 803 return None 804 805 return self.expression( 806 exp.ProjectionDef, 807 this=self._parse_id_var(), 808 expression=self._parse_wrapped(self._parse_statement), 809 ) 810 811 def _parse_constraint(self) -> t.Optional[exp.Expression]: 812 return super()._parse_constraint() or self._parse_projection_def() 813 814 def _parse_alias( 815 self, this: t.Optional[exp.Expression], explicit: bool = False 816 ) -> t.Optional[exp.Expression]: 817 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 818 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 819 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 820 return this 821 822 return super()._parse_alias(this=this, explicit=explicit) 823 824 def _parse_expression(self) -> t.Optional[exp.Expression]: 825 this = super()._parse_expression() 826 827 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 828 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 829 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 830 self._match(TokenType.R_PAREN) 831 832 return this 833 834 def _parse_columns(self) -> exp.Expression: 835 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 836 837 while self._next and self._match_text_seq(")", "APPLY", "("): 838 self._match(TokenType.R_PAREN) 839 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 840 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
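A hedged sketch of how these arguments are reached in practice: the top-level helpers forward unknown keyword arguments to the dialect's Parser, so error handling can be tuned per call.

```python
import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # ErrorLevel.RAISE collects messages and raises once at the end, capped at
    # max_errors, instead of failing on the first problem (IMMEDIATE).
    sqlglot.parse_one("SELECT 1 +", read="clickhouse",
                      error_level=ErrorLevel.RAISE, max_errors=3)
except ParseError as e:
    print(e.errors)  # structured error details, at most max_errors entries
```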
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
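A hedged sketch of several ClickHouse-specific parses implemented above; re-generating through the dialect shows how each construct is normalized (for instance, the ternary may come back as a CASE expression, depending on the sqlglot version):

```python
import sqlglot

for sql in [
    "SELECT {abc: UInt32}",                                     # typed query parameter
    "SELECT x ? 1 : 0 FROM t",                                  # ternary parsed via the PLACEHOLDER token
    "SELECT * FROM t1 WHERE id GLOBAL IN (SELECT id FROM t2)",  # GLOBAL IN
    "SELECT * FROM t FINAL",                                    # FINAL table modifier
]:
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
```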
842 class Generator(generator.Generator): 843 QUERY_HINTS = False 844 STRUCT_DELIMITER = ("(", ")") 845 NVL2_SUPPORTED = False 846 TABLESAMPLE_REQUIRES_PARENS = False 847 TABLESAMPLE_SIZE_IS_ROWS = False 848 TABLESAMPLE_KEYWORDS = "SAMPLE" 849 LAST_DAY_SUPPORTS_DATE_PART = False 850 CAN_IMPLEMENT_ARRAY_ANY = True 851 SUPPORTS_TO_NUMBER = False 852 JOIN_HINTS = False 853 TABLE_HINTS = False 854 GROUPINGS_SEP = "" 855 SET_OP_MODIFIERS = False 856 SUPPORTS_TABLE_ALIAS_COLUMNS = False 857 VALUES_AS_TABLE = False 858 ARRAY_SIZE_NAME = "LENGTH" 859 860 STRING_TYPE_MAPPING = { 861 exp.DataType.Type.CHAR: "String", 862 exp.DataType.Type.LONGBLOB: "String", 863 exp.DataType.Type.LONGTEXT: "String", 864 exp.DataType.Type.MEDIUMBLOB: "String", 865 exp.DataType.Type.MEDIUMTEXT: "String", 866 exp.DataType.Type.TINYBLOB: "String", 867 exp.DataType.Type.TINYTEXT: "String", 868 exp.DataType.Type.TEXT: "String", 869 exp.DataType.Type.VARBINARY: "String", 870 exp.DataType.Type.VARCHAR: "String", 871 } 872 873 SUPPORTED_JSON_PATH_PARTS = { 874 exp.JSONPathKey, 875 exp.JSONPathRoot, 876 exp.JSONPathSubscript, 877 } 878 879 TYPE_MAPPING = { 880 **generator.Generator.TYPE_MAPPING, 881 **STRING_TYPE_MAPPING, 882 exp.DataType.Type.ARRAY: "Array", 883 exp.DataType.Type.BOOLEAN: "Bool", 884 exp.DataType.Type.BIGINT: "Int64", 885 exp.DataType.Type.DATE32: "Date32", 886 exp.DataType.Type.DATETIME: "DateTime", 887 exp.DataType.Type.DATETIME64: "DateTime64", 888 exp.DataType.Type.DECIMAL: "Decimal", 889 exp.DataType.Type.DECIMAL32: "Decimal32", 890 exp.DataType.Type.DECIMAL64: "Decimal64", 891 exp.DataType.Type.DECIMAL128: "Decimal128", 892 exp.DataType.Type.DECIMAL256: "Decimal256", 893 exp.DataType.Type.TIMESTAMP: "DateTime", 894 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 895 exp.DataType.Type.DOUBLE: "Float64", 896 exp.DataType.Type.ENUM: "Enum", 897 exp.DataType.Type.ENUM8: "Enum8", 898 exp.DataType.Type.ENUM16: "Enum16", 899 exp.DataType.Type.FIXEDSTRING: "FixedString", 900 exp.DataType.Type.FLOAT: "Float32", 901 exp.DataType.Type.INT: "Int32", 902 exp.DataType.Type.MEDIUMINT: "Int32", 903 exp.DataType.Type.INT128: "Int128", 904 exp.DataType.Type.INT256: "Int256", 905 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 906 exp.DataType.Type.MAP: "Map", 907 exp.DataType.Type.NESTED: "Nested", 908 exp.DataType.Type.SMALLINT: "Int16", 909 exp.DataType.Type.STRUCT: "Tuple", 910 exp.DataType.Type.TINYINT: "Int8", 911 exp.DataType.Type.UBIGINT: "UInt64", 912 exp.DataType.Type.UINT: "UInt32", 913 exp.DataType.Type.UINT128: "UInt128", 914 exp.DataType.Type.UINT256: "UInt256", 915 exp.DataType.Type.USMALLINT: "UInt16", 916 exp.DataType.Type.UTINYINT: "UInt8", 917 exp.DataType.Type.IPV4: "IPv4", 918 exp.DataType.Type.IPV6: "IPv6", 919 exp.DataType.Type.POINT: "Point", 920 exp.DataType.Type.RING: "Ring", 921 exp.DataType.Type.LINESTRING: "LineString", 922 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 923 exp.DataType.Type.POLYGON: "Polygon", 924 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 925 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 926 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 927 } 928 929 TRANSFORMS = { 930 **generator.Generator.TRANSFORMS, 931 exp.AnyValue: rename_func("any"), 932 exp.ApproxDistinct: rename_func("uniq"), 933 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 934 exp.ArraySum: rename_func("arraySum"), 935 exp.ArgMax: arg_max_or_min_no_count("argMax"), 936 exp.ArgMin: arg_max_or_min_no_count("argMin"), 937 exp.Array: 
inline_array_sql, 938 exp.CastToStrType: rename_func("CAST"), 939 exp.CountIf: rename_func("countIf"), 940 exp.CompressColumnConstraint: lambda self, 941 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 942 exp.ComputedColumnConstraint: lambda self, 943 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 944 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 945 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 946 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 947 exp.DateStrToDate: rename_func("toDate"), 948 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 949 exp.Explode: rename_func("arrayJoin"), 950 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 951 exp.IsNan: rename_func("isNaN"), 952 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 954 exp.JSONPathKey: json_path_key_only_name, 955 exp.JSONPathRoot: lambda *_: "", 956 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 957 exp.Median: rename_func("median"), 958 exp.Nullif: rename_func("nullIf"), 959 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 960 exp.Pivot: no_pivot_sql, 961 exp.Quantile: _quantile_sql, 962 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 963 exp.Rand: rename_func("randCanonical"), 964 exp.StartsWith: rename_func("startsWith"), 965 exp.StrPosition: lambda self, e: self.func( 966 "position", e.this, e.args.get("substr"), e.args.get("position") 967 ), 968 exp.TimeToStr: lambda self, e: self.func( 969 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 970 ), 971 exp.TimeStrToTime: _timestrtotime_sql, 972 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 973 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 974 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 975 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 976 exp.MD5Digest: rename_func("MD5"), 977 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 978 exp.SHA: rename_func("SHA1"), 979 exp.SHA2: sha256_sql, 980 exp.UnixToTime: _unix_to_time_sql, 981 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 982 exp.Trim: trim_sql, 983 exp.Variance: rename_func("varSamp"), 984 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 985 exp.Stddev: rename_func("stddevSamp"), 986 exp.Chr: rename_func("CHAR"), 987 exp.Lag: lambda self, e: self.func( 988 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 989 ), 990 exp.Lead: lambda self, e: self.func( 991 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 992 ), 993 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 994 rename_func("editDistance") 995 ), 996 } 997 998 PROPERTIES_LOCATION = { 999 **generator.Generator.PROPERTIES_LOCATION, 1000 exp.OnCluster: exp.Properties.Location.POST_NAME, 1001 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1002 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1003 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1004 } 1005 1006 # There's no list in docs, but it can be found in Clickhouse code 1007 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1008 ON_CLUSTER_TARGETS = { 1009 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1010 "DATABASE", 1011 "TABLE", 1012 "VIEW", 1013 "DICTIONARY", 1014 "INDEX", 1015 
"FUNCTION", 1016 "NAMED COLLECTION", 1017 } 1018 1019 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1020 NON_NULLABLE_TYPES = { 1021 exp.DataType.Type.ARRAY, 1022 exp.DataType.Type.MAP, 1023 exp.DataType.Type.STRUCT, 1024 exp.DataType.Type.POINT, 1025 exp.DataType.Type.RING, 1026 exp.DataType.Type.LINESTRING, 1027 exp.DataType.Type.MULTILINESTRING, 1028 exp.DataType.Type.POLYGON, 1029 exp.DataType.Type.MULTIPOLYGON, 1030 } 1031 1032 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1033 strtodate_sql = self.function_fallback_sql(expression) 1034 1035 if not isinstance(expression.parent, exp.Cast): 1036 # StrToDate returns DATEs in other dialects (eg. postgres), so 1037 # this branch aims to improve the transpilation to clickhouse 1038 return f"CAST({strtodate_sql} AS DATE)" 1039 1040 return strtodate_sql 1041 1042 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1043 this = expression.this 1044 1045 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1046 return self.sql(this) 1047 1048 return super().cast_sql(expression, safe_prefix=safe_prefix) 1049 1050 def trycast_sql(self, expression: exp.TryCast) -> str: 1051 dtype = expression.to 1052 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1053 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1054 dtype.set("nullable", True) 1055 1056 return super().cast_sql(expression) 1057 1058 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1059 this = self.json_path_part(expression.this) 1060 return str(int(this) + 1) if is_int(this) else this 1061 1062 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1063 return f"AS {self.sql(expression, 'this')}" 1064 1065 def _any_to_has( 1066 self, 1067 expression: exp.EQ | exp.NEQ, 1068 default: t.Callable[[t.Any], str], 1069 prefix: str = "", 1070 ) -> str: 1071 if isinstance(expression.left, exp.Any): 1072 arr = expression.left 1073 this = expression.right 1074 elif isinstance(expression.right, exp.Any): 1075 arr = expression.right 1076 this = expression.left 1077 else: 1078 return default(expression) 1079 1080 return prefix + self.func("has", arr.this.unnest(), this) 1081 1082 def eq_sql(self, expression: exp.EQ) -> str: 1083 return self._any_to_has(expression, super().eq_sql) 1084 1085 def neq_sql(self, expression: exp.NEQ) -> str: 1086 return self._any_to_has(expression, super().neq_sql, "NOT ") 1087 1088 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1089 # Manually add a flag to make the search case-insensitive 1090 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1091 return self.func("match", expression.this, regex) 1092 1093 def datatype_sql(self, expression: exp.DataType) -> str: 1094 # String is the standard ClickHouse type, every other variant is just an alias. 1095 # Additionally, any supplied length parameter will be ignored. 1096 # 1097 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1098 if expression.this in self.STRING_TYPE_MAPPING: 1099 dtype = "String" 1100 else: 1101 dtype = super().datatype_sql(expression) 1102 1103 # This section changes the type to `Nullable(...)` if the following conditions hold: 1104 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1105 # and change their semantics 1106 # - It's not the key type of a `Map`. 
    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)
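placeholder_sql and oncluster_sql cover two ClickHouse-specific pieces of syntax: typed query parameters and ON CLUSTER clauses, the latter placed right after the object name by createable_sql since exp.OnCluster is a POST_NAME property. A small round-trip sketch (assuming a recent sqlglot; outputs untested here):

    import sqlglot

    # Typed query parameter, emitted as {name: Type}
    print(sqlglot.parse_one("SELECT {x: UInt32}", read="clickhouse").sql("clickhouse"))

    # ON CLUSTER lands between the table name and the column schema
    ddl = "CREATE TABLE foo ON CLUSTER c (x Int32) ENGINE=MergeTree ORDER BY x"
    print(sqlglot.transpile(ddl, read="clickhouse", write="clickhouse")[0])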
    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql
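prewhere_sql and is_sql round out the ClickHouse-specific generation: PREWHERE is preserved on round-trips, and a negated IS NULL is parenthesized exactly as the comment in is_sql describes. A sketch (output formatting may vary by sqlglot version):

    import sqlglot

    # PREWHERE survives a clickhouse -> clickhouse round-trip
    print(sqlglot.transpile(
        "SELECT * FROM t PREWHERE a = 1 WHERE b = 2",
        read="clickhouse", write="clickhouse",
    )[0])

    # value IS NOT NULL -> NOT (value IS NULL)
    print(sqlglot.transpile("SELECT x IS NOT NULL FROM t", write="clickhouse")[0])
    # expected: SELECT NOT (x IS NULL) FROM t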
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
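In practice these options are usually passed through the top-level helpers (e.g. sqlglot.transpile or Expression.sql) rather than by instantiating a Generator directly. For example:

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT a, b FROM tbl WHERE a > 0",
            write="clickhouse",
            pretty=True,    # format the output
            identify=True,  # quote all identifiers
        )[0]
    )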
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql