sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.RLIKE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 741 TokenType.WHERE: lambda self: ("where", self._parse_where()), 742 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 743 TokenType.HAVING: lambda self: ("having", self._parse_having()), 744 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 745 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 746 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 747 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 748 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 749 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 750 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 751 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 752 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.CLUSTER_BY: lambda self: ( 755 "cluster", 756 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 757 ), 758 TokenType.DISTRIBUTE_BY: lambda self: ( 759 "distribute", 760 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 761 ), 762 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NULL_ORDERING: str = "nulls_are_small" 835 SHOW_TRIE: t.Dict = {} 836 SET_TRIE: t.Dict = {} 837 FORMAT_MAPPING: t.Dict[str, str] = {} 838 FORMAT_TRIE: t.Dict = {} 839 TIME_MAPPING: t.Dict[str, str] = {} 840 TIME_TRIE: t.Dict = {} 841 842 def __init__( 843 self, 844 error_level: t.Optional[ErrorLevel] = None, 845 error_message_context: int = 100, 846 max_errors: int = 3, 847 ): 848 self.error_level = error_level or ErrorLevel.IMMEDIATE 849 self.error_message_context = error_message_context 850 self.max_errors = max_errors 851 self.reset() 852 853 def reset(self): 854 self.sql = "" 855 self.errors = [] 856 self._tokens = [] 857 self._index = 0 858 self._curr = None 859 self._next = None 860 self._prev = None 861 self._prev_comments = None 862 863 def parse( 864 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 865 ) -> t.List[t.Optional[exp.Expression]]: 866 """ 867 Parses a list of tokens and returns a list of syntax trees, one tree 868 per parsed SQL statement. 869 870 Args: 871 raw_tokens: The list of tokens. 872 sql: The original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The list of the produced syntax trees. 876 """ 877 return self._parse( 878 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 879 ) 880 881 def parse_into( 882 self, 883 expression_types: exp.IntoType, 884 raw_tokens: t.List[Token], 885 sql: t.Optional[str] = None, 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens into a given Expression type. If a collection of Expression 889 types is given instead, this method will try to parse the token list into each one 890 of them, stopping at the first for which the parsing succeeds. 891 892 Args: 893 expression_types: The expression type(s) to try and parse the token list into. 894 raw_tokens: The list of tokens. 895 sql: The original SQL string, used to produce helpful debug messages. 896 897 Returns: 898 The target Expression. 899 """ 900 errors = [] 901 for expression_type in ensure_list(expression_types): 902 parser = self.EXPRESSION_PARSERS.get(expression_type) 903 if not parser: 904 raise TypeError(f"No parser registered for {expression_type}") 905 906 try: 907 return self._parse(parser, raw_tokens, sql) 908 except ParseError as e: 909 e.errors[0]["into_expression"] = expression_type 910 errors.append(e) 911 912 raise ParseError( 913 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 914 errors=merge_errors(errors), 915 ) from errors[-1] 916 917 def _parse( 918 self, 919 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 920 raw_tokens: t.List[Token], 921 sql: t.Optional[str] = None, 922 ) -> t.List[t.Optional[exp.Expression]]: 923 self.reset() 924 self.sql = sql or "" 925 926 total = len(raw_tokens) 927 chunks: t.List[t.List[Token]] = [[]] 928 929 for i, token in enumerate(raw_tokens): 930 if token.token_type == TokenType.SEMICOLON: 931 if i < total - 1: 932 chunks.append([]) 933 else: 934 chunks[-1].append(token) 935 936 expressions = [] 937 938 for tokens in chunks: 939 self._index = -1 940 self._tokens = tokens 941 self._advance() 942 943 expressions.append(parse_method(self)) 944 945 if self._index < len(self._tokens): 946 self.raise_error("Invalid expression / Unexpected token") 947 948 self.check_errors() 949 950 return expressions 951 952 def check_errors(self) -> None: 953 """Logs or raises any found errors, depending on the chosen error level setting.""" 954 if self.error_level == ErrorLevel.WARN: 955 for error in self.errors: 956 logger.error(str(error)) 957 elif self.error_level == ErrorLevel.RAISE and self.errors: 958 raise ParseError( 959 concat_messages(self.errors, self.max_errors), 960 errors=merge_errors(self.errors), 961 ) 962 963 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 964 """ 965 Appends an error in the list of recorded errors or raises it, depending on the chosen 966 error level setting. 967 """ 968 token = token or self._curr or self._prev or Token.string("") 969 start = token.start 970 end = token.end + 1 971 start_context = self.sql[max(start - self.error_message_context, 0) : start] 972 highlight = self.sql[start:end] 973 end_context = self.sql[end : end + self.error_message_context] 974 975 error = ParseError.new( 976 f"{message}. Line {token.line}, Col: {token.col}.\n" 977 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 978 description=message, 979 line=token.line, 980 col=token.col, 981 start_context=start_context, 982 highlight=highlight, 983 end_context=end_context, 984 ) 985 986 if self.error_level == ErrorLevel.IMMEDIATE: 987 raise error 988 989 self.errors.append(error) 990 991 def expression( 992 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 993 ) -> E: 994 """ 995 Creates a new, validated Expression. 996 997 Args: 998 exp_class: The expression class to instantiate. 999 comments: An optional list of comments to attach to the expression. 1000 kwargs: The arguments to set for the expression along with their respective values. 1001 1002 Returns: 1003 The target expression. 1004 """ 1005 instance = exp_class(**kwargs) 1006 instance.add_comments(comments) if comments else self._add_comments(instance) 1007 return self.validate_expression(instance) 1008 1009 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1010 if expression and self._prev_comments: 1011 expression.add_comments(self._prev_comments) 1012 self._prev_comments = None 1013 1014 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1015 """ 1016 Validates an Expression, making sure that all its mandatory arguments are set. 1017 1018 Args: 1019 expression: The expression to validate. 1020 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1021 1022 Returns: 1023 The validated expression. 1024 """ 1025 if self.error_level != ErrorLevel.IGNORE: 1026 for error_message in expression.error_messages(args): 1027 self.raise_error(error_message) 1028 1029 return expression 1030 1031 def _find_sql(self, start: Token, end: Token) -> str: 1032 return self.sql[start.start : end.end + 1] 1033 1034 def _advance(self, times: int = 1) -> None: 1035 self._index += times 1036 self._curr = seq_get(self._tokens, self._index) 1037 self._next = seq_get(self._tokens, self._index + 1) 1038 1039 if self._index > 0: 1040 self._prev = self._tokens[self._index - 1] 1041 self._prev_comments = self._prev.comments 1042 else: 1043 self._prev = None 1044 self._prev_comments = None 1045 1046 def _retreat(self, index: int) -> None: 1047 if index != self._index: 1048 self._advance(index - self._index) 1049 1050 def _parse_command(self) -> exp.Command: 1051 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1052 1053 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1054 start = self._prev 1055 exists = self._parse_exists() if allow_exists else None 1056 1057 self._match(TokenType.ON) 1058 1059 kind = self._match_set(self.CREATABLES) and self._prev 1060 if not kind: 1061 return self._parse_as_command(start) 1062 1063 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1064 this = self._parse_user_defined_function(kind=kind.token_type) 1065 elif kind.token_type == TokenType.TABLE: 1066 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1067 elif kind.token_type == TokenType.COLUMN: 1068 this = self._parse_column() 1069 else: 1070 this = self._parse_id_var() 1071 1072 self._match(TokenType.IS) 1073 1074 return self.expression( 1075 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1076 ) 1077 1078 def _parse_to_table( 1079 self, 1080 ) -> exp.ToTableProperty: 1081 table = self._parse_table_parts(schema=True) 1082 return self.expression(exp.ToTableProperty, this=table) 1083 1084 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1085 def _parse_ttl(self) -> exp.Expression: 1086 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1087 this = self._parse_bitwise() 1088 1089 if self._match_text_seq("DELETE"): 1090 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1091 if self._match_text_seq("RECOMPRESS"): 1092 return self.expression( 1093 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1094 ) 1095 if self._match_text_seq("TO", "DISK"): 1096 return self.expression( 1097 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1098 ) 1099 if self._match_text_seq("TO", "VOLUME"): 1100 return self.expression( 1101 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1102 ) 1103 1104 return this 1105 1106 expressions = self._parse_csv(_parse_ttl_action) 1107 where = self._parse_where() 1108 group = self._parse_group() 1109 1110 aggregates = None 1111 if group and self._match(TokenType.SET): 1112 aggregates = self._parse_csv(self._parse_set_item) 1113 1114 return self.expression( 1115 exp.MergeTreeTTL, 1116 expressions=expressions, 1117 where=where, 1118 group=group, 1119 aggregates=aggregates, 1120 ) 1121 1122 def _parse_statement(self) -> t.Optional[exp.Expression]: 1123 if self._curr is None: 1124 return None 1125 1126 if self._match_set(self.STATEMENT_PARSERS): 1127 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1128 1129 if self._match_set(Tokenizer.COMMANDS): 1130 return self._parse_command() 1131 1132 expression = self._parse_expression() 1133 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1134 return self._parse_query_modifiers(expression) 1135 1136 def _parse_drop(self) -> exp.Drop | exp.Command: 1137 start = self._prev 1138 temporary = self._match(TokenType.TEMPORARY) 1139 materialized = self._match_text_seq("MATERIALIZED") 1140 1141 kind = self._match_set(self.CREATABLES) and self._prev.text 1142 if not kind: 1143 return self._parse_as_command(start) 1144 1145 return self.expression( 1146 exp.Drop, 1147 comments=start.comments, 1148 exists=self._parse_exists(), 1149 this=self._parse_table(schema=True), 1150 kind=kind, 1151 temporary=temporary, 1152 materialized=materialized, 1153 cascade=self._match_text_seq("CASCADE"), 1154 constraints=self._match_text_seq("CONSTRAINTS"), 1155 purge=self._match_text_seq("PURGE"), 1156 ) 1157 1158 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1159 return ( 1160 self._match(TokenType.IF) 1161 and (not not_ or self._match(TokenType.NOT)) 1162 and self._match(TokenType.EXISTS) 1163 ) 1164 1165 def _parse_create(self) -> exp.Create | exp.Command: 1166 # Note: this can't be None because we've matched a statement parser 1167 start = self._prev 1168 replace = start.text.upper() == "REPLACE" or self._match_pair( 1169 TokenType.OR, TokenType.REPLACE 1170 ) 1171 unique = self._match(TokenType.UNIQUE) 1172 1173 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1174 self._advance() 1175 1176 properties = None 1177 create_token = self._match_set(self.CREATABLES) and self._prev 1178 1179 if not create_token: 1180 # exp.Properties.Location.POST_CREATE 1181 properties = self._parse_properties() 1182 create_token = self._match_set(self.CREATABLES) and self._prev 1183 1184 if not properties or not create_token: 1185 return self._parse_as_command(start) 1186 1187 exists = self._parse_exists(not_=True) 1188 this = None 1189 expression = None 1190 indexes = None 1191 no_schema_binding = None 1192 begin = None 1193 clone = None 1194 1195 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1196 nonlocal properties 1197 if properties and temp_props: 1198 properties.expressions.extend(temp_props.expressions) 1199 elif temp_props: 1200 properties = temp_props 1201 1202 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1203 this = self._parse_user_defined_function(kind=create_token.token_type) 1204 1205 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1206 extend_props(self._parse_properties()) 1207 1208 self._match(TokenType.ALIAS) 1209 begin = self._match(TokenType.BEGIN) 1210 return_ = self._match_text_seq("RETURN") 1211 expression = self._parse_statement() 1212 1213 if return_: 1214 expression = self.expression(exp.Return, this=expression) 1215 elif create_token.token_type == TokenType.INDEX: 1216 this = self._parse_index(index=self._parse_id_var()) 1217 elif create_token.token_type in self.DB_CREATABLES: 1218 table_parts = self._parse_table_parts(schema=True) 1219 1220 # exp.Properties.Location.POST_NAME 1221 self._match(TokenType.COMMA) 1222 extend_props(self._parse_properties(before=True)) 1223 1224 this = self._parse_schema(this=table_parts) 1225 1226 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1227 extend_props(self._parse_properties()) 1228 1229 self._match(TokenType.ALIAS) 1230 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1231 # exp.Properties.Location.POST_ALIAS 1232 extend_props(self._parse_properties()) 1233 1234 expression = self._parse_ddl_select() 1235 1236 if create_token.token_type == TokenType.TABLE: 1237 # exp.Properties.Location.POST_EXPRESSION 1238 extend_props(self._parse_properties()) 1239 1240 indexes = [] 1241 while True: 1242 index = self._parse_index() 1243 1244 # exp.Properties.Location.POST_INDEX 1245 extend_props(self._parse_properties()) 1246 1247 if not index: 1248 break 1249 else: 1250 self._match(TokenType.COMMA) 1251 indexes.append(index) 1252 elif create_token.token_type == TokenType.VIEW: 1253 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1254 no_schema_binding = True 1255 1256 if self._match_text_seq("CLONE"): 1257 clone = self._parse_table(schema=True) 1258 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1259 clone_kind = ( 1260 self._match(TokenType.L_PAREN) 1261 and self._match_texts(self.CLONE_KINDS) 1262 and self._prev.text.upper() 1263 ) 1264 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1265 self._match(TokenType.R_PAREN) 1266 clone = self.expression( 1267 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1268 ) 1269 1270 return self.expression( 1271 exp.Create, 1272 this=this, 1273 kind=create_token.text, 1274 replace=replace, 1275 unique=unique, 1276 expression=expression, 1277 exists=exists, 1278 properties=properties, 1279 indexes=indexes, 1280 no_schema_binding=no_schema_binding, 1281 begin=begin, 1282 clone=clone, 1283 ) 1284 1285 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1286 # only used for teradata currently 1287 self._match(TokenType.COMMA) 1288 1289 kwargs = { 1290 "no": self._match_text_seq("NO"), 1291 "dual": self._match_text_seq("DUAL"), 1292 "before": self._match_text_seq("BEFORE"), 1293 "default": self._match_text_seq("DEFAULT"), 1294 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1295 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1296 "after": self._match_text_seq("AFTER"), 1297 "minimum": self._match_texts(("MIN", "MINIMUM")), 1298 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1299 } 1300 1301 if self._match_texts(self.PROPERTY_PARSERS): 1302 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1303 try: 1304 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1305 except TypeError: 1306 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1307 1308 return None 1309 1310 def _parse_property(self) -> t.Optional[exp.Expression]: 1311 if self._match_texts(self.PROPERTY_PARSERS): 1312 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1313 1314 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1315 return self._parse_character_set(default=True) 1316 1317 if self._match_text_seq("COMPOUND", "SORTKEY"): 1318 return self._parse_sortkey(compound=True) 1319 1320 if self._match_text_seq("SQL", "SECURITY"): 1321 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1322 1323 assignment = self._match_pair( 1324 TokenType.VAR, TokenType.EQ, advance=False 1325 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1326 1327 if assignment: 1328 key = self._parse_var_or_string() 1329 self._match(TokenType.EQ) 1330 return self.expression(exp.Property, this=key, value=self._parse_column()) 1331 1332 return None 1333 1334 def _parse_stored(self) -> exp.FileFormatProperty: 1335 self._match(TokenType.ALIAS) 1336 1337 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1338 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1339 1340 return self.expression( 1341 exp.FileFormatProperty, 1342 this=self.expression( 1343 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1344 ) 1345 if input_format or output_format 1346 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1347 ) 1348 1349 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1350 self._match(TokenType.EQ) 1351 self._match(TokenType.ALIAS) 1352 return self.expression(exp_class, this=self._parse_field()) 1353 1354 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1355 properties = [] 1356 while True: 1357 if before: 1358 prop = self._parse_property_before() 1359 else: 1360 prop = self._parse_property() 1361 1362 if not prop: 1363 break 1364 for p in ensure_list(prop): 1365 properties.append(p) 1366 1367 if properties: 1368 return self.expression(exp.Properties, expressions=properties) 1369 1370 return None 1371 1372 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1373 return self.expression( 1374 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1375 ) 1376 1377 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1378 if self._index >= 2: 1379 pre_volatile_token = self._tokens[self._index - 2] 1380 else: 1381 pre_volatile_token = None 1382 1383 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1384 return exp.VolatileProperty() 1385 1386 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1387 1388 def _parse_with_property( 1389 self, 1390 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1391 if self._match(TokenType.L_PAREN, advance=False): 1392 return self._parse_wrapped_csv(self._parse_property) 1393 1394 if self._match_text_seq("JOURNAL"): 1395 return self._parse_withjournaltable() 1396 1397 if self._match_text_seq("DATA"): 1398 return self._parse_withdata(no=False) 1399 elif self._match_text_seq("NO", "DATA"): 1400 return self._parse_withdata(no=True) 1401 1402 if not self._next: 1403 return None 1404 1405 return self._parse_withisolatedloading() 1406 1407 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1408 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1409 self._match(TokenType.EQ) 1410 1411 user = self._parse_id_var() 1412 self._match(TokenType.PARAMETER) 1413 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1414 1415 if not user or not host: 1416 return None 1417 1418 return exp.DefinerProperty(this=f"{user}@{host}") 1419 1420 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1421 self._match(TokenType.TABLE) 1422 self._match(TokenType.EQ) 1423 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1424 1425 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1426 return self.expression(exp.LogProperty, no=no) 1427 1428 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1429 return self.expression(exp.JournalProperty, **kwargs) 1430 1431 def _parse_checksum(self) -> exp.ChecksumProperty: 1432 self._match(TokenType.EQ) 1433 1434 on = None 1435 if self._match(TokenType.ON): 1436 on = True 1437 elif self._match_text_seq("OFF"): 1438 on = False 1439 1440 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1441 1442 def _parse_cluster(self) -> exp.Cluster: 1443 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1444 1445 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1446 self._match_text_seq("BY") 1447 1448 self._match_l_paren() 1449 expressions = self._parse_csv(self._parse_column) 1450 self._match_r_paren() 1451 1452 if self._match_text_seq("SORTED", "BY"): 1453 self._match_l_paren() 1454 sorted_by = self._parse_csv(self._parse_ordered) 1455 self._match_r_paren() 1456 else: 1457 sorted_by = None 1458 1459 self._match(TokenType.INTO) 1460 buckets = self._parse_number() 1461 self._match_text_seq("BUCKETS") 1462 1463 return self.expression( 1464 exp.ClusteredByProperty, 1465 expressions=expressions, 1466 sorted_by=sorted_by, 1467 buckets=buckets, 1468 ) 1469 1470 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1471 if not self._match_text_seq("GRANTS"): 1472 self._retreat(self._index - 1) 1473 return None 1474 1475 return self.expression(exp.CopyGrantsProperty) 1476 1477 def _parse_freespace(self) -> exp.FreespaceProperty: 1478 self._match(TokenType.EQ) 1479 return self.expression( 1480 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1481 ) 1482 1483 def _parse_mergeblockratio( 1484 self, no: bool = False, default: bool = False 1485 ) -> exp.MergeBlockRatioProperty: 1486 if self._match(TokenType.EQ): 1487 return self.expression( 1488 exp.MergeBlockRatioProperty, 1489 this=self._parse_number(), 1490 percent=self._match(TokenType.PERCENT), 1491 ) 1492 1493 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1494 1495 def _parse_datablocksize( 1496 self, 1497 default: t.Optional[bool] = None, 1498 minimum: t.Optional[bool] = None, 1499 maximum: t.Optional[bool] = None, 1500 ) -> exp.DataBlocksizeProperty: 1501 self._match(TokenType.EQ) 1502 size = self._parse_number() 1503 1504 units = None 1505 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1506 units = self._prev.text 1507 1508 return self.expression( 1509 exp.DataBlocksizeProperty, 1510 size=size, 1511 units=units, 1512 default=default, 1513 minimum=minimum, 1514 maximum=maximum, 1515 ) 1516 1517 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1518 self._match(TokenType.EQ) 1519 always = self._match_text_seq("ALWAYS") 1520 manual = self._match_text_seq("MANUAL") 1521 never = self._match_text_seq("NEVER") 1522 default = self._match_text_seq("DEFAULT") 1523 1524 autotemp = None 1525 if self._match_text_seq("AUTOTEMP"): 1526 autotemp = self._parse_schema() 1527 1528 return self.expression( 1529 exp.BlockCompressionProperty, 1530 always=always, 1531 manual=manual, 1532 never=never, 1533 default=default, 1534 autotemp=autotemp, 1535 ) 1536 1537 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1538 no = self._match_text_seq("NO") 1539 concurrent = self._match_text_seq("CONCURRENT") 1540 self._match_text_seq("ISOLATED", "LOADING") 1541 for_all = self._match_text_seq("FOR", "ALL") 1542 for_insert = self._match_text_seq("FOR", "INSERT") 1543 for_none = self._match_text_seq("FOR", "NONE") 1544 return self.expression( 1545 exp.IsolatedLoadingProperty, 1546 no=no, 1547 concurrent=concurrent, 1548 for_all=for_all, 1549 for_insert=for_insert, 1550 for_none=for_none, 1551 ) 1552 1553 def _parse_locking(self) -> exp.LockingProperty: 1554 if self._match(TokenType.TABLE): 1555 kind = "TABLE" 1556 elif self._match(TokenType.VIEW): 1557 kind = "VIEW" 1558 elif self._match(TokenType.ROW): 1559 kind = "ROW" 1560 elif self._match_text_seq("DATABASE"): 1561 kind = "DATABASE" 1562 else: 1563 kind = None 1564 1565 if kind in ("DATABASE", "TABLE", "VIEW"): 1566 this = self._parse_table_parts() 1567 else: 1568 this = None 1569 1570 if self._match(TokenType.FOR): 1571 for_or_in = "FOR" 1572 elif self._match(TokenType.IN): 1573 for_or_in = "IN" 1574 else: 1575 for_or_in = None 1576 1577 if self._match_text_seq("ACCESS"): 1578 lock_type = "ACCESS" 1579 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1580 lock_type = "EXCLUSIVE" 1581 elif self._match_text_seq("SHARE"): 1582 lock_type = "SHARE" 1583 elif self._match_text_seq("READ"): 1584 lock_type = "READ" 1585 elif self._match_text_seq("WRITE"): 1586 lock_type = "WRITE" 1587 elif self._match_text_seq("CHECKSUM"): 1588 lock_type = "CHECKSUM" 1589 else: 1590 lock_type = None 1591 1592 override = self._match_text_seq("OVERRIDE") 1593 1594 return self.expression( 1595 exp.LockingProperty, 1596 this=this, 1597 kind=kind, 1598 for_or_in=for_or_in, 1599 lock_type=lock_type, 1600 override=override, 1601 ) 1602 1603 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1604 if self._match(TokenType.PARTITION_BY): 1605 return self._parse_csv(self._parse_conjunction) 1606 return [] 1607 1608 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1609 self._match(TokenType.EQ) 1610 return self.expression( 1611 exp.PartitionedByProperty, 1612 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1613 ) 1614 1615 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1616 if self._match_text_seq("AND", "STATISTICS"): 1617 statistics = True 1618 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1619 statistics = False 1620 else: 1621 statistics = None 1622 1623 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1624 1625 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1626 if self._match_text_seq("PRIMARY", "INDEX"): 1627 return exp.NoPrimaryIndexProperty() 1628 return None 1629 1630 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1631 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1632 return exp.OnCommitProperty() 1633 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1634 return exp.OnCommitProperty(delete=True) 1635 return None 1636 1637 def _parse_distkey(self) -> exp.DistKeyProperty: 1638 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1639 1640 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1641 table = self._parse_table(schema=True) 1642 1643 options = [] 1644 while self._match_texts(("INCLUDING", "EXCLUDING")): 1645 this = self._prev.text.upper() 1646 1647 id_var = self._parse_id_var() 1648 if not id_var: 1649 return None 1650 1651 options.append( 1652 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1653 ) 1654 1655 return self.expression(exp.LikeProperty, this=table, expressions=options) 1656 1657 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1658 return self.expression( 1659 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1660 ) 1661 1662 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1663 self._match(TokenType.EQ) 1664 return self.expression( 1665 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1666 ) 1667 1668 def _parse_returns(self) -> exp.ReturnsProperty: 1669 value: t.Optional[exp.Expression] 1670 is_table = self._match(TokenType.TABLE) 1671 1672 if is_table: 1673 if self._match(TokenType.LT): 1674 value = self.expression( 1675 exp.Schema, 1676 this="TABLE", 1677 expressions=self._parse_csv(self._parse_struct_types), 1678 ) 1679 if not self._match(TokenType.GT): 1680 self.raise_error("Expecting >") 1681 else: 1682 value = self._parse_schema(exp.var("TABLE")) 1683 else: 1684 value = self._parse_types() 1685 1686 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1687 1688 def _parse_describe(self) -> exp.Describe: 1689 kind = self._match_set(self.CREATABLES) and self._prev.text 1690 this = self._parse_table() 1691 return self.expression(exp.Describe, this=this, kind=kind) 1692 1693 def _parse_insert(self) -> exp.Insert: 1694 overwrite = self._match(TokenType.OVERWRITE) 1695 ignore = self._match(TokenType.IGNORE) 1696 local = self._match_text_seq("LOCAL") 1697 alternative = None 1698 1699 if self._match_text_seq("DIRECTORY"): 1700 this: t.Optional[exp.Expression] = self.expression( 1701 exp.Directory, 1702 this=self._parse_var_or_string(), 1703 local=local, 1704 row_format=self._parse_row_format(match_row=True), 1705 ) 1706 else: 1707 if self._match(TokenType.OR): 1708 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1709 1710 self._match(TokenType.INTO) 1711 self._match(TokenType.TABLE) 1712 this = self._parse_table(schema=True) 1713 1714 returning = self._parse_returning() 1715 1716 return self.expression( 1717 exp.Insert, 1718 this=this, 1719 exists=self._parse_exists(), 1720 partition=self._parse_partition(), 1721 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1722 and self._parse_conjunction(), 1723 expression=self._parse_ddl_select(), 1724 conflict=self._parse_on_conflict(), 1725 returning=returning or self._parse_returning(), 1726 overwrite=overwrite, 1727 alternative=alternative, 1728 ignore=ignore, 1729 ) 1730 1731 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1732 conflict = self._match_text_seq("ON", "CONFLICT") 1733 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1734 1735 if not conflict and not duplicate: 1736 return None 1737 1738 nothing = None 1739 expressions = None 1740 key = None 1741 constraint = None 1742 1743 if conflict: 1744 if self._match_text_seq("ON", "CONSTRAINT"): 1745 constraint = self._parse_id_var() 1746 else: 1747 key = self._parse_csv(self._parse_value) 1748 1749 self._match_text_seq("DO") 1750 if self._match_text_seq("NOTHING"): 1751 nothing = True 1752 else: 1753 self._match(TokenType.UPDATE) 1754 self._match(TokenType.SET) 1755 expressions = self._parse_csv(self._parse_equality) 1756 1757 return self.expression( 1758 exp.OnConflict, 1759 duplicate=duplicate, 1760 expressions=expressions, 1761 nothing=nothing, 1762 key=key, 1763 constraint=constraint, 1764 ) 1765 1766 def _parse_returning(self) -> t.Optional[exp.Returning]: 1767 if not self._match(TokenType.RETURNING): 1768 return None 1769 return self.expression( 1770 exp.Returning, 1771 expressions=self._parse_csv(self._parse_expression), 1772 into=self._match(TokenType.INTO) and self._parse_table_part(), 1773 ) 1774 1775 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1776 if not self._match(TokenType.FORMAT): 1777 return None 1778 return self._parse_row_format() 1779 1780 def _parse_row_format( 1781 self, match_row: bool = False 1782 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1783 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1784 return None 1785 1786 if self._match_text_seq("SERDE"): 1787 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1788 1789 self._match_text_seq("DELIMITED") 1790 1791 kwargs = {} 1792 1793 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1794 kwargs["fields"] = self._parse_string() 1795 if self._match_text_seq("ESCAPED", "BY"): 1796 kwargs["escaped"] = self._parse_string() 1797 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1798 kwargs["collection_items"] = self._parse_string() 1799 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1800 kwargs["map_keys"] = self._parse_string() 1801 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1802 kwargs["lines"] = self._parse_string() 1803 if self._match_text_seq("NULL", "DEFINED", "AS"): 1804 kwargs["null"] = self._parse_string() 1805 1806 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1807 1808 def _parse_load(self) -> exp.LoadData | exp.Command: 1809 if self._match_text_seq("DATA"): 1810 local = self._match_text_seq("LOCAL") 1811 self._match_text_seq("INPATH") 1812 inpath = self._parse_string() 1813 overwrite = self._match(TokenType.OVERWRITE) 1814 self._match_pair(TokenType.INTO, TokenType.TABLE) 1815 1816 return self.expression( 1817 exp.LoadData, 1818 this=self._parse_table(schema=True), 1819 local=local, 1820 overwrite=overwrite, 1821 inpath=inpath, 1822 partition=self._parse_partition(), 1823 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1824 serde=self._match_text_seq("SERDE") and self._parse_string(), 1825 ) 1826 return self._parse_as_command(self._prev) 1827 1828 def _parse_delete(self) -> exp.Delete: 1829 # This handles MySQL's "Multiple-Table Syntax" 1830 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1831 tables = None 1832 if not self._match(TokenType.FROM, advance=False): 1833 tables = self._parse_csv(self._parse_table) or None 1834 1835 returning = self._parse_returning() 1836 1837 return self.expression( 1838 exp.Delete, 1839 tables=tables, 1840 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1841 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1842 where=self._parse_where(), 1843 returning=returning or self._parse_returning(), 1844 limit=self._parse_limit(), 1845 ) 1846 1847 def _parse_update(self) -> exp.Update: 1848 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1849 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1850 returning = self._parse_returning() 1851 return self.expression( 1852 exp.Update, 1853 **{ # type: ignore 1854 "this": this, 1855 "expressions": expressions, 1856 "from": self._parse_from(joins=True), 1857 "where": self._parse_where(), 1858 "returning": returning or self._parse_returning(), 1859 "limit": self._parse_limit(), 1860 }, 1861 ) 1862 1863 def _parse_uncache(self) -> exp.Uncache: 1864 if not self._match(TokenType.TABLE): 1865 self.raise_error("Expecting TABLE after UNCACHE") 1866 1867 return self.expression( 1868 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1869 ) 1870 1871 def _parse_cache(self) -> exp.Cache: 1872 lazy = self._match_text_seq("LAZY") 1873 self._match(TokenType.TABLE) 1874 table = self._parse_table(schema=True) 1875 1876 options = [] 1877 if self._match_text_seq("OPTIONS"): 1878 self._match_l_paren() 1879 k = self._parse_string() 1880 self._match(TokenType.EQ) 1881 v = self._parse_string() 1882 options = [k, v] 1883 self._match_r_paren() 1884 1885 self._match(TokenType.ALIAS) 1886 return self.expression( 1887 exp.Cache, 1888 this=table, 1889 lazy=lazy, 1890 options=options, 1891 expression=self._parse_select(nested=True), 1892 ) 1893 1894 def _parse_partition(self) -> t.Optional[exp.Partition]: 1895 if not self._match(TokenType.PARTITION): 1896 return None 1897 1898 return self.expression( 1899 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1900 ) 1901 1902 def _parse_value(self) -> exp.Tuple: 1903 if self._match(TokenType.L_PAREN): 1904 expressions = self._parse_csv(self._parse_conjunction) 1905 self._match_r_paren() 1906 return self.expression(exp.Tuple, expressions=expressions) 1907 1908 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1909 # https://prestodb.io/docs/current/sql/values.html 1910 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1911 1912 def _parse_select( 1913 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1914 ) -> t.Optional[exp.Expression]: 1915 cte = self._parse_with() 1916 if cte: 1917 this = self._parse_statement() 1918 1919 if not this: 1920 self.raise_error("Failed to parse any statement following CTE") 1921 return cte 1922 1923 if "with" in this.arg_types: 1924 this.set("with", cte) 1925 else: 1926 self.raise_error(f"{this.key} does not support CTE") 1927 this = cte 1928 elif self._match(TokenType.SELECT): 1929 comments = self._prev_comments 1930 1931 hint = self._parse_hint() 1932 all_ = self._match(TokenType.ALL) 1933 distinct = self._match(TokenType.DISTINCT) 1934 1935 kind = ( 1936 self._match(TokenType.ALIAS) 1937 and self._match_texts(("STRUCT", "VALUE")) 1938 and self._prev.text 1939 ) 1940 1941 if distinct: 1942 distinct = self.expression( 1943 exp.Distinct, 1944 on=self._parse_value() if self._match(TokenType.ON) else None, 1945 ) 1946 1947 if all_ and distinct: 1948 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1949 1950 limit = self._parse_limit(top=True) 1951 expressions = self._parse_expressions() 1952 1953 this = self.expression( 1954 exp.Select, 1955 kind=kind, 1956 hint=hint, 1957 distinct=distinct, 1958 expressions=expressions, 1959 limit=limit, 1960 ) 1961 this.comments = comments 1962 1963 into = self._parse_into() 1964 if into: 1965 this.set("into", into) 1966 1967 from_ = self._parse_from() 1968 if from_: 1969 this.set("from", from_) 1970 1971 this = self._parse_query_modifiers(this) 1972 elif (table or nested) and self._match(TokenType.L_PAREN): 1973 if self._match(TokenType.PIVOT): 1974 this = self._parse_simplified_pivot() 1975 elif self._match(TokenType.FROM): 1976 this = exp.select("*").from_( 1977 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1978 ) 1979 else: 1980 this = self._parse_table() if table else self._parse_select(nested=True) 1981 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1982 1983 self._match_r_paren() 1984 1985 # We return early here so that the UNION isn't attached to the subquery by the 1986 # following call to _parse_set_operations, but instead becomes the parent node 1987 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1988 elif self._match(TokenType.VALUES): 1989 this = self.expression( 1990 exp.Values, 1991 expressions=self._parse_csv(self._parse_value), 1992 alias=self._parse_table_alias(), 1993 ) 1994 else: 1995 this = None 1996 1997 return self._parse_set_operations(this) 1998 1999 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2000 if not skip_with_token and not self._match(TokenType.WITH): 2001 return None 2002 2003 comments = self._prev_comments 2004 recursive = self._match(TokenType.RECURSIVE) 2005 2006 expressions = [] 2007 while True: 2008 expressions.append(self._parse_cte()) 2009 2010 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2011 break 2012 else: 2013 self._match(TokenType.WITH) 2014 2015 return self.expression( 2016 exp.With, comments=comments, expressions=expressions, recursive=recursive 2017 ) 2018 2019 def _parse_cte(self) -> exp.CTE: 2020 alias = self._parse_table_alias() 2021 if not alias or not alias.this: 2022 self.raise_error("Expected CTE to have alias") 2023 2024 self._match(TokenType.ALIAS) 2025 return self.expression( 2026 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2027 ) 2028 2029 def _parse_table_alias( 2030 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2031 ) -> t.Optional[exp.TableAlias]: 2032 any_token = self._match(TokenType.ALIAS) 2033 alias = ( 2034 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2035 or self._parse_string_as_identifier() 2036 ) 2037 2038 index = self._index 2039 if self._match(TokenType.L_PAREN): 2040 columns = self._parse_csv(self._parse_function_parameter) 2041 self._match_r_paren() if columns else self._retreat(index) 2042 else: 2043 columns = None 2044 2045 if not alias and not columns: 2046 return None 2047 2048 return self.expression(exp.TableAlias, this=alias, columns=columns) 2049 2050 def _parse_subquery( 2051 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2052 ) -> t.Optional[exp.Subquery]: 2053 if not this: 2054 return None 2055 2056 return self.expression( 2057 exp.Subquery, 2058 this=this, 2059 pivots=self._parse_pivots(), 2060 alias=self._parse_table_alias() if parse_alias else None, 2061 ) 2062 2063 def _parse_query_modifiers( 2064 self, this: t.Optional[exp.Expression] 2065 ) -> t.Optional[exp.Expression]: 2066 if isinstance(this, self.MODIFIABLES): 2067 for join in iter(self._parse_join, None): 2068 this.append("joins", join) 2069 for lateral in iter(self._parse_lateral, None): 2070 this.append("laterals", lateral) 2071 2072 while True: 2073 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2074 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2075 key, expression = parser(self) 2076 2077 if expression: 2078 this.set(key, expression) 2079 if key == "limit": 2080 offset = expression.args.pop("offset", None) 2081 if offset: 2082 this.set("offset", exp.Offset(expression=offset)) 2083 continue 2084 break 2085 return this 2086 2087 def _parse_hint(self) -> t.Optional[exp.Hint]: 2088 if self._match(TokenType.HINT): 2089 hints = [] 2090 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2091 hints.extend(hint) 2092 2093 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2094 self.raise_error("Expected */ after HINT") 2095 2096 return self.expression(exp.Hint, expressions=hints) 2097 2098 return None 2099 2100 def _parse_into(self) -> t.Optional[exp.Into]: 2101 if not self._match(TokenType.INTO): 2102 return None 2103 2104 temp = self._match(TokenType.TEMPORARY) 2105 unlogged = self._match_text_seq("UNLOGGED") 2106 self._match(TokenType.TABLE) 2107 2108 return self.expression( 2109 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2110 ) 2111 2112 def _parse_from( 2113 self, joins: bool = False, skip_from_token: bool = False 2114 ) -> t.Optional[exp.From]: 2115 if not skip_from_token and not self._match(TokenType.FROM): 2116 return None 2117 2118 return self.expression( 2119 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2120 ) 2121 2122 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2123 if not self._match(TokenType.MATCH_RECOGNIZE): 2124 return None 2125 2126 self._match_l_paren() 2127 2128 partition = self._parse_partition_by() 2129 order = self._parse_order() 2130 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2131 2132 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2133 rows = exp.var("ONE ROW PER MATCH") 2134 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2135 text = "ALL ROWS PER MATCH" 2136 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2137 text += f" SHOW EMPTY MATCHES" 2138 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2139 text += f" OMIT EMPTY MATCHES" 2140 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2141 text += f" WITH UNMATCHED ROWS" 2142 rows = exp.var(text) 2143 else: 2144 rows = None 2145 2146 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2147 text = "AFTER MATCH SKIP" 2148 if self._match_text_seq("PAST", "LAST", "ROW"): 2149 text += f" PAST LAST ROW" 2150 elif self._match_text_seq("TO", "NEXT", "ROW"): 2151 text += f" TO NEXT ROW" 2152 elif self._match_text_seq("TO", "FIRST"): 2153 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2154 elif self._match_text_seq("TO", "LAST"): 2155 text += f" TO LAST {self._advance_any().text}" # type: ignore 2156 after = exp.var(text) 2157 else: 2158 after = None 2159 2160 if self._match_text_seq("PATTERN"): 2161 self._match_l_paren() 2162 2163 if not self._curr: 2164 self.raise_error("Expecting )", self._curr) 2165 2166 paren = 1 2167 start = self._curr 2168 2169 while self._curr and paren > 0: 2170 if self._curr.token_type == TokenType.L_PAREN: 2171 paren += 1 2172 if self._curr.token_type == TokenType.R_PAREN: 2173 paren -= 1 2174 2175 end = self._prev 2176 self._advance() 2177 2178 if paren > 0: 2179 self.raise_error("Expecting )", self._curr) 2180 2181 pattern = exp.var(self._find_sql(start, end)) 2182 else: 2183 pattern = None 2184 2185 define = ( 2186 self._parse_csv( 2187 lambda: self.expression( 2188 exp.Alias, 2189 alias=self._parse_id_var(any_token=True), 2190 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2191 ) 2192 ) 2193 if self._match_text_seq("DEFINE") 2194 else None 2195 ) 2196 2197 self._match_r_paren() 2198 2199 return self.expression( 2200 exp.MatchRecognize, 2201 partition_by=partition, 2202 order=order, 2203 measures=measures, 2204 rows=rows, 2205 after=after, 2206 pattern=pattern, 2207 define=define, 2208 alias=self._parse_table_alias(), 2209 ) 2210 2211 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2212 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2213 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2214 2215 if outer_apply or cross_apply: 2216 this = self._parse_select(table=True) 2217 view = None 2218 outer = not cross_apply 2219 elif self._match(TokenType.LATERAL): 2220 this = self._parse_select(table=True) 2221 view = self._match(TokenType.VIEW) 2222 outer = self._match(TokenType.OUTER) 2223 else: 2224 return None 2225 2226 if not this: 2227 this = self._parse_function() or self._parse_id_var(any_token=False) 2228 while self._match(TokenType.DOT): 2229 this = exp.Dot( 2230 this=this, 2231 expression=self._parse_function() or self._parse_id_var(any_token=False), 2232 ) 2233 2234 if view: 2235 table = self._parse_id_var(any_token=False) 2236 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2237 table_alias: t.Optional[exp.TableAlias] = self.expression( 2238 exp.TableAlias, this=table, columns=columns 2239 ) 2240 elif isinstance(this, exp.Subquery) and this.alias: 2241 # Ensures parity between the Subquery's and the Lateral's "alias" args 2242 table_alias = this.args["alias"].copy() 2243 else: 2244 table_alias = self._parse_table_alias() 2245 2246 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2247 2248 def _parse_join_parts( 2249 self, 2250 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2251 return ( 2252 self._match_set(self.JOIN_METHODS) and self._prev, 2253 self._match_set(self.JOIN_SIDES) and self._prev, 2254 self._match_set(self.JOIN_KINDS) and self._prev, 2255 ) 2256 2257 def _parse_join( 2258 self, skip_join_token: bool = False, parse_bracket: bool = False 2259 ) -> t.Optional[exp.Join]: 2260 if self._match(TokenType.COMMA): 2261 return self.expression(exp.Join, this=self._parse_table()) 2262 2263 index = self._index 2264 method, side, kind = self._parse_join_parts() 2265 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2266 join = self._match(TokenType.JOIN) 2267 2268 if not skip_join_token and not join: 2269 self._retreat(index) 2270 kind = None 2271 method = None 2272 side = None 2273 2274 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2275 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2276 2277 if not skip_join_token and not join and not outer_apply and not cross_apply: 2278 return None 2279 2280 if outer_apply: 2281 side = Token(TokenType.LEFT, "LEFT") 2282 2283 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2284 2285 if method: 2286 kwargs["method"] = method.text 2287 if side: 2288 kwargs["side"] = side.text 2289 if kind: 2290 kwargs["kind"] = kind.text 2291 if hint: 2292 kwargs["hint"] = hint 2293 2294 if self._match(TokenType.ON): 2295 kwargs["on"] = self._parse_conjunction() 2296 elif self._match(TokenType.USING): 2297 kwargs["using"] = self._parse_wrapped_id_vars() 2298 elif not (kind and kind.token_type == TokenType.CROSS): 2299 index = self._index 2300 joins = self._parse_joins() 2301 2302 if joins and self._match(TokenType.ON): 2303 kwargs["on"] = self._parse_conjunction() 2304 elif joins and self._match(TokenType.USING): 2305 kwargs["using"] = self._parse_wrapped_id_vars() 2306 else: 2307 joins = None 2308 self._retreat(index) 2309 2310 kwargs["this"].set("joins", joins) 2311 2312 return self.expression(exp.Join, **kwargs) 2313 2314 def _parse_index( 2315 self, 2316 index: t.Optional[exp.Expression] = None, 2317 ) -> t.Optional[exp.Index]: 2318 if index: 2319 unique = None 2320 primary = None 2321 amp = None 2322 2323 self._match(TokenType.ON) 2324 self._match(TokenType.TABLE) # hive 2325 table = self._parse_table_parts(schema=True) 2326 else: 2327 unique = self._match(TokenType.UNIQUE) 2328 primary = self._match_text_seq("PRIMARY") 2329 amp = self._match_text_seq("AMP") 2330 2331 if not self._match(TokenType.INDEX): 2332 return None 2333 2334 index = self._parse_id_var() 2335 table = None 2336 2337 using = self._parse_field() if self._match(TokenType.USING) else None 2338 2339 if self._match(TokenType.L_PAREN, advance=False): 2340 columns = self._parse_wrapped_csv(self._parse_ordered) 2341 else: 2342 columns = None 2343 2344 return self.expression( 2345 exp.Index, 2346 this=index, 2347 table=table, 2348 using=using, 2349 columns=columns, 2350 unique=unique, 2351 primary=primary, 2352 amp=amp, 2353 partition_by=self._parse_partition_by(), 2354 ) 2355 2356 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2357 hints: t.List[exp.Expression] = [] 2358 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2359 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2360 hints.append( 2361 self.expression( 2362 exp.WithTableHint, 2363 expressions=self._parse_csv( 2364 lambda: self._parse_function() or self._parse_var(any_token=True) 2365 ), 2366 ) 2367 ) 2368 self._match_r_paren() 2369 else: 2370 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2371 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2372 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2373 2374 self._match_texts({"INDEX", "KEY"}) 2375 if self._match(TokenType.FOR): 2376 hint.set("target", self._advance_any() and self._prev.text.upper()) 2377 2378 hint.set("expressions", self._parse_wrapped_id_vars()) 2379 hints.append(hint) 2380 2381 return hints or None 2382 2383 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2384 return ( 2385 (not schema and self._parse_function(optional_parens=False)) 2386 or self._parse_id_var(any_token=False) 2387 or self._parse_string_as_identifier() 2388 or self._parse_placeholder() 2389 ) 2390 2391 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2392 catalog = None 2393 db = None 2394 table = self._parse_table_part(schema=schema) 2395 2396 while self._match(TokenType.DOT): 2397 if catalog: 2398 # This allows nesting the table in arbitrarily many dot expressions if needed 2399 table = self.expression( 2400 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2401 ) 2402 else: 2403 catalog = db 2404 db = table 2405 table = self._parse_table_part(schema=schema) 2406 2407 if not table: 2408 self.raise_error(f"Expected table name but got {self._curr}") 2409 2410 return self.expression( 2411 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2412 ) 2413 2414 def _parse_table( 2415 self, 2416 schema: bool = False, 2417 joins: bool = False, 2418 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2419 parse_bracket: bool = False, 2420 ) -> t.Optional[exp.Expression]: 2421 lateral = self._parse_lateral() 2422 if lateral: 2423 return lateral 2424 2425 unnest = self._parse_unnest() 2426 if unnest: 2427 return unnest 2428 2429 values = self._parse_derived_table_values() 2430 if values: 2431 return values 2432 2433 subquery = self._parse_select(table=True) 2434 if subquery: 2435 if not subquery.args.get("pivots"): 2436 subquery.set("pivots", self._parse_pivots()) 2437 return subquery 2438 2439 bracket = parse_bracket and self._parse_bracket(None) 2440 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2441 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2442 2443 if schema: 2444 return self._parse_schema(this=this) 2445 2446 if self.ALIAS_POST_TABLESAMPLE: 2447 table_sample = self._parse_table_sample() 2448 2449 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2450 if alias: 2451 this.set("alias", alias) 2452 2453 if not this.args.get("pivots"): 2454 this.set("pivots", self._parse_pivots()) 2455 2456 this.set("hints", self._parse_table_hints()) 2457 2458 if not self.ALIAS_POST_TABLESAMPLE: 2459 table_sample = self._parse_table_sample() 2460 2461 if table_sample: 2462 table_sample.set("this", this) 2463 this = table_sample 2464 2465 if joins: 2466 for join in iter(self._parse_join, None): 2467 this.append("joins", join) 2468 2469 return this 2470 2471 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2472 if not self._match(TokenType.UNNEST): 2473 return None 2474 2475 expressions = self._parse_wrapped_csv(self._parse_type) 2476 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2477 2478 alias = self._parse_table_alias() if with_alias else None 2479 2480 if alias and self.UNNEST_COLUMN_ONLY: 2481 if alias.args.get("columns"): 2482 self.raise_error("Unexpected extra column alias in unnest.") 2483 2484 alias.set("columns", [alias.this]) 2485 alias.set("this", None) 2486 2487 offset = None 2488 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2489 self._match(TokenType.ALIAS) 2490 offset = self._parse_id_var() or exp.to_identifier("offset") 2491 2492 return self.expression( 2493 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2494 ) 2495 2496 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2497 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2498 if not is_derived and not self._match(TokenType.VALUES): 2499 return None 2500 2501 expressions = self._parse_csv(self._parse_value) 2502 alias = self._parse_table_alias() 2503 2504 if is_derived: 2505 self._match_r_paren() 2506 2507 return self.expression( 2508 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2509 ) 2510 2511 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2512 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2513 as_modifier and self._match_text_seq("USING", "SAMPLE") 2514 ): 2515 return None 2516 2517 bucket_numerator = None 2518 bucket_denominator = None 2519 bucket_field = None 2520 percent = None 2521 rows = None 2522 size = None 2523 seed = None 2524 2525 kind = ( 2526 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2527 ) 2528 method = self._parse_var(tokens=(TokenType.ROW,)) 2529 2530 self._match(TokenType.L_PAREN) 2531 2532 num = self._parse_number() 2533 2534 if self._match_text_seq("BUCKET"): 2535 bucket_numerator = self._parse_number() 2536 self._match_text_seq("OUT", "OF") 2537 bucket_denominator = bucket_denominator = self._parse_number() 2538 self._match(TokenType.ON) 2539 bucket_field = self._parse_field() 2540 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2541 percent = num 2542 elif self._match(TokenType.ROWS): 2543 rows = num 2544 else: 2545 size = num 2546 2547 self._match(TokenType.R_PAREN) 2548 2549 if self._match(TokenType.L_PAREN): 2550 method = self._parse_var() 2551 seed = self._match(TokenType.COMMA) and self._parse_number() 2552 self._match_r_paren() 2553 elif self._match_texts(("SEED", "REPEATABLE")): 2554 seed = self._parse_wrapped(self._parse_number) 2555 2556 return self.expression( 2557 exp.TableSample, 2558 method=method, 2559 bucket_numerator=bucket_numerator, 2560 bucket_denominator=bucket_denominator, 2561 bucket_field=bucket_field, 2562 percent=percent, 2563 rows=rows, 2564 size=size, 2565 seed=seed, 2566 kind=kind, 2567 ) 2568 2569 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2570 return list(iter(self._parse_pivot, None)) or None 2571 2572 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2573 return list(iter(self._parse_join, None)) or None 2574 2575 # https://duckdb.org/docs/sql/statements/pivot 2576 def _parse_simplified_pivot(self) -> exp.Pivot: 2577 def _parse_on() -> t.Optional[exp.Expression]: 2578 this = self._parse_bitwise() 2579 return self._parse_in(this) if self._match(TokenType.IN) else this 2580 2581 this = self._parse_table() 2582 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2583 using = self._match(TokenType.USING) and self._parse_csv( 2584 lambda: self._parse_alias(self._parse_function()) 2585 ) 2586 group = self._parse_group() 2587 return self.expression( 2588 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2589 ) 2590 2591 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2592 index = self._index 2593 2594 if self._match(TokenType.PIVOT): 2595 unpivot = False 2596 elif self._match(TokenType.UNPIVOT): 2597 unpivot = True 2598 else: 2599 return None 2600 2601 expressions = [] 2602 field = None 2603 2604 if not self._match(TokenType.L_PAREN): 2605 self._retreat(index) 2606 return None 2607 2608 if unpivot: 2609 expressions = self._parse_csv(self._parse_column) 2610 else: 2611 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2612 2613 if not expressions: 2614 self.raise_error("Failed to parse PIVOT's aggregation list") 2615 2616 if not self._match(TokenType.FOR): 2617 self.raise_error("Expecting FOR") 2618 2619 value = self._parse_column() 2620 2621 if not self._match(TokenType.IN): 2622 self.raise_error("Expecting IN") 2623 2624 field = self._parse_in(value, alias=True) 2625 2626 self._match_r_paren() 2627 2628 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2629 2630 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2631 pivot.set("alias", self._parse_table_alias()) 2632 2633 if not unpivot: 2634 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2635 2636 columns: t.List[exp.Expression] = [] 2637 for fld in pivot.args["field"].expressions: 2638 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2639 for name in names: 2640 if self.PREFIXED_PIVOT_COLUMNS: 2641 name = f"{name}_{field_name}" if name else field_name 2642 else: 2643 name = f"{field_name}_{name}" if name else field_name 2644 2645 columns.append(exp.to_identifier(name)) 2646 2647 pivot.set("columns", columns) 2648 2649 return pivot 2650 2651 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2652 return [agg.alias for agg in aggregations] 2653 2654 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2655 if not skip_where_token and not self._match(TokenType.WHERE): 2656 return None 2657 2658 return self.expression( 2659 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2660 ) 2661 2662 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2663 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2664 return None 2665 2666 elements = defaultdict(list) 2667 2668 if self._match(TokenType.ALL): 2669 return self.expression(exp.Group, all=True) 2670 2671 while True: 2672 expressions = self._parse_csv(self._parse_conjunction) 2673 if expressions: 2674 elements["expressions"].extend(expressions) 2675 2676 grouping_sets = self._parse_grouping_sets() 2677 if grouping_sets: 2678 elements["grouping_sets"].extend(grouping_sets) 2679 2680 rollup = None 2681 cube = None 2682 totals = None 2683 2684 with_ = self._match(TokenType.WITH) 2685 if self._match(TokenType.ROLLUP): 2686 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2687 elements["rollup"].extend(ensure_list(rollup)) 2688 2689 if self._match(TokenType.CUBE): 2690 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2691 elements["cube"].extend(ensure_list(cube)) 2692 2693 if self._match_text_seq("TOTALS"): 2694 totals = True 2695 elements["totals"] = True # type: ignore 2696 2697 if not (grouping_sets or rollup or cube or totals): 2698 break 2699 2700 return self.expression(exp.Group, **elements) # type: ignore 2701 2702 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2703 if not self._match(TokenType.GROUPING_SETS): 2704 return None 2705 2706 return self._parse_wrapped_csv(self._parse_grouping_set) 2707 2708 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2709 if self._match(TokenType.L_PAREN): 2710 grouping_set = self._parse_csv(self._parse_column) 2711 self._match_r_paren() 2712 return self.expression(exp.Tuple, expressions=grouping_set) 2713 2714 return self._parse_column() 2715 2716 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2717 if not skip_having_token and not self._match(TokenType.HAVING): 2718 return None 2719 return self.expression(exp.Having, this=self._parse_conjunction()) 2720 2721 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2722 if not self._match(TokenType.QUALIFY): 2723 return None 2724 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2725 2726 def _parse_order( 2727 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2728 ) -> t.Optional[exp.Expression]: 2729 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2730 return this 2731 2732 return self.expression( 2733 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2734 ) 2735 2736 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2737 if not self._match(token): 2738 return None 2739 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2740 2741 def _parse_ordered(self) -> exp.Ordered: 2742 this = self._parse_conjunction() 2743 self._match(TokenType.ASC) 2744 2745 is_desc = self._match(TokenType.DESC) 2746 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2747 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2748 desc = is_desc or False 2749 asc = not desc 2750 nulls_first = is_nulls_first or False 2751 explicitly_null_ordered = is_nulls_first or is_nulls_last 2752 2753 if ( 2754 not explicitly_null_ordered 2755 and ( 2756 (asc and self.NULL_ORDERING == "nulls_are_small") 2757 or (desc and self.NULL_ORDERING != "nulls_are_small") 2758 ) 2759 and self.NULL_ORDERING != "nulls_are_last" 2760 ): 2761 nulls_first = True 2762 2763 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2764 2765 def _parse_limit( 2766 self, this: t.Optional[exp.Expression] = None, top: bool = False 2767 ) -> t.Optional[exp.Expression]: 2768 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2769 comments = self._prev_comments 2770 limit_paren = self._match(TokenType.L_PAREN) 2771 expression = self._parse_number() if top else self._parse_term() 2772 2773 if self._match(TokenType.COMMA): 2774 offset = expression 2775 expression = self._parse_term() 2776 else: 2777 offset = None 2778 2779 limit_exp = self.expression( 2780 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2781 ) 2782 2783 if limit_paren: 2784 self._match_r_paren() 2785 2786 return limit_exp 2787 2788 if self._match(TokenType.FETCH): 2789 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2790 direction = self._prev.text if direction else "FIRST" 2791 2792 count = self._parse_number() 2793 percent = self._match(TokenType.PERCENT) 2794 2795 self._match_set((TokenType.ROW, TokenType.ROWS)) 2796 2797 only = self._match_text_seq("ONLY") 2798 with_ties = self._match_text_seq("WITH", "TIES") 2799 2800 if only and with_ties: 2801 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2802 2803 return self.expression( 2804 exp.Fetch, 2805 direction=direction, 2806 count=count, 2807 percent=percent, 2808 with_ties=with_ties, 2809 ) 2810 2811 return this 2812 2813 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2814 if not self._match(TokenType.OFFSET): 2815 return this 2816 2817 count = self._parse_number() 2818 self._match_set((TokenType.ROW, TokenType.ROWS)) 2819 return self.expression(exp.Offset, this=this, expression=count) 2820 2821 def _parse_locks(self) -> t.List[exp.Lock]: 2822 locks = [] 2823 while True: 2824 if self._match_text_seq("FOR", "UPDATE"): 2825 update = True 2826 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2827 "LOCK", "IN", "SHARE", "MODE" 2828 ): 2829 update = False 2830 else: 2831 break 2832 2833 expressions = None 2834 if self._match_text_seq("OF"): 2835 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2836 2837 wait: t.Optional[bool | exp.Expression] = None 2838 if self._match_text_seq("NOWAIT"): 2839 wait = True 2840 elif self._match_text_seq("WAIT"): 2841 wait = self._parse_primary() 2842 elif self._match_text_seq("SKIP", "LOCKED"): 2843 wait = False 2844 2845 locks.append( 2846 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2847 ) 2848 2849 return locks 2850 2851 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2852 if not self._match_set(self.SET_OPERATIONS): 2853 return this 2854 2855 token_type = self._prev.token_type 2856 2857 if token_type == TokenType.UNION: 2858 expression = exp.Union 2859 elif token_type == TokenType.EXCEPT: 2860 expression = exp.Except 2861 else: 2862 expression = exp.Intersect 2863 2864 return self.expression( 2865 expression, 2866 this=this, 2867 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2868 expression=self._parse_set_operations(self._parse_select(nested=True)), 2869 ) 2870 2871 def _parse_expression(self) -> t.Optional[exp.Expression]: 2872 return self._parse_alias(self._parse_conjunction()) 2873 2874 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2875 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2876 2877 def _parse_equality(self) -> t.Optional[exp.Expression]: 2878 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2879 2880 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2881 return self._parse_tokens(self._parse_range, self.COMPARISON) 2882 2883 def _parse_range(self) -> t.Optional[exp.Expression]: 2884 this = self._parse_bitwise() 2885 negate = self._match(TokenType.NOT) 2886 2887 if self._match_set(self.RANGE_PARSERS): 2888 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2889 if not expression: 2890 return this 2891 2892 this = expression 2893 elif self._match(TokenType.ISNULL): 2894 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2895 2896 # Postgres supports ISNULL and NOTNULL for conditions. 2897 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2898 if self._match(TokenType.NOTNULL): 2899 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2900 this = self.expression(exp.Not, this=this) 2901 2902 if negate: 2903 this = self.expression(exp.Not, this=this) 2904 2905 if self._match(TokenType.IS): 2906 this = self._parse_is(this) 2907 2908 return this 2909 2910 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2911 index = self._index - 1 2912 negate = self._match(TokenType.NOT) 2913 2914 if self._match_text_seq("DISTINCT", "FROM"): 2915 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2916 return self.expression(klass, this=this, expression=self._parse_expression()) 2917 2918 expression = self._parse_null() or self._parse_boolean() 2919 if not expression: 2920 self._retreat(index) 2921 return None 2922 2923 this = self.expression(exp.Is, this=this, expression=expression) 2924 return self.expression(exp.Not, this=this) if negate else this 2925 2926 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2927 unnest = self._parse_unnest(with_alias=False) 2928 if unnest: 2929 this = self.expression(exp.In, this=this, unnest=unnest) 2930 elif self._match(TokenType.L_PAREN): 2931 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2932 2933 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2934 this = self.expression(exp.In, this=this, query=expressions[0]) 2935 else: 2936 this = self.expression(exp.In, this=this, expressions=expressions) 2937 2938 self._match_r_paren(this) 2939 else: 2940 this = self.expression(exp.In, this=this, field=self._parse_field()) 2941 2942 return this 2943 2944 def _parse_between(self, this: exp.Expression) -> exp.Between: 2945 low = self._parse_bitwise() 2946 self._match(TokenType.AND) 2947 high = self._parse_bitwise() 2948 return self.expression(exp.Between, this=this, low=low, high=high) 2949 2950 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2951 if not self._match(TokenType.ESCAPE): 2952 return this 2953 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2954 2955 def _parse_interval(self) -> t.Optional[exp.Interval]: 2956 if not self._match(TokenType.INTERVAL): 2957 return None 2958 2959 if self._match(TokenType.STRING, advance=False): 2960 this = self._parse_primary() 2961 else: 2962 this = self._parse_term() 2963 2964 unit = self._parse_function() or self._parse_var() 2965 2966 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2967 # each INTERVAL expression into this canonical form so it's easy to transpile 2968 if this and this.is_number: 2969 this = exp.Literal.string(this.name) 2970 elif this and this.is_string: 2971 parts = this.name.split() 2972 2973 if len(parts) == 2: 2974 if unit: 2975 # this is not actually a unit, it's something else 2976 unit = None 2977 self._retreat(self._index - 1) 2978 else: 2979 this = exp.Literal.string(parts[0]) 2980 unit = self.expression(exp.Var, this=parts[1]) 2981 2982 return self.expression(exp.Interval, this=this, unit=unit) 2983 2984 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2985 this = self._parse_term() 2986 2987 while True: 2988 if self._match_set(self.BITWISE): 2989 this = self.expression( 2990 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2991 ) 2992 elif self._match_pair(TokenType.LT, TokenType.LT): 2993 this = self.expression( 2994 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2995 ) 2996 elif self._match_pair(TokenType.GT, TokenType.GT): 2997 this = self.expression( 2998 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2999 ) 3000 else: 3001 break 3002 3003 return this 3004 3005 def _parse_term(self) -> t.Optional[exp.Expression]: 3006 return self._parse_tokens(self._parse_factor, self.TERM) 3007 3008 def _parse_factor(self) -> t.Optional[exp.Expression]: 3009 return self._parse_tokens(self._parse_unary, self.FACTOR) 3010 3011 def _parse_unary(self) -> t.Optional[exp.Expression]: 3012 if self._match_set(self.UNARY_PARSERS): 3013 return self.UNARY_PARSERS[self._prev.token_type](self) 3014 return self._parse_at_time_zone(self._parse_type()) 3015 3016 def _parse_type(self) -> t.Optional[exp.Expression]: 3017 interval = self._parse_interval() 3018 if interval: 3019 return interval 3020 3021 index = self._index 3022 data_type = self._parse_types(check_func=True) 3023 this = self._parse_column() 3024 3025 if data_type: 3026 if isinstance(this, exp.Literal): 3027 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3028 if parser: 3029 return parser(self, this, data_type) 3030 return self.expression(exp.Cast, this=this, to=data_type) 3031 if not data_type.expressions: 3032 self._retreat(index) 3033 return self._parse_column() 3034 return self._parse_column_ops(data_type) 3035 3036 return this 3037 3038 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3039 this = self._parse_type() 3040 if not this: 3041 return None 3042 3043 return self.expression( 3044 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3045 ) 3046 3047 def _parse_types( 3048 self, check_func: bool = False, schema: bool = False 3049 ) -> t.Optional[exp.Expression]: 3050 index = self._index 3051 3052 prefix = self._match_text_seq("SYSUDTLIB", ".") 3053 3054 if not self._match_set(self.TYPE_TOKENS): 3055 return None 3056 3057 type_token = self._prev.token_type 3058 3059 if type_token == TokenType.PSEUDO_TYPE: 3060 return self.expression(exp.PseudoType, this=self._prev.text) 3061 3062 nested = type_token in self.NESTED_TYPE_TOKENS 3063 is_struct = type_token == TokenType.STRUCT 3064 expressions = None 3065 maybe_func = False 3066 3067 if self._match(TokenType.L_PAREN): 3068 if is_struct: 3069 expressions = self._parse_csv(self._parse_struct_types) 3070 elif nested: 3071 expressions = self._parse_csv( 3072 lambda: self._parse_types(check_func=check_func, schema=schema) 3073 ) 3074 elif type_token in self.ENUM_TYPE_TOKENS: 3075 expressions = self._parse_csv(self._parse_primary) 3076 else: 3077 expressions = self._parse_csv(self._parse_type_size) 3078 3079 if not expressions or not self._match(TokenType.R_PAREN): 3080 self._retreat(index) 3081 return None 3082 3083 maybe_func = True 3084 3085 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3086 this = exp.DataType( 3087 this=exp.DataType.Type.ARRAY, 3088 expressions=[ 3089 exp.DataType( 3090 this=exp.DataType.Type[type_token.value], 3091 expressions=expressions, 3092 nested=nested, 3093 ) 3094 ], 3095 nested=True, 3096 ) 3097 3098 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3099 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3100 3101 return this 3102 3103 if self._match(TokenType.L_BRACKET): 3104 self._retreat(index) 3105 return None 3106 3107 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3108 if nested and self._match(TokenType.LT): 3109 if is_struct: 3110 expressions = self._parse_csv(self._parse_struct_types) 3111 else: 3112 expressions = self._parse_csv( 3113 lambda: self._parse_types(check_func=check_func, schema=schema) 3114 ) 3115 3116 if not self._match(TokenType.GT): 3117 self.raise_error("Expecting >") 3118 3119 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3120 values = self._parse_csv(self._parse_conjunction) 3121 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3122 3123 value: t.Optional[exp.Expression] = None 3124 if type_token in self.TIMESTAMPS: 3125 if self._match_text_seq("WITH", "TIME", "ZONE"): 3126 maybe_func = False 3127 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3128 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3129 maybe_func = False 3130 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3131 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3132 maybe_func = False 3133 elif type_token == TokenType.INTERVAL: 3134 unit = self._parse_var() 3135 3136 if not unit: 3137 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3138 else: 3139 value = self.expression(exp.Interval, unit=unit) 3140 3141 if maybe_func and check_func: 3142 index2 = self._index 3143 peek = self._parse_string() 3144 3145 if not peek: 3146 self._retreat(index) 3147 return None 3148 3149 self._retreat(index2) 3150 3151 if value: 3152 return value 3153 3154 return exp.DataType( 3155 this=exp.DataType.Type[type_token.value], 3156 expressions=expressions, 3157 nested=nested, 3158 values=values, 3159 prefix=prefix, 3160 ) 3161 3162 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3163 this = self._parse_type() or self._parse_id_var() 3164 self._match(TokenType.COLON) 3165 return self._parse_column_def(this) 3166 3167 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3168 if not self._match_text_seq("AT", "TIME", "ZONE"): 3169 return this 3170 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3171 3172 def _parse_column(self) -> t.Optional[exp.Expression]: 3173 this = self._parse_field() 3174 if isinstance(this, exp.Identifier): 3175 this = self.expression(exp.Column, this=this) 3176 elif not this: 3177 return self._parse_bracket(this) 3178 return self._parse_column_ops(this) 3179 3180 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3181 this = self._parse_bracket(this) 3182 3183 while self._match_set(self.COLUMN_OPERATORS): 3184 op_token = self._prev.token_type 3185 op = self.COLUMN_OPERATORS.get(op_token) 3186 3187 if op_token == TokenType.DCOLON: 3188 field = self._parse_types() 3189 if not field: 3190 self.raise_error("Expected type") 3191 elif op and self._curr: 3192 self._advance() 3193 value = self._prev.text 3194 field = ( 3195 exp.Literal.number(value) 3196 if self._prev.token_type == TokenType.NUMBER 3197 else exp.Literal.string(value) 3198 ) 3199 else: 3200 field = self._parse_field(anonymous_func=True, any_token=True) 3201 3202 if isinstance(field, exp.Func): 3203 # bigquery allows function calls like x.y.count(...) 3204 # SAFE.SUBSTR(...) 3205 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3206 this = self._replace_columns_with_dots(this) 3207 3208 if op: 3209 this = op(self, this, field) 3210 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3211 this = self.expression( 3212 exp.Column, 3213 this=field, 3214 table=this.this, 3215 db=this.args.get("table"), 3216 catalog=this.args.get("db"), 3217 ) 3218 else: 3219 this = self.expression(exp.Dot, this=this, expression=field) 3220 this = self._parse_bracket(this) 3221 return this 3222 3223 def _parse_primary(self) -> t.Optional[exp.Expression]: 3224 if self._match_set(self.PRIMARY_PARSERS): 3225 token_type = self._prev.token_type 3226 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3227 3228 if token_type == TokenType.STRING: 3229 expressions = [primary] 3230 while self._match(TokenType.STRING): 3231 expressions.append(exp.Literal.string(self._prev.text)) 3232 3233 if len(expressions) > 1: 3234 return self.expression(exp.Concat, expressions=expressions) 3235 3236 return primary 3237 3238 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3239 return exp.Literal.number(f"0.{self._prev.text}") 3240 3241 if self._match(TokenType.L_PAREN): 3242 comments = self._prev_comments 3243 query = self._parse_select() 3244 3245 if query: 3246 expressions = [query] 3247 else: 3248 expressions = self._parse_expressions() 3249 3250 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3251 3252 if isinstance(this, exp.Subqueryable): 3253 this = self._parse_set_operations( 3254 self._parse_subquery(this=this, parse_alias=False) 3255 ) 3256 elif len(expressions) > 1: 3257 this = self.expression(exp.Tuple, expressions=expressions) 3258 else: 3259 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3260 3261 if this: 3262 this.add_comments(comments) 3263 3264 self._match_r_paren(expression=this) 3265 return this 3266 3267 return None 3268 3269 def _parse_field( 3270 self, 3271 any_token: bool = False, 3272 tokens: t.Optional[t.Collection[TokenType]] = None, 3273 anonymous_func: bool = False, 3274 ) -> t.Optional[exp.Expression]: 3275 return ( 3276 self._parse_primary() 3277 or self._parse_function(anonymous=anonymous_func) 3278 or self._parse_id_var(any_token=any_token, tokens=tokens) 3279 ) 3280 3281 def _parse_function( 3282 self, 3283 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3284 anonymous: bool = False, 3285 optional_parens: bool = True, 3286 ) -> t.Optional[exp.Expression]: 3287 if not self._curr: 3288 return None 3289 3290 token_type = self._curr.token_type 3291 3292 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3293 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3294 3295 if not self._next or self._next.token_type != TokenType.L_PAREN: 3296 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3297 self._advance() 3298 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3299 3300 return None 3301 3302 if token_type not in self.FUNC_TOKENS: 3303 return None 3304 3305 this = self._curr.text 3306 upper = this.upper() 3307 self._advance(2) 3308 3309 parser = self.FUNCTION_PARSERS.get(upper) 3310 3311 if parser and not anonymous: 3312 this = parser(self) 3313 else: 3314 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3315 3316 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3317 this = self.expression(subquery_predicate, this=self._parse_select()) 3318 self._match_r_paren() 3319 return this 3320 3321 if functions is None: 3322 functions = self.FUNCTIONS 3323 3324 function = functions.get(upper) 3325 3326 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3327 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3328 3329 if function and not anonymous: 3330 this = self.validate_expression(function(args), args) 3331 else: 3332 this = self.expression(exp.Anonymous, this=this, expressions=args) 3333 3334 self._match_r_paren(this) 3335 return self._parse_window(this) 3336 3337 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3338 return self._parse_column_def(self._parse_id_var()) 3339 3340 def _parse_user_defined_function( 3341 self, kind: t.Optional[TokenType] = None 3342 ) -> t.Optional[exp.Expression]: 3343 this = self._parse_id_var() 3344 3345 while self._match(TokenType.DOT): 3346 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3347 3348 if not self._match(TokenType.L_PAREN): 3349 return this 3350 3351 expressions = self._parse_csv(self._parse_function_parameter) 3352 self._match_r_paren() 3353 return self.expression( 3354 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3355 ) 3356 3357 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3358 literal = self._parse_primary() 3359 if literal: 3360 return self.expression(exp.Introducer, this=token.text, expression=literal) 3361 3362 return self.expression(exp.Identifier, this=token.text) 3363 3364 def _parse_session_parameter(self) -> exp.SessionParameter: 3365 kind = None 3366 this = self._parse_id_var() or self._parse_primary() 3367 3368 if this and self._match(TokenType.DOT): 3369 kind = this.name 3370 this = self._parse_var() or self._parse_primary() 3371 3372 return self.expression(exp.SessionParameter, this=this, kind=kind) 3373 3374 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3375 index = self._index 3376 3377 if self._match(TokenType.L_PAREN): 3378 expressions = self._parse_csv(self._parse_id_var) 3379 3380 if not self._match(TokenType.R_PAREN): 3381 self._retreat(index) 3382 else: 3383 expressions = [self._parse_id_var()] 3384 3385 if self._match_set(self.LAMBDAS): 3386 return self.LAMBDAS[self._prev.token_type](self, expressions) 3387 3388 self._retreat(index) 3389 3390 this: t.Optional[exp.Expression] 3391 3392 if self._match(TokenType.DISTINCT): 3393 this = self.expression( 3394 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3395 ) 3396 else: 3397 this = self._parse_select_or_expression(alias=alias) 3398 3399 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3400 3401 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3402 index = self._index 3403 3404 if not self.errors: 3405 try: 3406 if self._parse_select(nested=True): 3407 return this 3408 except ParseError: 3409 pass 3410 finally: 3411 self.errors.clear() 3412 self._retreat(index) 3413 3414 if not self._match(TokenType.L_PAREN): 3415 return this 3416 3417 args = self._parse_csv( 3418 lambda: self._parse_constraint() 3419 or self._parse_column_def(self._parse_field(any_token=True)) 3420 ) 3421 3422 self._match_r_paren() 3423 return self.expression(exp.Schema, this=this, expressions=args) 3424 3425 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3426 # column defs are not really columns, they're identifiers 3427 if isinstance(this, exp.Column): 3428 this = this.this 3429 3430 kind = self._parse_types(schema=True) 3431 3432 if self._match_text_seq("FOR", "ORDINALITY"): 3433 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3434 3435 constraints = [] 3436 while True: 3437 constraint = self._parse_column_constraint() 3438 if not constraint: 3439 break 3440 constraints.append(constraint) 3441 3442 if not kind and not constraints: 3443 return this 3444 3445 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3446 3447 def _parse_auto_increment( 3448 self, 3449 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3450 start = None 3451 increment = None 3452 3453 if self._match(TokenType.L_PAREN, advance=False): 3454 args = self._parse_wrapped_csv(self._parse_bitwise) 3455 start = seq_get(args, 0) 3456 increment = seq_get(args, 1) 3457 elif self._match_text_seq("START"): 3458 start = self._parse_bitwise() 3459 self._match_text_seq("INCREMENT") 3460 increment = self._parse_bitwise() 3461 3462 if start and increment: 3463 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3464 3465 return exp.AutoIncrementColumnConstraint() 3466 3467 def _parse_compress(self) -> exp.CompressColumnConstraint: 3468 if self._match(TokenType.L_PAREN, advance=False): 3469 return self.expression( 3470 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3471 ) 3472 3473 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3474 3475 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3476 if self._match_text_seq("BY", "DEFAULT"): 3477 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3478 this = self.expression( 3479 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3480 ) 3481 else: 3482 self._match_text_seq("ALWAYS") 3483 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3484 3485 self._match(TokenType.ALIAS) 3486 identity = self._match_text_seq("IDENTITY") 3487 3488 if self._match(TokenType.L_PAREN): 3489 if self._match_text_seq("START", "WITH"): 3490 this.set("start", self._parse_bitwise()) 3491 if self._match_text_seq("INCREMENT", "BY"): 3492 this.set("increment", self._parse_bitwise()) 3493 if self._match_text_seq("MINVALUE"): 3494 this.set("minvalue", self._parse_bitwise()) 3495 if self._match_text_seq("MAXVALUE"): 3496 this.set("maxvalue", self._parse_bitwise()) 3497 3498 if self._match_text_seq("CYCLE"): 3499 this.set("cycle", True) 3500 elif self._match_text_seq("NO", "CYCLE"): 3501 this.set("cycle", False) 3502 3503 if not identity: 3504 this.set("expression", self._parse_bitwise()) 3505 3506 self._match_r_paren() 3507 3508 return this 3509 3510 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3511 self._match_text_seq("LENGTH") 3512 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3513 3514 def _parse_not_constraint( 3515 self, 3516 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3517 if self._match_text_seq("NULL"): 3518 return self.expression(exp.NotNullColumnConstraint) 3519 if self._match_text_seq("CASESPECIFIC"): 3520 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3521 return None 3522 3523 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3524 if self._match(TokenType.CONSTRAINT): 3525 this = self._parse_id_var() 3526 else: 3527 this = None 3528 3529 if self._match_texts(self.CONSTRAINT_PARSERS): 3530 return self.expression( 3531 exp.ColumnConstraint, 3532 this=this, 3533 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3534 ) 3535 3536 return this 3537 3538 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3539 if not self._match(TokenType.CONSTRAINT): 3540 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3541 3542 this = self._parse_id_var() 3543 expressions = [] 3544 3545 while True: 3546 constraint = self._parse_unnamed_constraint() or self._parse_function() 3547 if not constraint: 3548 break 3549 expressions.append(constraint) 3550 3551 return self.expression(exp.Constraint, this=this, expressions=expressions) 3552 3553 def _parse_unnamed_constraint( 3554 self, constraints: t.Optional[t.Collection[str]] = None 3555 ) -> t.Optional[exp.Expression]: 3556 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3557 return None 3558 3559 constraint = self._prev.text.upper() 3560 if constraint not in self.CONSTRAINT_PARSERS: 3561 self.raise_error(f"No parser found for schema constraint {constraint}.") 3562 3563 return self.CONSTRAINT_PARSERS[constraint](self) 3564 3565 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3566 self._match_text_seq("KEY") 3567 return self.expression( 3568 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3569 ) 3570 3571 def _parse_key_constraint_options(self) -> t.List[str]: 3572 options = [] 3573 while True: 3574 if not self._curr: 3575 break 3576 3577 if self._match(TokenType.ON): 3578 action = None 3579 on = self._advance_any() and self._prev.text 3580 3581 if self._match_text_seq("NO", "ACTION"): 3582 action = "NO ACTION" 3583 elif self._match_text_seq("CASCADE"): 3584 action = "CASCADE" 3585 elif self._match_pair(TokenType.SET, TokenType.NULL): 3586 action = "SET NULL" 3587 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3588 action = "SET DEFAULT" 3589 else: 3590 self.raise_error("Invalid key constraint") 3591 3592 options.append(f"ON {on} {action}") 3593 elif self._match_text_seq("NOT", "ENFORCED"): 3594 options.append("NOT ENFORCED") 3595 elif self._match_text_seq("DEFERRABLE"): 3596 options.append("DEFERRABLE") 3597 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3598 options.append("INITIALLY DEFERRED") 3599 elif self._match_text_seq("NORELY"): 3600 options.append("NORELY") 3601 elif self._match_text_seq("MATCH", "FULL"): 3602 options.append("MATCH FULL") 3603 else: 3604 break 3605 3606 return options 3607 3608 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3609 if match and not self._match(TokenType.REFERENCES): 3610 return None 3611 3612 expressions = None 3613 this = self._parse_table(schema=True) 3614 options = self._parse_key_constraint_options() 3615 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3616 3617 def _parse_foreign_key(self) -> exp.ForeignKey: 3618 expressions = self._parse_wrapped_id_vars() 3619 reference = self._parse_references() 3620 options = {} 3621 3622 while self._match(TokenType.ON): 3623 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3624 self.raise_error("Expected DELETE or UPDATE") 3625 3626 kind = self._prev.text.lower() 3627 3628 if self._match_text_seq("NO", "ACTION"): 3629 action = "NO ACTION" 3630 elif self._match(TokenType.SET): 3631 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3632 action = "SET " + self._prev.text.upper() 3633 else: 3634 self._advance() 3635 action = self._prev.text.upper() 3636 3637 options[kind] = action 3638 3639 return self.expression( 3640 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3641 ) 3642 3643 def _parse_primary_key( 3644 self, wrapped_optional: bool = False, in_props: bool = False 3645 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3646 desc = ( 3647 self._match_set((TokenType.ASC, TokenType.DESC)) 3648 and self._prev.token_type == TokenType.DESC 3649 ) 3650 3651 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3652 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3653 3654 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3655 options = self._parse_key_constraint_options() 3656 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3657 3658 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3659 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3660 return this 3661 3662 bracket_kind = self._prev.token_type 3663 3664 if self._match(TokenType.COLON): 3665 expressions: t.List[t.Optional[exp.Expression]] = [ 3666 self.expression(exp.Slice, expression=self._parse_conjunction()) 3667 ] 3668 else: 3669 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3670 3671 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3672 if bracket_kind == TokenType.L_BRACE: 3673 this = self.expression(exp.Struct, expressions=expressions) 3674 elif not this or this.name.upper() == "ARRAY": 3675 this = self.expression(exp.Array, expressions=expressions) 3676 else: 3677 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3678 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3679 3680 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3681 self.raise_error("Expected ]") 3682 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3683 self.raise_error("Expected }") 3684 3685 self._add_comments(this) 3686 return self._parse_bracket(this) 3687 3688 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3689 if self._match(TokenType.COLON): 3690 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3691 return this 3692 3693 def _parse_case(self) -> t.Optional[exp.Expression]: 3694 ifs = [] 3695 default = None 3696 3697 expression = self._parse_conjunction() 3698 3699 while self._match(TokenType.WHEN): 3700 this = self._parse_conjunction() 3701 self._match(TokenType.THEN) 3702 then = self._parse_conjunction() 3703 ifs.append(self.expression(exp.If, this=this, true=then)) 3704 3705 if self._match(TokenType.ELSE): 3706 default = self._parse_conjunction() 3707 3708 if not self._match(TokenType.END): 3709 self.raise_error("Expected END after CASE", self._prev) 3710 3711 return self._parse_window( 3712 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3713 ) 3714 3715 def _parse_if(self) -> t.Optional[exp.Expression]: 3716 if self._match(TokenType.L_PAREN): 3717 args = self._parse_csv(self._parse_conjunction) 3718 this = self.validate_expression(exp.If.from_arg_list(args), args) 3719 self._match_r_paren() 3720 else: 3721 index = self._index - 1 3722 condition = self._parse_conjunction() 3723 3724 if not condition: 3725 self._retreat(index) 3726 return None 3727 3728 self._match(TokenType.THEN) 3729 true = self._parse_conjunction() 3730 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3731 self._match(TokenType.END) 3732 this = self.expression(exp.If, this=condition, true=true, false=false) 3733 3734 return self._parse_window(this) 3735 3736 def _parse_extract(self) -> exp.Extract: 3737 this = self._parse_function() or self._parse_var() or self._parse_type() 3738 3739 if self._match(TokenType.FROM): 3740 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3741 3742 if not self._match(TokenType.COMMA): 3743 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3744 3745 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3746 3747 def _parse_any_value(self) -> exp.AnyValue: 3748 this = self._parse_lambda() 3749 is_max = None 3750 having = None 3751 3752 if self._match(TokenType.HAVING): 3753 self._match_texts(("MAX", "MIN")) 3754 is_max = self._prev.text == "MAX" 3755 having = self._parse_column() 3756 3757 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3758 3759 def _parse_cast(self, strict: bool) -> exp.Expression: 3760 this = self._parse_conjunction() 3761 3762 if not self._match(TokenType.ALIAS): 3763 if self._match(TokenType.COMMA): 3764 return self.expression( 3765 exp.CastToStrType, this=this, expression=self._parse_string() 3766 ) 3767 else: 3768 self.raise_error("Expected AS after CAST") 3769 3770 fmt = None 3771 to = self._parse_types() 3772 3773 if not to: 3774 self.raise_error("Expected TYPE after CAST") 3775 elif to.this == exp.DataType.Type.CHAR: 3776 if self._match(TokenType.CHARACTER_SET): 3777 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3778 elif self._match(TokenType.FORMAT): 3779 fmt_string = self._parse_string() 3780 fmt = self._parse_at_time_zone(fmt_string) 3781 3782 if to.this in exp.DataType.TEMPORAL_TYPES: 3783 this = self.expression( 3784 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3785 this=this, 3786 format=exp.Literal.string( 3787 format_time( 3788 fmt_string.this if fmt_string else "", 3789 self.FORMAT_MAPPING or self.TIME_MAPPING, 3790 self.FORMAT_TRIE or self.TIME_TRIE, 3791 ) 3792 ), 3793 ) 3794 3795 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3796 this.set("zone", fmt.args["zone"]) 3797 3798 return this 3799 3800 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3801 3802 def _parse_concat(self) -> t.Optional[exp.Expression]: 3803 args = self._parse_csv(self._parse_conjunction) 3804 if self.CONCAT_NULL_OUTPUTS_STRING: 3805 args = [ 3806 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3807 for arg in args 3808 if arg 3809 ] 3810 3811 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3812 # we find such a call we replace it with its argument. 3813 if len(args) == 1: 3814 return args[0] 3815 3816 return self.expression( 3817 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3818 ) 3819 3820 def _parse_string_agg(self) -> exp.Expression: 3821 if self._match(TokenType.DISTINCT): 3822 args: t.List[t.Optional[exp.Expression]] = [ 3823 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3824 ] 3825 if self._match(TokenType.COMMA): 3826 args.extend(self._parse_csv(self._parse_conjunction)) 3827 else: 3828 args = self._parse_csv(self._parse_conjunction) 3829 3830 index = self._index 3831 if not self._match(TokenType.R_PAREN): 3832 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3833 return self.expression( 3834 exp.GroupConcat, 3835 this=seq_get(args, 0), 3836 separator=self._parse_order(this=seq_get(args, 1)), 3837 ) 3838 3839 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3840 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3841 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3842 if not self._match_text_seq("WITHIN", "GROUP"): 3843 self._retreat(index) 3844 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3845 3846 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3847 order = self._parse_order(this=seq_get(args, 0)) 3848 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3849 3850 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3851 this = self._parse_bitwise() 3852 3853 if self._match(TokenType.USING): 3854 to: t.Optional[exp.Expression] = self.expression( 3855 exp.CharacterSet, this=self._parse_var() 3856 ) 3857 elif self._match(TokenType.COMMA): 3858 to = self._parse_types() 3859 else: 3860 to = None 3861 3862 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3863 3864 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3865 """ 3866 There are generally two variants of the DECODE function: 3867 3868 - DECODE(bin, charset) 3869 - DECODE(expression, search, result [, search, result] ... [, default]) 3870 3871 The second variant will always be parsed into a CASE expression. Note that NULL 3872 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3873 instead of relying on pattern matching. 3874 """ 3875 args = self._parse_csv(self._parse_conjunction) 3876 3877 if len(args) < 3: 3878 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3879 3880 expression, *expressions = args 3881 if not expression: 3882 return None 3883 3884 ifs = [] 3885 for search, result in zip(expressions[::2], expressions[1::2]): 3886 if not search or not result: 3887 return None 3888 3889 if isinstance(search, exp.Literal): 3890 ifs.append( 3891 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3892 ) 3893 elif isinstance(search, exp.Null): 3894 ifs.append( 3895 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3896 ) 3897 else: 3898 cond = exp.or_( 3899 exp.EQ(this=expression.copy(), expression=search), 3900 exp.and_( 3901 exp.Is(this=expression.copy(), expression=exp.Null()), 3902 exp.Is(this=search.copy(), expression=exp.Null()), 3903 copy=False, 3904 ), 3905 copy=False, 3906 ) 3907 ifs.append(exp.If(this=cond, true=result)) 3908 3909 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3910 3911 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3912 self._match_text_seq("KEY") 3913 key = self._parse_field() 3914 self._match(TokenType.COLON) 3915 self._match_text_seq("VALUE") 3916 value = self._parse_field() 3917 3918 if not key and not value: 3919 return None 3920 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3921 3922 def _parse_json_object(self) -> exp.JSONObject: 3923 star = self._parse_star() 3924 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3925 3926 null_handling = None 3927 if self._match_text_seq("NULL", "ON", "NULL"): 3928 null_handling = "NULL ON NULL" 3929 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3930 null_handling = "ABSENT ON NULL" 3931 3932 unique_keys = None 3933 if self._match_text_seq("WITH", "UNIQUE"): 3934 unique_keys = True 3935 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3936 unique_keys = False 3937 3938 self._match_text_seq("KEYS") 3939 3940 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3941 format_json = self._match_text_seq("FORMAT", "JSON") 3942 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3943 3944 return self.expression( 3945 exp.JSONObject, 3946 expressions=expressions, 3947 null_handling=null_handling, 3948 unique_keys=unique_keys, 3949 return_type=return_type, 3950 format_json=format_json, 3951 encoding=encoding, 3952 ) 3953 3954 def _parse_logarithm(self) -> exp.Func: 3955 # Default argument order is base, expression 3956 args = self._parse_csv(self._parse_range) 3957 3958 if len(args) > 1: 3959 if not self.LOG_BASE_FIRST: 3960 args.reverse() 3961 return exp.Log.from_arg_list(args) 3962 3963 return self.expression( 3964 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3965 ) 3966 3967 def _parse_match_against(self) -> exp.MatchAgainst: 3968 expressions = self._parse_csv(self._parse_column) 3969 3970 self._match_text_seq(")", "AGAINST", "(") 3971 3972 this = self._parse_string() 3973 3974 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3975 modifier = "IN NATURAL LANGUAGE MODE" 3976 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3977 modifier = f"{modifier} WITH QUERY EXPANSION" 3978 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3979 modifier = "IN BOOLEAN MODE" 3980 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3981 modifier = "WITH QUERY EXPANSION" 3982 else: 3983 modifier = None 3984 3985 return self.expression( 3986 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3987 ) 3988 3989 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3990 def _parse_open_json(self) -> exp.OpenJSON: 3991 this = self._parse_bitwise() 3992 path = self._match(TokenType.COMMA) and self._parse_string() 3993 3994 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3995 this = self._parse_field(any_token=True) 3996 kind = self._parse_types() 3997 path = self._parse_string() 3998 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3999 4000 return self.expression( 4001 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4002 ) 4003 4004 expressions = None 4005 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4006 self._match_l_paren() 4007 expressions = self._parse_csv(_parse_open_json_column_def) 4008 4009 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4010 4011 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4012 args = self._parse_csv(self._parse_bitwise) 4013 4014 if self._match(TokenType.IN): 4015 return self.expression( 4016 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4017 ) 4018 4019 if haystack_first: 4020 haystack = seq_get(args, 0) 4021 needle = seq_get(args, 1) 4022 else: 4023 needle = seq_get(args, 0) 4024 haystack = seq_get(args, 1) 4025 4026 return self.expression( 4027 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4028 ) 4029 4030 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4031 args = self._parse_csv(self._parse_table) 4032 return exp.JoinHint(this=func_name.upper(), expressions=args) 4033 4034 def _parse_substring(self) -> exp.Substring: 4035 # Postgres supports the form: substring(string [from int] [for int]) 4036 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4037 4038 args = self._parse_csv(self._parse_bitwise) 4039 4040 if self._match(TokenType.FROM): 4041 args.append(self._parse_bitwise()) 4042 if self._match(TokenType.FOR): 4043 args.append(self._parse_bitwise()) 4044 4045 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4046 4047 def _parse_trim(self) -> exp.Trim: 4048 # https://www.w3resource.com/sql/character-functions/trim.php 4049 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4050 4051 position = None 4052 collation = None 4053 4054 if self._match_texts(self.TRIM_TYPES): 4055 position = self._prev.text.upper() 4056 4057 expression = self._parse_bitwise() 4058 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4059 this = self._parse_bitwise() 4060 else: 4061 this = expression 4062 expression = None 4063 4064 if self._match(TokenType.COLLATE): 4065 collation = self._parse_bitwise() 4066 4067 return self.expression( 4068 exp.Trim, this=this, position=position, expression=expression, collation=collation 4069 ) 4070 4071 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4072 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4073 4074 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4075 return self._parse_window(self._parse_id_var(), alias=True) 4076 4077 def _parse_respect_or_ignore_nulls( 4078 self, this: t.Optional[exp.Expression] 4079 ) -> t.Optional[exp.Expression]: 4080 if self._match_text_seq("IGNORE", "NULLS"): 4081 return self.expression(exp.IgnoreNulls, this=this) 4082 if self._match_text_seq("RESPECT", "NULLS"): 4083 return self.expression(exp.RespectNulls, this=this) 4084 return this 4085 4086 def _parse_window( 4087 self, this: t.Optional[exp.Expression], alias: bool = False 4088 ) -> t.Optional[exp.Expression]: 4089 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4090 self._match(TokenType.WHERE) 4091 this = self.expression( 4092 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4093 ) 4094 self._match_r_paren() 4095 4096 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4097 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4098 if self._match_text_seq("WITHIN", "GROUP"): 4099 order = self._parse_wrapped(self._parse_order) 4100 this = self.expression(exp.WithinGroup, this=this, expression=order) 4101 4102 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4103 # Some dialects choose to implement and some do not. 4104 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4105 4106 # There is some code above in _parse_lambda that handles 4107 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4108 4109 # The below changes handle 4110 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4111 4112 # Oracle allows both formats 4113 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4114 # and Snowflake chose to do the same for familiarity 4115 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4116 this = self._parse_respect_or_ignore_nulls(this) 4117 4118 # bigquery select from window x AS (partition by ...) 4119 if alias: 4120 over = None 4121 self._match(TokenType.ALIAS) 4122 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4123 return this 4124 else: 4125 over = self._prev.text.upper() 4126 4127 if not self._match(TokenType.L_PAREN): 4128 return self.expression( 4129 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4130 ) 4131 4132 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4133 4134 first = self._match(TokenType.FIRST) 4135 if self._match_text_seq("LAST"): 4136 first = False 4137 4138 partition = self._parse_partition_by() 4139 order = self._parse_order() 4140 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4141 4142 if kind: 4143 self._match(TokenType.BETWEEN) 4144 start = self._parse_window_spec() 4145 self._match(TokenType.AND) 4146 end = self._parse_window_spec() 4147 4148 spec = self.expression( 4149 exp.WindowSpec, 4150 kind=kind, 4151 start=start["value"], 4152 start_side=start["side"], 4153 end=end["value"], 4154 end_side=end["side"], 4155 ) 4156 else: 4157 spec = None 4158 4159 self._match_r_paren() 4160 4161 return self.expression( 4162 exp.Window, 4163 this=this, 4164 partition_by=partition, 4165 order=order, 4166 spec=spec, 4167 alias=window_alias, 4168 over=over, 4169 first=first, 4170 ) 4171 4172 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4173 self._match(TokenType.BETWEEN) 4174 4175 return { 4176 "value": ( 4177 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4178 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4179 or self._parse_bitwise() 4180 ), 4181 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4182 } 4183 4184 def _parse_alias( 4185 self, this: t.Optional[exp.Expression], explicit: bool = False 4186 ) -> t.Optional[exp.Expression]: 4187 any_token = self._match(TokenType.ALIAS) 4188 4189 if explicit and not any_token: 4190 return this 4191 4192 if self._match(TokenType.L_PAREN): 4193 aliases = self.expression( 4194 exp.Aliases, 4195 this=this, 4196 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4197 ) 4198 self._match_r_paren(aliases) 4199 return aliases 4200 4201 alias = self._parse_id_var(any_token) 4202 4203 if alias: 4204 return self.expression(exp.Alias, this=this, alias=alias) 4205 4206 return this 4207 4208 def _parse_id_var( 4209 self, 4210 any_token: bool = True, 4211 tokens: t.Optional[t.Collection[TokenType]] = None, 4212 ) -> t.Optional[exp.Expression]: 4213 identifier = self._parse_identifier() 4214 4215 if identifier: 4216 return identifier 4217 4218 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4219 quoted = self._prev.token_type == TokenType.STRING 4220 return exp.Identifier(this=self._prev.text, quoted=quoted) 4221 4222 return None 4223 4224 def _parse_string(self) -> t.Optional[exp.Expression]: 4225 if self._match(TokenType.STRING): 4226 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4227 return self._parse_placeholder() 4228 4229 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4230 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4231 4232 def _parse_number(self) -> t.Optional[exp.Expression]: 4233 if self._match(TokenType.NUMBER): 4234 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4235 return self._parse_placeholder() 4236 4237 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4238 if self._match(TokenType.IDENTIFIER): 4239 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4240 return self._parse_placeholder() 4241 4242 def _parse_var( 4243 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4244 ) -> t.Optional[exp.Expression]: 4245 if ( 4246 (any_token and self._advance_any()) 4247 or self._match(TokenType.VAR) 4248 or (self._match_set(tokens) if tokens else False) 4249 ): 4250 return self.expression(exp.Var, this=self._prev.text) 4251 return self._parse_placeholder() 4252 4253 def _advance_any(self) -> t.Optional[Token]: 4254 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4255 self._advance() 4256 return self._prev 4257 return None 4258 4259 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4260 return self._parse_var() or self._parse_string() 4261 4262 def _parse_null(self) -> t.Optional[exp.Expression]: 4263 if self._match(TokenType.NULL): 4264 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4265 return None 4266 4267 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4268 if self._match(TokenType.TRUE): 4269 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4270 if self._match(TokenType.FALSE): 4271 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4272 return None 4273 4274 def _parse_star(self) -> t.Optional[exp.Expression]: 4275 if self._match(TokenType.STAR): 4276 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4277 return None 4278 4279 def _parse_parameter(self) -> exp.Parameter: 4280 wrapped = self._match(TokenType.L_BRACE) 4281 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4282 self._match(TokenType.R_BRACE) 4283 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4284 4285 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4286 if self._match_set(self.PLACEHOLDER_PARSERS): 4287 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4288 if placeholder: 4289 return placeholder 4290 self._advance(-1) 4291 return None 4292 4293 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4294 if not self._match(TokenType.EXCEPT): 4295 return None 4296 if self._match(TokenType.L_PAREN, advance=False): 4297 return self._parse_wrapped_csv(self._parse_column) 4298 return self._parse_csv(self._parse_column) 4299 4300 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4301 if not self._match(TokenType.REPLACE): 4302 return None 4303 if self._match(TokenType.L_PAREN, advance=False): 4304 return self._parse_wrapped_csv(self._parse_expression) 4305 return self._parse_expressions() 4306 4307 def _parse_csv( 4308 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4309 ) -> t.List[t.Optional[exp.Expression]]: 4310 parse_result = parse_method() 4311 items = [parse_result] if parse_result is not None else [] 4312 4313 while self._match(sep): 4314 self._add_comments(parse_result) 4315 parse_result = parse_method() 4316 if parse_result is not None: 4317 items.append(parse_result) 4318 4319 return items 4320 4321 def _parse_tokens( 4322 self, parse_method: t.Callable, expressions: t.Dict 4323 ) -> t.Optional[exp.Expression]: 4324 this = parse_method() 4325 4326 while self._match_set(expressions): 4327 this = self.expression( 4328 expressions[self._prev.token_type], 4329 this=this, 4330 comments=self._prev_comments, 4331 expression=parse_method(), 4332 ) 4333 4334 return this 4335 4336 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4337 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4338 4339 def _parse_wrapped_csv( 4340 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4341 ) -> t.List[t.Optional[exp.Expression]]: 4342 return self._parse_wrapped( 4343 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4344 ) 4345 4346 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4347 wrapped = self._match(TokenType.L_PAREN) 4348 if not wrapped and not optional: 4349 self.raise_error("Expecting (") 4350 parse_result = parse_method() 4351 if wrapped: 4352 self._match_r_paren() 4353 return parse_result 4354 4355 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4356 return self._parse_csv(self._parse_expression) 4357 4358 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4359 return self._parse_select() or self._parse_set_operations( 4360 self._parse_expression() if alias else self._parse_conjunction() 4361 ) 4362 4363 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4364 return self._parse_query_modifiers( 4365 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4366 ) 4367 4368 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4369 this = None 4370 if self._match_texts(self.TRANSACTION_KIND): 4371 this = self._prev.text 4372 4373 self._match_texts({"TRANSACTION", "WORK"}) 4374 4375 modes = [] 4376 while True: 4377 mode = [] 4378 while self._match(TokenType.VAR): 4379 mode.append(self._prev.text) 4380 4381 if mode: 4382 modes.append(" ".join(mode)) 4383 if not self._match(TokenType.COMMA): 4384 break 4385 4386 return self.expression(exp.Transaction, this=this, modes=modes) 4387 4388 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4389 chain = None 4390 savepoint = None 4391 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4392 4393 self._match_texts({"TRANSACTION", "WORK"}) 4394 4395 if self._match_text_seq("TO"): 4396 self._match_text_seq("SAVEPOINT") 4397 savepoint = self._parse_id_var() 4398 4399 if self._match(TokenType.AND): 4400 chain = not self._match_text_seq("NO") 4401 self._match_text_seq("CHAIN") 4402 4403 if is_rollback: 4404 return self.expression(exp.Rollback, savepoint=savepoint) 4405 4406 return self.expression(exp.Commit, chain=chain) 4407 4408 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4409 if not self._match_text_seq("ADD"): 4410 return None 4411 4412 self._match(TokenType.COLUMN) 4413 exists_column = self._parse_exists(not_=True) 4414 expression = self._parse_column_def(self._parse_field(any_token=True)) 4415 4416 if expression: 4417 expression.set("exists", exists_column) 4418 4419 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4420 if self._match_texts(("FIRST", "AFTER")): 4421 position = self._prev.text 4422 column_position = self.expression( 4423 exp.ColumnPosition, this=self._parse_column(), position=position 4424 ) 4425 expression.set("position", column_position) 4426 4427 return expression 4428 4429 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4430 drop = self._match(TokenType.DROP) and self._parse_drop() 4431 if drop and not isinstance(drop, exp.Command): 4432 drop.set("kind", drop.args.get("kind", "COLUMN")) 4433 return drop 4434 4435 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4436 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4437 return self.expression( 4438 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4439 ) 4440 4441 def _parse_add_constraint(self) -> exp.AddConstraint: 4442 this = None 4443 kind = self._prev.token_type 4444 4445 if kind == TokenType.CONSTRAINT: 4446 this = self._parse_id_var() 4447 4448 if self._match_text_seq("CHECK"): 4449 expression = self._parse_wrapped(self._parse_conjunction) 4450 enforced = self._match_text_seq("ENFORCED") 4451 4452 return self.expression( 4453 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4454 ) 4455 4456 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4457 expression = self._parse_foreign_key() 4458 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4459 expression = self._parse_primary_key() 4460 else: 4461 expression = None 4462 4463 return self.expression(exp.AddConstraint, this=this, expression=expression) 4464 4465 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4466 index = self._index - 1 4467 4468 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4469 return self._parse_csv(self._parse_add_constraint) 4470 4471 self._retreat(index) 4472 return self._parse_csv(self._parse_add_column) 4473 4474 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4475 self._match(TokenType.COLUMN) 4476 column = self._parse_field(any_token=True) 4477 4478 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4479 return self.expression(exp.AlterColumn, this=column, drop=True) 4480 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4481 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4482 4483 self._match_text_seq("SET", "DATA") 4484 return self.expression( 4485 exp.AlterColumn, 4486 this=column, 4487 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4488 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4489 using=self._match(TokenType.USING) and self._parse_conjunction(), 4490 ) 4491 4492 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4493 index = self._index - 1 4494 4495 partition_exists = self._parse_exists() 4496 if self._match(TokenType.PARTITION, advance=False): 4497 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4498 4499 self._retreat(index) 4500 return self._parse_csv(self._parse_drop_column) 4501 4502 def _parse_alter_table_rename(self) -> exp.RenameTable: 4503 self._match_text_seq("TO") 4504 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4505 4506 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4507 start = self._prev 4508 4509 if not self._match(TokenType.TABLE): 4510 return self._parse_as_command(start) 4511 4512 exists = self._parse_exists() 4513 this = self._parse_table(schema=True) 4514 4515 if self._next: 4516 self._advance() 4517 4518 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4519 if parser: 4520 actions = ensure_list(parser(self)) 4521 4522 if not self._curr: 4523 return self.expression( 4524 exp.AlterTable, 4525 this=this, 4526 exists=exists, 4527 actions=actions, 4528 ) 4529 return self._parse_as_command(start) 4530 4531 def _parse_merge(self) -> exp.Merge: 4532 self._match(TokenType.INTO) 4533 target = self._parse_table() 4534 4535 self._match(TokenType.USING) 4536 using = self._parse_table() 4537 4538 self._match(TokenType.ON) 4539 on = self._parse_conjunction() 4540 4541 whens = [] 4542 while self._match(TokenType.WHEN): 4543 matched = not self._match(TokenType.NOT) 4544 self._match_text_seq("MATCHED") 4545 source = ( 4546 False 4547 if self._match_text_seq("BY", "TARGET") 4548 else self._match_text_seq("BY", "SOURCE") 4549 ) 4550 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4551 4552 self._match(TokenType.THEN) 4553 4554 if self._match(TokenType.INSERT): 4555 _this = self._parse_star() 4556 if _this: 4557 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4558 else: 4559 then = self.expression( 4560 exp.Insert, 4561 this=self._parse_value(), 4562 expression=self._match(TokenType.VALUES) and self._parse_value(), 4563 ) 4564 elif self._match(TokenType.UPDATE): 4565 expressions = self._parse_star() 4566 if expressions: 4567 then = self.expression(exp.Update, expressions=expressions) 4568 else: 4569 then = self.expression( 4570 exp.Update, 4571 expressions=self._match(TokenType.SET) 4572 and self._parse_csv(self._parse_equality), 4573 ) 4574 elif self._match(TokenType.DELETE): 4575 then = self.expression(exp.Var, this=self._prev.text) 4576 else: 4577 then = None 4578 4579 whens.append( 4580 self.expression( 4581 exp.When, 4582 matched=matched, 4583 source=source, 4584 condition=condition, 4585 then=then, 4586 ) 4587 ) 4588 4589 return self.expression( 4590 exp.Merge, 4591 this=target, 4592 using=using, 4593 on=on, 4594 expressions=whens, 4595 ) 4596 4597 def _parse_show(self) -> t.Optional[exp.Expression]: 4598 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4599 if parser: 4600 return parser(self) 4601 self._advance() 4602 return self.expression(exp.Show, this=self._prev.text.upper()) 4603 4604 def _parse_set_item_assignment( 4605 self, kind: t.Optional[str] = None 4606 ) -> t.Optional[exp.Expression]: 4607 index = self._index 4608 4609 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4610 return self._parse_set_transaction(global_=kind == "GLOBAL") 4611 4612 left = self._parse_primary() or self._parse_id_var() 4613 4614 if not self._match_texts(("=", "TO")): 4615 self._retreat(index) 4616 return None 4617 4618 right = self._parse_statement() or self._parse_id_var() 4619 this = self.expression(exp.EQ, this=left, expression=right) 4620 4621 return self.expression(exp.SetItem, this=this, kind=kind) 4622 4623 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4624 self._match_text_seq("TRANSACTION") 4625 characteristics = self._parse_csv( 4626 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4627 ) 4628 return self.expression( 4629 exp.SetItem, 4630 expressions=characteristics, 4631 kind="TRANSACTION", 4632 **{"global": global_}, # type: ignore 4633 ) 4634 4635 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4636 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4637 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4638 4639 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4640 index = self._index 4641 set_ = self.expression( 4642 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4643 ) 4644 4645 if self._curr: 4646 self._retreat(index) 4647 return self._parse_as_command(self._prev) 4648 4649 return set_ 4650 4651 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4652 for option in options: 4653 if self._match_text_seq(*option.split(" ")): 4654 return exp.var(option) 4655 return None 4656 4657 def _parse_as_command(self, start: Token) -> exp.Command: 4658 while self._curr: 4659 self._advance() 4660 text = self._find_sql(start, self._prev) 4661 size = len(start.text) 4662 return exp.Command(this=text[:size], expression=text[size:]) 4663 4664 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4665 settings = [] 4666 4667 self._match_l_paren() 4668 kind = self._parse_id_var() 4669 4670 if self._match(TokenType.L_PAREN): 4671 while True: 4672 key = self._parse_id_var() 4673 value = self._parse_primary() 4674 4675 if not key and value is None: 4676 break 4677 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4678 self._match(TokenType.R_PAREN) 4679 4680 self._match_r_paren() 4681 4682 return self.expression( 4683 exp.DictProperty, 4684 this=this, 4685 kind=kind.this if kind else None, 4686 settings=settings, 4687 ) 4688 4689 def _parse_dict_range(self, this: str) -> exp.DictRange: 4690 self._match_l_paren() 4691 has_min = self._match_text_seq("MIN") 4692 if has_min: 4693 min = self._parse_var() or self._parse_primary() 4694 self._match_text_seq("MAX") 4695 max = self._parse_var() or self._parse_primary() 4696 else: 4697 max = self._parse_var() or self._parse_primary() 4698 min = exp.Literal.number(0) 4699 self._match_r_paren() 4700 return self.expression(exp.DictRange, this=this, min=min, max=max) 4701 4702 def _find_parser( 4703 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4704 ) -> t.Optional[t.Callable]: 4705 if not self._curr: 4706 return None 4707 4708 index = self._index 4709 this = [] 4710 while True: 4711 # The current token might be multiple words 4712 curr = self._curr.text.upper() 4713 key = curr.split(" ") 4714 this.append(curr) 4715 4716 self._advance() 4717 result, trie = in_trie(trie, key) 4718 if result == TrieResult.FAILED: 4719 break 4720 4721 if result == TrieResult.EXISTS: 4722 subparser = parsers[" ".join(this)] 4723 return subparser 4724 4725 self._retreat(index) 4726 return None 4727 4728 def _match(self, token_type, advance=True, expression=None): 4729 if not self._curr: 4730 return None 4731 4732 if self._curr.token_type == token_type: 4733 if advance: 4734 self._advance() 4735 self._add_comments(expression) 4736 return True 4737 4738 return None 4739 4740 def _match_set(self, types, advance=True): 4741 if not self._curr: 4742 return None 4743 4744 if self._curr.token_type in types: 4745 if advance: 4746 self._advance() 4747 return True 4748 4749 return None 4750 4751 def _match_pair(self, token_type_a, token_type_b, advance=True): 4752 if not self._curr or not self._next: 4753 return None 4754 4755 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4756 if advance: 4757 self._advance(2) 4758 return True 4759 4760 return None 4761 4762 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4763 if not self._match(TokenType.L_PAREN, expression=expression): 4764 self.raise_error("Expecting (") 4765 4766 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4767 if not self._match(TokenType.R_PAREN, expression=expression): 4768 self.raise_error("Expecting )") 4769 4770 def _match_texts(self, texts, advance=True): 4771 if self._curr and self._curr.text.upper() in texts: 4772 if advance: 4773 self._advance() 4774 return True 4775 return False 4776 4777 def _match_text_seq(self, *texts, advance=True): 4778 index = self._index 4779 for text in texts: 4780 if self._curr and self._curr.text.upper() == text: 4781 self._advance() 4782 else: 4783 self._retreat(index) 4784 return False 4785 4786 if not advance: 4787 self._retreat(index) 4788 4789 return True 4790 4791 @t.overload 4792 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4793 ... 4794 4795 @t.overload 4796 def _replace_columns_with_dots( 4797 self, this: t.Optional[exp.Expression] 4798 ) -> t.Optional[exp.Expression]: 4799 ... 4800 4801 def _replace_columns_with_dots(self, this): 4802 if isinstance(this, exp.Dot): 4803 exp.replace_children(this, self._replace_columns_with_dots) 4804 elif isinstance(this, exp.Column): 4805 exp.replace_children(this, self._replace_columns_with_dots) 4806 table = this.args.get("table") 4807 this = ( 4808 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4809 ) 4810 4811 return this 4812 4813 def _replace_lambda( 4814 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4815 ) -> t.Optional[exp.Expression]: 4816 if not node: 4817 return node 4818 4819 for column in node.find_all(exp.Column): 4820 if column.parts[0].name in lambda_variables: 4821 dot_or_id = column.to_dot() if column.table else column.this 4822 parent = column.parent 4823 4824 while isinstance(parent, exp.Dot): 4825 if not isinstance(parent.parent, exp.Dot): 4826 parent.replace(dot_or_id) 4827 break 4828 parent = parent.parent 4829 else: 4830 if column is node: 4831 node = dot_or_id 4832 else: 4833 column.replace(dot_or_id) 4834 return node
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.RLIKE, 332 TokenType.ROW, 333 TokenType.UNNEST, 334 TokenType.VAR, 335 TokenType.LEFT, 336 TokenType.RIGHT, 337 TokenType.DATE, 338 TokenType.DATETIME, 339 TokenType.TABLE, 340 TokenType.TIMESTAMP, 341 TokenType.TIMESTAMPTZ, 342 TokenType.WINDOW, 343 *TYPE_TOKENS, 344 *SUBQUERY_PREDICATES, 345 } 346 347 CONJUNCTION = { 348 TokenType.AND: exp.And, 349 TokenType.OR: exp.Or, 350 } 351 352 EQUALITY = { 353 TokenType.EQ: exp.EQ, 354 TokenType.NEQ: exp.NEQ, 355 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 356 } 357 358 COMPARISON = { 359 TokenType.GT: exp.GT, 360 TokenType.GTE: exp.GTE, 361 TokenType.LT: exp.LT, 362 TokenType.LTE: exp.LTE, 363 } 364 365 BITWISE = { 366 TokenType.AMP: exp.BitwiseAnd, 367 TokenType.CARET: exp.BitwiseXor, 368 TokenType.PIPE: exp.BitwiseOr, 369 TokenType.DPIPE: exp.DPipe, 370 } 371 372 TERM = { 373 TokenType.DASH: exp.Sub, 374 TokenType.PLUS: exp.Add, 375 TokenType.MOD: exp.Mod, 376 TokenType.COLLATE: exp.Collate, 377 } 378 379 FACTOR = { 380 TokenType.DIV: exp.IntDiv, 381 TokenType.LR_ARROW: exp.Distance, 382 TokenType.SLASH: exp.Div, 383 TokenType.STAR: exp.Mul, 384 } 385 386 TIMESTAMPS = { 387 TokenType.TIME, 388 TokenType.TIMESTAMP, 389 TokenType.TIMESTAMPTZ, 390 TokenType.TIMESTAMPLTZ, 391 } 392 393 SET_OPERATIONS = { 394 TokenType.UNION, 395 TokenType.INTERSECT, 396 TokenType.EXCEPT, 397 } 398 399 JOIN_METHODS = { 400 TokenType.NATURAL, 401 TokenType.ASOF, 402 } 403 404 JOIN_SIDES = { 405 TokenType.LEFT, 406 TokenType.RIGHT, 407 TokenType.FULL, 408 } 409 410 JOIN_KINDS = { 411 TokenType.INNER, 412 TokenType.OUTER, 413 TokenType.CROSS, 414 TokenType.SEMI, 415 TokenType.ANTI, 416 } 417 418 JOIN_HINTS: t.Set[str] = set() 419 420 LAMBDAS = { 421 TokenType.ARROW: lambda self, expressions: self.expression( 422 exp.Lambda, 423 this=self._replace_lambda( 424 self._parse_conjunction(), 425 {node.name for node in expressions}, 426 ), 427 expressions=expressions, 428 ), 429 TokenType.FARROW: lambda self, expressions: self.expression( 430 exp.Kwarg, 431 this=exp.var(expressions[0].name), 432 expression=self._parse_conjunction(), 433 ), 434 } 435 436 COLUMN_OPERATORS = { 437 TokenType.DOT: None, 438 TokenType.DCOLON: lambda self, this, to: self.expression( 439 exp.Cast if self.STRICT_CAST else exp.TryCast, 440 this=this, 441 to=to, 442 ), 443 TokenType.ARROW: lambda self, this, path: self.expression( 444 exp.JSONExtract, 445 this=this, 446 expression=path, 447 ), 448 TokenType.DARROW: lambda self, this, path: self.expression( 449 exp.JSONExtractScalar, 450 this=this, 451 expression=path, 452 ), 453 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 454 exp.JSONBExtract, 455 this=this, 456 expression=path, 457 ), 458 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 459 exp.JSONBExtractScalar, 460 this=this, 461 expression=path, 462 ), 463 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 464 exp.JSONBContains, 465 this=this, 466 expression=key, 467 ), 468 } 469 470 EXPRESSION_PARSERS = { 471 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 472 exp.Column: lambda self: self._parse_column(), 473 exp.Condition: lambda self: self._parse_conjunction(), 474 exp.DataType: lambda self: self._parse_types(), 475 exp.Expression: lambda self: self._parse_statement(), 476 exp.From: lambda self: self._parse_from(), 477 exp.Group: lambda self: self._parse_group(), 478 exp.Having: lambda self: self._parse_having(), 479 exp.Identifier: lambda self: self._parse_id_var(), 480 exp.Join: lambda self: self._parse_join(), 481 exp.Lambda: lambda self: self._parse_lambda(), 482 exp.Lateral: lambda self: self._parse_lateral(), 483 exp.Limit: lambda self: self._parse_limit(), 484 exp.Offset: lambda self: self._parse_offset(), 485 exp.Order: lambda self: self._parse_order(), 486 exp.Ordered: lambda self: self._parse_ordered(), 487 exp.Properties: lambda self: self._parse_properties(), 488 exp.Qualify: lambda self: self._parse_qualify(), 489 exp.Returning: lambda self: self._parse_returning(), 490 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 491 exp.Table: lambda self: self._parse_table_parts(), 492 exp.TableAlias: lambda self: self._parse_table_alias(), 493 exp.Where: lambda self: self._parse_where(), 494 exp.Window: lambda self: self._parse_named_window(), 495 exp.With: lambda self: self._parse_with(), 496 "JOIN_TYPE": lambda self: self._parse_join_parts(), 497 } 498 499 STATEMENT_PARSERS = { 500 TokenType.ALTER: lambda self: self._parse_alter(), 501 TokenType.BEGIN: lambda self: self._parse_transaction(), 502 TokenType.CACHE: lambda self: self._parse_cache(), 503 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 504 TokenType.COMMENT: lambda self: self._parse_comment(), 505 TokenType.CREATE: lambda self: self._parse_create(), 506 TokenType.DELETE: lambda self: self._parse_delete(), 507 TokenType.DESC: lambda self: self._parse_describe(), 508 TokenType.DESCRIBE: lambda self: self._parse_describe(), 509 TokenType.DROP: lambda self: self._parse_drop(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ UNCOMMITTED", 788 "ISOLATION LEVEL SERIALIZABLE", 789 "READ WRITE", 790 "READ ONLY", 791 } 792 793 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 794 795 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 796 797 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 798 799 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 800 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 801 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 802 803 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 804 805 STRICT_CAST = True 806 807 # A NULL arg in CONCAT yields NULL by default 808 CONCAT_NULL_OUTPUTS_STRING = False 809 810 PREFIXED_PIVOT_COLUMNS = False 811 IDENTIFY_PIVOT_STRINGS = False 812 813 LOG_BASE_FIRST = True 814 LOG_DEFAULTS_TO_LN = False 815 816 __slots__ = ( 817 "error_level", 818 "error_message_context", 819 "max_errors", 820 "sql", 821 "errors", 822 "_tokens", 823 "_index", 824 "_curr", 825 "_next", 826 "_prev", 827 "_prev_comments", 828 ) 829 830 # Autofilled 831 INDEX_OFFSET: int = 0 832 UNNEST_COLUMN_ONLY: bool = False 833 ALIAS_POST_TABLESAMPLE: bool = False 834 STRICT_STRING_CONCAT = False 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error) 991 992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression 1031 1032 def _find_sql(self, start: Token, end: Token) -> str: 1033 return self.sql[start.start : end.end + 1] 1034 1035 def _advance(self, times: int = 1) -> None: 1036 self._index += times 1037 self._curr = seq_get(self._tokens, self._index) 1038 self._next = seq_get(self._tokens, self._index + 1) 1039 1040 if self._index > 0: 1041 self._prev = self._tokens[self._index - 1] 1042 self._prev_comments = self._prev.comments 1043 else: 1044 self._prev = None 1045 self._prev_comments = None 1046 1047 def _retreat(self, index: int) -> None: 1048 if index != self._index: 1049 self._advance(index - self._index) 1050 1051 def _parse_command(self) -> exp.Command: 1052 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1053 1054 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1055 start = self._prev 1056 exists = self._parse_exists() if allow_exists else None 1057 1058 self._match(TokenType.ON) 1059 1060 kind = self._match_set(self.CREATABLES) and self._prev 1061 if not kind: 1062 return self._parse_as_command(start) 1063 1064 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1065 this = self._parse_user_defined_function(kind=kind.token_type) 1066 elif kind.token_type == TokenType.TABLE: 1067 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1068 elif kind.token_type == TokenType.COLUMN: 1069 this = self._parse_column() 1070 else: 1071 this = self._parse_id_var() 1072 1073 self._match(TokenType.IS) 1074 1075 return self.expression( 1076 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1077 ) 1078 1079 def _parse_to_table( 1080 self, 1081 ) -> exp.ToTableProperty: 1082 table = self._parse_table_parts(schema=True) 1083 return self.expression(exp.ToTableProperty, this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1103 ) 1104 1105 return this 1106 1107 expressions = self._parse_csv(_parse_ttl_action) 1108 where = self._parse_where() 1109 group = self._parse_group() 1110 1111 aggregates = None 1112 if group and self._match(TokenType.SET): 1113 aggregates = self._parse_csv(self._parse_set_item) 1114 1115 return self.expression( 1116 exp.MergeTreeTTL, 1117 expressions=expressions, 1118 where=where, 1119 group=group, 1120 aggregates=aggregates, 1121 ) 1122 1123 def _parse_statement(self) -> t.Optional[exp.Expression]: 1124 if self._curr is None: 1125 return None 1126 1127 if self._match_set(self.STATEMENT_PARSERS): 1128 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1129 1130 if self._match_set(Tokenizer.COMMANDS): 1131 return self._parse_command() 1132 1133 expression = self._parse_expression() 1134 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1135 return self._parse_query_modifiers(expression) 1136 1137 def _parse_drop(self) -> exp.Drop | exp.Command: 1138 start = self._prev 1139 temporary = self._match(TokenType.TEMPORARY) 1140 materialized = self._match_text_seq("MATERIALIZED") 1141 1142 kind = self._match_set(self.CREATABLES) and self._prev.text 1143 if not kind: 1144 return self._parse_as_command(start) 1145 1146 return self.expression( 1147 exp.Drop, 1148 comments=start.comments, 1149 exists=self._parse_exists(), 1150 this=self._parse_table(schema=True), 1151 kind=kind, 1152 temporary=temporary, 1153 materialized=materialized, 1154 cascade=self._match_text_seq("CASCADE"), 1155 constraints=self._match_text_seq("CONSTRAINTS"), 1156 purge=self._match_text_seq("PURGE"), 1157 ) 1158 1159 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1160 return ( 1161 self._match(TokenType.IF) 1162 and (not not_ or self._match(TokenType.NOT)) 1163 and self._match(TokenType.EXISTS) 1164 ) 1165 1166 def _parse_create(self) -> exp.Create | exp.Command: 1167 # Note: this can't be None because we've matched a statement parser 1168 start = self._prev 1169 replace = start.text.upper() == "REPLACE" or self._match_pair( 1170 TokenType.OR, TokenType.REPLACE 1171 ) 1172 unique = self._match(TokenType.UNIQUE) 1173 1174 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1175 self._advance() 1176 1177 properties = None 1178 create_token = self._match_set(self.CREATABLES) and self._prev 1179 1180 if not create_token: 1181 # exp.Properties.Location.POST_CREATE 1182 properties = self._parse_properties() 1183 create_token = self._match_set(self.CREATABLES) and self._prev 1184 1185 if not properties or not create_token: 1186 return self._parse_as_command(start) 1187 1188 exists = self._parse_exists(not_=True) 1189 this = None 1190 expression = None 1191 indexes = None 1192 no_schema_binding = None 1193 begin = None 1194 clone = None 1195 1196 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1197 nonlocal properties 1198 if properties and temp_props: 1199 properties.expressions.extend(temp_props.expressions) 1200 elif temp_props: 1201 properties = temp_props 1202 1203 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1204 this = self._parse_user_defined_function(kind=create_token.token_type) 1205 1206 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1207 extend_props(self._parse_properties()) 1208 1209 self._match(TokenType.ALIAS) 1210 begin = self._match(TokenType.BEGIN) 1211 return_ = self._match_text_seq("RETURN") 1212 expression = self._parse_statement() 1213 1214 if return_: 1215 expression = self.expression(exp.Return, this=expression) 1216 elif create_token.token_type == TokenType.INDEX: 1217 this = self._parse_index(index=self._parse_id_var()) 1218 elif create_token.token_type in self.DB_CREATABLES: 1219 table_parts = self._parse_table_parts(schema=True) 1220 1221 # exp.Properties.Location.POST_NAME 1222 self._match(TokenType.COMMA) 1223 extend_props(self._parse_properties(before=True)) 1224 1225 this = self._parse_schema(this=table_parts) 1226 1227 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1228 extend_props(self._parse_properties()) 1229 1230 self._match(TokenType.ALIAS) 1231 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1232 # exp.Properties.Location.POST_ALIAS 1233 extend_props(self._parse_properties()) 1234 1235 expression = self._parse_ddl_select() 1236 1237 if create_token.token_type == TokenType.TABLE: 1238 # exp.Properties.Location.POST_EXPRESSION 1239 extend_props(self._parse_properties()) 1240 1241 indexes = [] 1242 while True: 1243 index = self._parse_index() 1244 1245 # exp.Properties.Location.POST_INDEX 1246 extend_props(self._parse_properties()) 1247 1248 if not index: 1249 break 1250 else: 1251 self._match(TokenType.COMMA) 1252 indexes.append(index) 1253 elif create_token.token_type == TokenType.VIEW: 1254 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1255 no_schema_binding = True 1256 1257 if self._match_text_seq("CLONE"): 1258 clone = self._parse_table(schema=True) 1259 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1260 clone_kind = ( 1261 self._match(TokenType.L_PAREN) 1262 and self._match_texts(self.CLONE_KINDS) 1263 and self._prev.text.upper() 1264 ) 1265 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1266 self._match(TokenType.R_PAREN) 1267 clone = self.expression( 1268 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1269 ) 1270 1271 return self.expression( 1272 exp.Create, 1273 this=this, 1274 kind=create_token.text, 1275 replace=replace, 1276 unique=unique, 1277 expression=expression, 1278 exists=exists, 1279 properties=properties, 1280 indexes=indexes, 1281 no_schema_binding=no_schema_binding, 1282 begin=begin, 1283 clone=clone, 1284 ) 1285 1286 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1287 # only used for teradata currently 1288 self._match(TokenType.COMMA) 1289 1290 kwargs = { 1291 "no": self._match_text_seq("NO"), 1292 "dual": self._match_text_seq("DUAL"), 1293 "before": self._match_text_seq("BEFORE"), 1294 "default": self._match_text_seq("DEFAULT"), 1295 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1296 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1297 "after": self._match_text_seq("AFTER"), 1298 "minimum": self._match_texts(("MIN", "MINIMUM")), 1299 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1300 } 1301 1302 if self._match_texts(self.PROPERTY_PARSERS): 1303 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1304 try: 1305 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1306 except TypeError: 1307 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1308 1309 return None 1310 1311 def _parse_property(self) -> t.Optional[exp.Expression]: 1312 if self._match_texts(self.PROPERTY_PARSERS): 1313 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1314 1315 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1316 return self._parse_character_set(default=True) 1317 1318 if self._match_text_seq("COMPOUND", "SORTKEY"): 1319 return self._parse_sortkey(compound=True) 1320 1321 if self._match_text_seq("SQL", "SECURITY"): 1322 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1323 1324 assignment = self._match_pair( 1325 TokenType.VAR, TokenType.EQ, advance=False 1326 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1327 1328 if assignment: 1329 key = self._parse_var_or_string() 1330 self._match(TokenType.EQ) 1331 return self.expression(exp.Property, this=key, value=self._parse_column()) 1332 1333 return None 1334 1335 def _parse_stored(self) -> exp.FileFormatProperty: 1336 self._match(TokenType.ALIAS) 1337 1338 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1339 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1340 1341 return self.expression( 1342 exp.FileFormatProperty, 1343 this=self.expression( 1344 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1345 ) 1346 if input_format or output_format 1347 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1348 ) 1349 1350 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1351 self._match(TokenType.EQ) 1352 self._match(TokenType.ALIAS) 1353 return self.expression(exp_class, this=self._parse_field()) 1354 1355 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1356 properties = [] 1357 while True: 1358 if before: 1359 prop = self._parse_property_before() 1360 else: 1361 prop = self._parse_property() 1362 1363 if not prop: 1364 break 1365 for p in ensure_list(prop): 1366 properties.append(p) 1367 1368 if properties: 1369 return self.expression(exp.Properties, expressions=properties) 1370 1371 return None 1372 1373 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1374 return self.expression( 1375 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1376 ) 1377 1378 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1379 if self._index >= 2: 1380 pre_volatile_token = self._tokens[self._index - 2] 1381 else: 1382 pre_volatile_token = None 1383 1384 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1385 return exp.VolatileProperty() 1386 1387 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1388 1389 def _parse_with_property( 1390 self, 1391 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1392 if self._match(TokenType.L_PAREN, advance=False): 1393 return self._parse_wrapped_csv(self._parse_property) 1394 1395 if self._match_text_seq("JOURNAL"): 1396 return self._parse_withjournaltable() 1397 1398 if self._match_text_seq("DATA"): 1399 return self._parse_withdata(no=False) 1400 elif self._match_text_seq("NO", "DATA"): 1401 return self._parse_withdata(no=True) 1402 1403 if not self._next: 1404 return None 1405 1406 return self._parse_withisolatedloading() 1407 1408 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1409 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1410 self._match(TokenType.EQ) 1411 1412 user = self._parse_id_var() 1413 self._match(TokenType.PARAMETER) 1414 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1415 1416 if not user or not host: 1417 return None 1418 1419 return exp.DefinerProperty(this=f"{user}@{host}") 1420 1421 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1422 self._match(TokenType.TABLE) 1423 self._match(TokenType.EQ) 1424 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1425 1426 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1427 return self.expression(exp.LogProperty, no=no) 1428 1429 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1430 return self.expression(exp.JournalProperty, **kwargs) 1431 1432 def _parse_checksum(self) -> exp.ChecksumProperty: 1433 self._match(TokenType.EQ) 1434 1435 on = None 1436 if self._match(TokenType.ON): 1437 on = True 1438 elif self._match_text_seq("OFF"): 1439 on = False 1440 1441 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1442 1443 def _parse_cluster(self) -> exp.Cluster: 1444 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1445 1446 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1447 self._match_text_seq("BY") 1448 1449 self._match_l_paren() 1450 expressions = self._parse_csv(self._parse_column) 1451 self._match_r_paren() 1452 1453 if self._match_text_seq("SORTED", "BY"): 1454 self._match_l_paren() 1455 sorted_by = self._parse_csv(self._parse_ordered) 1456 self._match_r_paren() 1457 else: 1458 sorted_by = None 1459 1460 self._match(TokenType.INTO) 1461 buckets = self._parse_number() 1462 self._match_text_seq("BUCKETS") 1463 1464 return self.expression( 1465 exp.ClusteredByProperty, 1466 expressions=expressions, 1467 sorted_by=sorted_by, 1468 buckets=buckets, 1469 ) 1470 1471 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1472 if not self._match_text_seq("GRANTS"): 1473 self._retreat(self._index - 1) 1474 return None 1475 1476 return self.expression(exp.CopyGrantsProperty) 1477 1478 def _parse_freespace(self) -> exp.FreespaceProperty: 1479 self._match(TokenType.EQ) 1480 return self.expression( 1481 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1482 ) 1483 1484 def _parse_mergeblockratio( 1485 self, no: bool = False, default: bool = False 1486 ) -> exp.MergeBlockRatioProperty: 1487 if self._match(TokenType.EQ): 1488 return self.expression( 1489 exp.MergeBlockRatioProperty, 1490 this=self._parse_number(), 1491 percent=self._match(TokenType.PERCENT), 1492 ) 1493 1494 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1495 1496 def _parse_datablocksize( 1497 self, 1498 default: t.Optional[bool] = None, 1499 minimum: t.Optional[bool] = None, 1500 maximum: t.Optional[bool] = None, 1501 ) -> exp.DataBlocksizeProperty: 1502 self._match(TokenType.EQ) 1503 size = self._parse_number() 1504 1505 units = None 1506 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1507 units = self._prev.text 1508 1509 return self.expression( 1510 exp.DataBlocksizeProperty, 1511 size=size, 1512 units=units, 1513 default=default, 1514 minimum=minimum, 1515 maximum=maximum, 1516 ) 1517 1518 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1519 self._match(TokenType.EQ) 1520 always = self._match_text_seq("ALWAYS") 1521 manual = self._match_text_seq("MANUAL") 1522 never = self._match_text_seq("NEVER") 1523 default = self._match_text_seq("DEFAULT") 1524 1525 autotemp = None 1526 if self._match_text_seq("AUTOTEMP"): 1527 autotemp = self._parse_schema() 1528 1529 return self.expression( 1530 exp.BlockCompressionProperty, 1531 always=always, 1532 manual=manual, 1533 never=never, 1534 default=default, 1535 autotemp=autotemp, 1536 ) 1537 1538 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1539 no = self._match_text_seq("NO") 1540 concurrent = self._match_text_seq("CONCURRENT") 1541 self._match_text_seq("ISOLATED", "LOADING") 1542 for_all = self._match_text_seq("FOR", "ALL") 1543 for_insert = self._match_text_seq("FOR", "INSERT") 1544 for_none = self._match_text_seq("FOR", "NONE") 1545 return self.expression( 1546 exp.IsolatedLoadingProperty, 1547 no=no, 1548 concurrent=concurrent, 1549 for_all=for_all, 1550 for_insert=for_insert, 1551 for_none=for_none, 1552 ) 1553 1554 def _parse_locking(self) -> exp.LockingProperty: 1555 if self._match(TokenType.TABLE): 1556 kind = "TABLE" 1557 elif self._match(TokenType.VIEW): 1558 kind = "VIEW" 1559 elif self._match(TokenType.ROW): 1560 kind = "ROW" 1561 elif self._match_text_seq("DATABASE"): 1562 kind = "DATABASE" 1563 else: 1564 kind = None 1565 1566 if kind in ("DATABASE", "TABLE", "VIEW"): 1567 this = self._parse_table_parts() 1568 else: 1569 this = None 1570 1571 if self._match(TokenType.FOR): 1572 for_or_in = "FOR" 1573 elif self._match(TokenType.IN): 1574 for_or_in = "IN" 1575 else: 1576 for_or_in = None 1577 1578 if self._match_text_seq("ACCESS"): 1579 lock_type = "ACCESS" 1580 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1581 lock_type = "EXCLUSIVE" 1582 elif self._match_text_seq("SHARE"): 1583 lock_type = "SHARE" 1584 elif self._match_text_seq("READ"): 1585 lock_type = "READ" 1586 elif self._match_text_seq("WRITE"): 1587 lock_type = "WRITE" 1588 elif self._match_text_seq("CHECKSUM"): 1589 lock_type = "CHECKSUM" 1590 else: 1591 lock_type = None 1592 1593 override = self._match_text_seq("OVERRIDE") 1594 1595 return self.expression( 1596 exp.LockingProperty, 1597 this=this, 1598 kind=kind, 1599 for_or_in=for_or_in, 1600 lock_type=lock_type, 1601 override=override, 1602 ) 1603 1604 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1605 if self._match(TokenType.PARTITION_BY): 1606 return self._parse_csv(self._parse_conjunction) 1607 return [] 1608 1609 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1610 self._match(TokenType.EQ) 1611 return self.expression( 1612 exp.PartitionedByProperty, 1613 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1614 ) 1615 1616 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1617 if self._match_text_seq("AND", "STATISTICS"): 1618 statistics = True 1619 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1620 statistics = False 1621 else: 1622 statistics = None 1623 1624 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1625 1626 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1627 if self._match_text_seq("PRIMARY", "INDEX"): 1628 return exp.NoPrimaryIndexProperty() 1629 return None 1630 1631 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1632 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1633 return exp.OnCommitProperty() 1634 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1635 return exp.OnCommitProperty(delete=True) 1636 return None 1637 1638 def _parse_distkey(self) -> exp.DistKeyProperty: 1639 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1640 1641 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1642 table = self._parse_table(schema=True) 1643 1644 options = [] 1645 while self._match_texts(("INCLUDING", "EXCLUDING")): 1646 this = self._prev.text.upper() 1647 1648 id_var = self._parse_id_var() 1649 if not id_var: 1650 return None 1651 1652 options.append( 1653 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1654 ) 1655 1656 return self.expression(exp.LikeProperty, this=table, expressions=options) 1657 1658 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1659 return self.expression( 1660 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1661 ) 1662 1663 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1664 self._match(TokenType.EQ) 1665 return self.expression( 1666 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1667 ) 1668 1669 def _parse_returns(self) -> exp.ReturnsProperty: 1670 value: t.Optional[exp.Expression] 1671 is_table = self._match(TokenType.TABLE) 1672 1673 if is_table: 1674 if self._match(TokenType.LT): 1675 value = self.expression( 1676 exp.Schema, 1677 this="TABLE", 1678 expressions=self._parse_csv(self._parse_struct_types), 1679 ) 1680 if not self._match(TokenType.GT): 1681 self.raise_error("Expecting >") 1682 else: 1683 value = self._parse_schema(exp.var("TABLE")) 1684 else: 1685 value = self._parse_types() 1686 1687 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1688 1689 def _parse_describe(self) -> exp.Describe: 1690 kind = self._match_set(self.CREATABLES) and self._prev.text 1691 this = self._parse_table() 1692 return self.expression(exp.Describe, this=this, kind=kind) 1693 1694 def _parse_insert(self) -> exp.Insert: 1695 overwrite = self._match(TokenType.OVERWRITE) 1696 ignore = self._match(TokenType.IGNORE) 1697 local = self._match_text_seq("LOCAL") 1698 alternative = None 1699 1700 if self._match_text_seq("DIRECTORY"): 1701 this: t.Optional[exp.Expression] = self.expression( 1702 exp.Directory, 1703 this=self._parse_var_or_string(), 1704 local=local, 1705 row_format=self._parse_row_format(match_row=True), 1706 ) 1707 else: 1708 if self._match(TokenType.OR): 1709 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1710 1711 self._match(TokenType.INTO) 1712 self._match(TokenType.TABLE) 1713 this = self._parse_table(schema=True) 1714 1715 returning = self._parse_returning() 1716 1717 return self.expression( 1718 exp.Insert, 1719 this=this, 1720 exists=self._parse_exists(), 1721 partition=self._parse_partition(), 1722 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1723 and self._parse_conjunction(), 1724 expression=self._parse_ddl_select(), 1725 conflict=self._parse_on_conflict(), 1726 returning=returning or self._parse_returning(), 1727 overwrite=overwrite, 1728 alternative=alternative, 1729 ignore=ignore, 1730 ) 1731 1732 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1733 conflict = self._match_text_seq("ON", "CONFLICT") 1734 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1735 1736 if not conflict and not duplicate: 1737 return None 1738 1739 nothing = None 1740 expressions = None 1741 key = None 1742 constraint = None 1743 1744 if conflict: 1745 if self._match_text_seq("ON", "CONSTRAINT"): 1746 constraint = self._parse_id_var() 1747 else: 1748 key = self._parse_csv(self._parse_value) 1749 1750 self._match_text_seq("DO") 1751 if self._match_text_seq("NOTHING"): 1752 nothing = True 1753 else: 1754 self._match(TokenType.UPDATE) 1755 self._match(TokenType.SET) 1756 expressions = self._parse_csv(self._parse_equality) 1757 1758 return self.expression( 1759 exp.OnConflict, 1760 duplicate=duplicate, 1761 expressions=expressions, 1762 nothing=nothing, 1763 key=key, 1764 constraint=constraint, 1765 ) 1766 1767 def _parse_returning(self) -> t.Optional[exp.Returning]: 1768 if not self._match(TokenType.RETURNING): 1769 return None 1770 return self.expression( 1771 exp.Returning, 1772 expressions=self._parse_csv(self._parse_expression), 1773 into=self._match(TokenType.INTO) and self._parse_table_part(), 1774 ) 1775 1776 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1777 if not self._match(TokenType.FORMAT): 1778 return None 1779 return self._parse_row_format() 1780 1781 def _parse_row_format( 1782 self, match_row: bool = False 1783 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1784 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1785 return None 1786 1787 if self._match_text_seq("SERDE"): 1788 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1789 1790 self._match_text_seq("DELIMITED") 1791 1792 kwargs = {} 1793 1794 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1795 kwargs["fields"] = self._parse_string() 1796 if self._match_text_seq("ESCAPED", "BY"): 1797 kwargs["escaped"] = self._parse_string() 1798 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1799 kwargs["collection_items"] = self._parse_string() 1800 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1801 kwargs["map_keys"] = self._parse_string() 1802 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1803 kwargs["lines"] = self._parse_string() 1804 if self._match_text_seq("NULL", "DEFINED", "AS"): 1805 kwargs["null"] = self._parse_string() 1806 1807 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1808 1809 def _parse_load(self) -> exp.LoadData | exp.Command: 1810 if self._match_text_seq("DATA"): 1811 local = self._match_text_seq("LOCAL") 1812 self._match_text_seq("INPATH") 1813 inpath = self._parse_string() 1814 overwrite = self._match(TokenType.OVERWRITE) 1815 self._match_pair(TokenType.INTO, TokenType.TABLE) 1816 1817 return self.expression( 1818 exp.LoadData, 1819 this=self._parse_table(schema=True), 1820 local=local, 1821 overwrite=overwrite, 1822 inpath=inpath, 1823 partition=self._parse_partition(), 1824 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1825 serde=self._match_text_seq("SERDE") and self._parse_string(), 1826 ) 1827 return self._parse_as_command(self._prev) 1828 1829 def _parse_delete(self) -> exp.Delete: 1830 # This handles MySQL's "Multiple-Table Syntax" 1831 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1832 tables = None 1833 if not self._match(TokenType.FROM, advance=False): 1834 tables = self._parse_csv(self._parse_table) or None 1835 1836 returning = self._parse_returning() 1837 1838 return self.expression( 1839 exp.Delete, 1840 tables=tables, 1841 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1842 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1843 where=self._parse_where(), 1844 returning=returning or self._parse_returning(), 1845 limit=self._parse_limit(), 1846 ) 1847 1848 def _parse_update(self) -> exp.Update: 1849 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1850 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1851 returning = self._parse_returning() 1852 return self.expression( 1853 exp.Update, 1854 **{ # type: ignore 1855 "this": this, 1856 "expressions": expressions, 1857 "from": self._parse_from(joins=True), 1858 "where": self._parse_where(), 1859 "returning": returning or self._parse_returning(), 1860 "limit": self._parse_limit(), 1861 }, 1862 ) 1863 1864 def _parse_uncache(self) -> exp.Uncache: 1865 if not self._match(TokenType.TABLE): 1866 self.raise_error("Expecting TABLE after UNCACHE") 1867 1868 return self.expression( 1869 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1870 ) 1871 1872 def _parse_cache(self) -> exp.Cache: 1873 lazy = self._match_text_seq("LAZY") 1874 self._match(TokenType.TABLE) 1875 table = self._parse_table(schema=True) 1876 1877 options = [] 1878 if self._match_text_seq("OPTIONS"): 1879 self._match_l_paren() 1880 k = self._parse_string() 1881 self._match(TokenType.EQ) 1882 v = self._parse_string() 1883 options = [k, v] 1884 self._match_r_paren() 1885 1886 self._match(TokenType.ALIAS) 1887 return self.expression( 1888 exp.Cache, 1889 this=table, 1890 lazy=lazy, 1891 options=options, 1892 expression=self._parse_select(nested=True), 1893 ) 1894 1895 def _parse_partition(self) -> t.Optional[exp.Partition]: 1896 if not self._match(TokenType.PARTITION): 1897 return None 1898 1899 return self.expression( 1900 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1901 ) 1902 1903 def _parse_value(self) -> exp.Tuple: 1904 if self._match(TokenType.L_PAREN): 1905 expressions = self._parse_csv(self._parse_conjunction) 1906 self._match_r_paren() 1907 return self.expression(exp.Tuple, expressions=expressions) 1908 1909 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1910 # https://prestodb.io/docs/current/sql/values.html 1911 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1912 1913 def _parse_select( 1914 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1915 ) -> t.Optional[exp.Expression]: 1916 cte = self._parse_with() 1917 if cte: 1918 this = self._parse_statement() 1919 1920 if not this: 1921 self.raise_error("Failed to parse any statement following CTE") 1922 return cte 1923 1924 if "with" in this.arg_types: 1925 this.set("with", cte) 1926 else: 1927 self.raise_error(f"{this.key} does not support CTE") 1928 this = cte 1929 elif self._match(TokenType.SELECT): 1930 comments = self._prev_comments 1931 1932 hint = self._parse_hint() 1933 all_ = self._match(TokenType.ALL) 1934 distinct = self._match(TokenType.DISTINCT) 1935 1936 kind = ( 1937 self._match(TokenType.ALIAS) 1938 and self._match_texts(("STRUCT", "VALUE")) 1939 and self._prev.text 1940 ) 1941 1942 if distinct: 1943 distinct = self.expression( 1944 exp.Distinct, 1945 on=self._parse_value() if self._match(TokenType.ON) else None, 1946 ) 1947 1948 if all_ and distinct: 1949 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1950 1951 limit = self._parse_limit(top=True) 1952 expressions = self._parse_expressions() 1953 1954 this = self.expression( 1955 exp.Select, 1956 kind=kind, 1957 hint=hint, 1958 distinct=distinct, 1959 expressions=expressions, 1960 limit=limit, 1961 ) 1962 this.comments = comments 1963 1964 into = self._parse_into() 1965 if into: 1966 this.set("into", into) 1967 1968 from_ = self._parse_from() 1969 if from_: 1970 this.set("from", from_) 1971 1972 this = self._parse_query_modifiers(this) 1973 elif (table or nested) and self._match(TokenType.L_PAREN): 1974 if self._match(TokenType.PIVOT): 1975 this = self._parse_simplified_pivot() 1976 elif self._match(TokenType.FROM): 1977 this = exp.select("*").from_( 1978 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1979 ) 1980 else: 1981 this = self._parse_table() if table else self._parse_select(nested=True) 1982 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1983 1984 self._match_r_paren() 1985 1986 # We return early here so that the UNION isn't attached to the subquery by the 1987 # following call to _parse_set_operations, but instead becomes the parent node 1988 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1989 elif self._match(TokenType.VALUES): 1990 this = self.expression( 1991 exp.Values, 1992 expressions=self._parse_csv(self._parse_value), 1993 alias=self._parse_table_alias(), 1994 ) 1995 else: 1996 this = None 1997 1998 return self._parse_set_operations(this) 1999 2000 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2001 if not skip_with_token and not self._match(TokenType.WITH): 2002 return None 2003 2004 comments = self._prev_comments 2005 recursive = self._match(TokenType.RECURSIVE) 2006 2007 expressions = [] 2008 while True: 2009 expressions.append(self._parse_cte()) 2010 2011 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2012 break 2013 else: 2014 self._match(TokenType.WITH) 2015 2016 return self.expression( 2017 exp.With, comments=comments, expressions=expressions, recursive=recursive 2018 ) 2019 2020 def _parse_cte(self) -> exp.CTE: 2021 alias = self._parse_table_alias() 2022 if not alias or not alias.this: 2023 self.raise_error("Expected CTE to have alias") 2024 2025 self._match(TokenType.ALIAS) 2026 return self.expression( 2027 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2028 ) 2029 2030 def _parse_table_alias( 2031 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2032 ) -> t.Optional[exp.TableAlias]: 2033 any_token = self._match(TokenType.ALIAS) 2034 alias = ( 2035 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2036 or self._parse_string_as_identifier() 2037 ) 2038 2039 index = self._index 2040 if self._match(TokenType.L_PAREN): 2041 columns = self._parse_csv(self._parse_function_parameter) 2042 self._match_r_paren() if columns else self._retreat(index) 2043 else: 2044 columns = None 2045 2046 if not alias and not columns: 2047 return None 2048 2049 return self.expression(exp.TableAlias, this=alias, columns=columns) 2050 2051 def _parse_subquery( 2052 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2053 ) -> t.Optional[exp.Subquery]: 2054 if not this: 2055 return None 2056 2057 return self.expression( 2058 exp.Subquery, 2059 this=this, 2060 pivots=self._parse_pivots(), 2061 alias=self._parse_table_alias() if parse_alias else None, 2062 ) 2063 2064 def _parse_query_modifiers( 2065 self, this: t.Optional[exp.Expression] 2066 ) -> t.Optional[exp.Expression]: 2067 if isinstance(this, self.MODIFIABLES): 2068 for join in iter(self._parse_join, None): 2069 this.append("joins", join) 2070 for lateral in iter(self._parse_lateral, None): 2071 this.append("laterals", lateral) 2072 2073 while True: 2074 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2075 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2076 key, expression = parser(self) 2077 2078 if expression: 2079 this.set(key, expression) 2080 if key == "limit": 2081 offset = expression.args.pop("offset", None) 2082 if offset: 2083 this.set("offset", exp.Offset(expression=offset)) 2084 continue 2085 break 2086 return this 2087 2088 def _parse_hint(self) -> t.Optional[exp.Hint]: 2089 if self._match(TokenType.HINT): 2090 hints = [] 2091 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2092 hints.extend(hint) 2093 2094 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2095 self.raise_error("Expected */ after HINT") 2096 2097 return self.expression(exp.Hint, expressions=hints) 2098 2099 return None 2100 2101 def _parse_into(self) -> t.Optional[exp.Into]: 2102 if not self._match(TokenType.INTO): 2103 return None 2104 2105 temp = self._match(TokenType.TEMPORARY) 2106 unlogged = self._match_text_seq("UNLOGGED") 2107 self._match(TokenType.TABLE) 2108 2109 return self.expression( 2110 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2111 ) 2112 2113 def _parse_from( 2114 self, joins: bool = False, skip_from_token: bool = False 2115 ) -> t.Optional[exp.From]: 2116 if not skip_from_token and not self._match(TokenType.FROM): 2117 return None 2118 2119 return self.expression( 2120 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2121 ) 2122 2123 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2124 if not self._match(TokenType.MATCH_RECOGNIZE): 2125 return None 2126 2127 self._match_l_paren() 2128 2129 partition = self._parse_partition_by() 2130 order = self._parse_order() 2131 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2132 2133 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2134 rows = exp.var("ONE ROW PER MATCH") 2135 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2136 text = "ALL ROWS PER MATCH" 2137 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2138 text += f" SHOW EMPTY MATCHES" 2139 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2140 text += f" OMIT EMPTY MATCHES" 2141 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2142 text += f" WITH UNMATCHED ROWS" 2143 rows = exp.var(text) 2144 else: 2145 rows = None 2146 2147 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2148 text = "AFTER MATCH SKIP" 2149 if self._match_text_seq("PAST", "LAST", "ROW"): 2150 text += f" PAST LAST ROW" 2151 elif self._match_text_seq("TO", "NEXT", "ROW"): 2152 text += f" TO NEXT ROW" 2153 elif self._match_text_seq("TO", "FIRST"): 2154 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2155 elif self._match_text_seq("TO", "LAST"): 2156 text += f" TO LAST {self._advance_any().text}" # type: ignore 2157 after = exp.var(text) 2158 else: 2159 after = None 2160 2161 if self._match_text_seq("PATTERN"): 2162 self._match_l_paren() 2163 2164 if not self._curr: 2165 self.raise_error("Expecting )", self._curr) 2166 2167 paren = 1 2168 start = self._curr 2169 2170 while self._curr and paren > 0: 2171 if self._curr.token_type == TokenType.L_PAREN: 2172 paren += 1 2173 if self._curr.token_type == TokenType.R_PAREN: 2174 paren -= 1 2175 2176 end = self._prev 2177 self._advance() 2178 2179 if paren > 0: 2180 self.raise_error("Expecting )", self._curr) 2181 2182 pattern = exp.var(self._find_sql(start, end)) 2183 else: 2184 pattern = None 2185 2186 define = ( 2187 self._parse_csv( 2188 lambda: self.expression( 2189 exp.Alias, 2190 alias=self._parse_id_var(any_token=True), 2191 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2192 ) 2193 ) 2194 if self._match_text_seq("DEFINE") 2195 else None 2196 ) 2197 2198 self._match_r_paren() 2199 2200 return self.expression( 2201 exp.MatchRecognize, 2202 partition_by=partition, 2203 order=order, 2204 measures=measures, 2205 rows=rows, 2206 after=after, 2207 pattern=pattern, 2208 define=define, 2209 alias=self._parse_table_alias(), 2210 ) 2211 2212 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2213 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2214 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2215 2216 if outer_apply or cross_apply: 2217 this = self._parse_select(table=True) 2218 view = None 2219 outer = not cross_apply 2220 elif self._match(TokenType.LATERAL): 2221 this = self._parse_select(table=True) 2222 view = self._match(TokenType.VIEW) 2223 outer = self._match(TokenType.OUTER) 2224 else: 2225 return None 2226 2227 if not this: 2228 this = self._parse_function() or self._parse_id_var(any_token=False) 2229 while self._match(TokenType.DOT): 2230 this = exp.Dot( 2231 this=this, 2232 expression=self._parse_function() or self._parse_id_var(any_token=False), 2233 ) 2234 2235 if view: 2236 table = self._parse_id_var(any_token=False) 2237 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2238 table_alias: t.Optional[exp.TableAlias] = self.expression( 2239 exp.TableAlias, this=table, columns=columns 2240 ) 2241 elif isinstance(this, exp.Subquery) and this.alias: 2242 # Ensures parity between the Subquery's and the Lateral's "alias" args 2243 table_alias = this.args["alias"].copy() 2244 else: 2245 table_alias = self._parse_table_alias() 2246 2247 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2248 2249 def _parse_join_parts( 2250 self, 2251 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2252 return ( 2253 self._match_set(self.JOIN_METHODS) and self._prev, 2254 self._match_set(self.JOIN_SIDES) and self._prev, 2255 self._match_set(self.JOIN_KINDS) and self._prev, 2256 ) 2257 2258 def _parse_join( 2259 self, skip_join_token: bool = False, parse_bracket: bool = False 2260 ) -> t.Optional[exp.Join]: 2261 if self._match(TokenType.COMMA): 2262 return self.expression(exp.Join, this=self._parse_table()) 2263 2264 index = self._index 2265 method, side, kind = self._parse_join_parts() 2266 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2267 join = self._match(TokenType.JOIN) 2268 2269 if not skip_join_token and not join: 2270 self._retreat(index) 2271 kind = None 2272 method = None 2273 side = None 2274 2275 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2276 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2277 2278 if not skip_join_token and not join and not outer_apply and not cross_apply: 2279 return None 2280 2281 if outer_apply: 2282 side = Token(TokenType.LEFT, "LEFT") 2283 2284 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2285 2286 if method: 2287 kwargs["method"] = method.text 2288 if side: 2289 kwargs["side"] = side.text 2290 if kind: 2291 kwargs["kind"] = kind.text 2292 if hint: 2293 kwargs["hint"] = hint 2294 2295 if self._match(TokenType.ON): 2296 kwargs["on"] = self._parse_conjunction() 2297 elif self._match(TokenType.USING): 2298 kwargs["using"] = self._parse_wrapped_id_vars() 2299 elif not (kind and kind.token_type == TokenType.CROSS): 2300 index = self._index 2301 joins = self._parse_joins() 2302 2303 if joins and self._match(TokenType.ON): 2304 kwargs["on"] = self._parse_conjunction() 2305 elif joins and self._match(TokenType.USING): 2306 kwargs["using"] = self._parse_wrapped_id_vars() 2307 else: 2308 joins = None 2309 self._retreat(index) 2310 2311 kwargs["this"].set("joins", joins) 2312 2313 return self.expression(exp.Join, **kwargs) 2314 2315 def _parse_index( 2316 self, 2317 index: t.Optional[exp.Expression] = None, 2318 ) -> t.Optional[exp.Index]: 2319 if index: 2320 unique = None 2321 primary = None 2322 amp = None 2323 2324 self._match(TokenType.ON) 2325 self._match(TokenType.TABLE) # hive 2326 table = self._parse_table_parts(schema=True) 2327 else: 2328 unique = self._match(TokenType.UNIQUE) 2329 primary = self._match_text_seq("PRIMARY") 2330 amp = self._match_text_seq("AMP") 2331 2332 if not self._match(TokenType.INDEX): 2333 return None 2334 2335 index = self._parse_id_var() 2336 table = None 2337 2338 using = self._parse_field() if self._match(TokenType.USING) else None 2339 2340 if self._match(TokenType.L_PAREN, advance=False): 2341 columns = self._parse_wrapped_csv(self._parse_ordered) 2342 else: 2343 columns = None 2344 2345 return self.expression( 2346 exp.Index, 2347 this=index, 2348 table=table, 2349 using=using, 2350 columns=columns, 2351 unique=unique, 2352 primary=primary, 2353 amp=amp, 2354 partition_by=self._parse_partition_by(), 2355 ) 2356 2357 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2358 hints: t.List[exp.Expression] = [] 2359 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2360 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2361 hints.append( 2362 self.expression( 2363 exp.WithTableHint, 2364 expressions=self._parse_csv( 2365 lambda: self._parse_function() or self._parse_var(any_token=True) 2366 ), 2367 ) 2368 ) 2369 self._match_r_paren() 2370 else: 2371 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2372 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2373 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2374 2375 self._match_texts({"INDEX", "KEY"}) 2376 if self._match(TokenType.FOR): 2377 hint.set("target", self._advance_any() and self._prev.text.upper()) 2378 2379 hint.set("expressions", self._parse_wrapped_id_vars()) 2380 hints.append(hint) 2381 2382 return hints or None 2383 2384 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2385 return ( 2386 (not schema and self._parse_function(optional_parens=False)) 2387 or self._parse_id_var(any_token=False) 2388 or self._parse_string_as_identifier() 2389 or self._parse_placeholder() 2390 ) 2391 2392 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2393 catalog = None 2394 db = None 2395 table = self._parse_table_part(schema=schema) 2396 2397 while self._match(TokenType.DOT): 2398 if catalog: 2399 # This allows nesting the table in arbitrarily many dot expressions if needed 2400 table = self.expression( 2401 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2402 ) 2403 else: 2404 catalog = db 2405 db = table 2406 table = self._parse_table_part(schema=schema) 2407 2408 if not table: 2409 self.raise_error(f"Expected table name but got {self._curr}") 2410 2411 return self.expression( 2412 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2413 ) 2414 2415 def _parse_table( 2416 self, 2417 schema: bool = False, 2418 joins: bool = False, 2419 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2420 parse_bracket: bool = False, 2421 ) -> t.Optional[exp.Expression]: 2422 lateral = self._parse_lateral() 2423 if lateral: 2424 return lateral 2425 2426 unnest = self._parse_unnest() 2427 if unnest: 2428 return unnest 2429 2430 values = self._parse_derived_table_values() 2431 if values: 2432 return values 2433 2434 subquery = self._parse_select(table=True) 2435 if subquery: 2436 if not subquery.args.get("pivots"): 2437 subquery.set("pivots", self._parse_pivots()) 2438 return subquery 2439 2440 bracket = parse_bracket and self._parse_bracket(None) 2441 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2442 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2443 2444 if schema: 2445 return self._parse_schema(this=this) 2446 2447 if self.ALIAS_POST_TABLESAMPLE: 2448 table_sample = self._parse_table_sample() 2449 2450 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2451 if alias: 2452 this.set("alias", alias) 2453 2454 if not this.args.get("pivots"): 2455 this.set("pivots", self._parse_pivots()) 2456 2457 this.set("hints", self._parse_table_hints()) 2458 2459 if not self.ALIAS_POST_TABLESAMPLE: 2460 table_sample = self._parse_table_sample() 2461 2462 if table_sample: 2463 table_sample.set("this", this) 2464 this = table_sample 2465 2466 if joins: 2467 for join in iter(self._parse_join, None): 2468 this.append("joins", join) 2469 2470 return this 2471 2472 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2473 if not self._match(TokenType.UNNEST): 2474 return None 2475 2476 expressions = self._parse_wrapped_csv(self._parse_type) 2477 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2478 2479 alias = self._parse_table_alias() if with_alias else None 2480 2481 if alias and self.UNNEST_COLUMN_ONLY: 2482 if alias.args.get("columns"): 2483 self.raise_error("Unexpected extra column alias in unnest.") 2484 2485 alias.set("columns", [alias.this]) 2486 alias.set("this", None) 2487 2488 offset = None 2489 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2490 self._match(TokenType.ALIAS) 2491 offset = self._parse_id_var() or exp.to_identifier("offset") 2492 2493 return self.expression( 2494 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2495 ) 2496 2497 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2498 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2499 if not is_derived and not self._match(TokenType.VALUES): 2500 return None 2501 2502 expressions = self._parse_csv(self._parse_value) 2503 alias = self._parse_table_alias() 2504 2505 if is_derived: 2506 self._match_r_paren() 2507 2508 return self.expression( 2509 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2510 ) 2511 2512 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2513 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2514 as_modifier and self._match_text_seq("USING", "SAMPLE") 2515 ): 2516 return None 2517 2518 bucket_numerator = None 2519 bucket_denominator = None 2520 bucket_field = None 2521 percent = None 2522 rows = None 2523 size = None 2524 seed = None 2525 2526 kind = ( 2527 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2528 ) 2529 method = self._parse_var(tokens=(TokenType.ROW,)) 2530 2531 self._match(TokenType.L_PAREN) 2532 2533 num = self._parse_number() 2534 2535 if self._match_text_seq("BUCKET"): 2536 bucket_numerator = self._parse_number() 2537 self._match_text_seq("OUT", "OF") 2538 bucket_denominator = bucket_denominator = self._parse_number() 2539 self._match(TokenType.ON) 2540 bucket_field = self._parse_field() 2541 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2542 percent = num 2543 elif self._match(TokenType.ROWS): 2544 rows = num 2545 else: 2546 size = num 2547 2548 self._match(TokenType.R_PAREN) 2549 2550 if self._match(TokenType.L_PAREN): 2551 method = self._parse_var() 2552 seed = self._match(TokenType.COMMA) and self._parse_number() 2553 self._match_r_paren() 2554 elif self._match_texts(("SEED", "REPEATABLE")): 2555 seed = self._parse_wrapped(self._parse_number) 2556 2557 return self.expression( 2558 exp.TableSample, 2559 method=method, 2560 bucket_numerator=bucket_numerator, 2561 bucket_denominator=bucket_denominator, 2562 bucket_field=bucket_field, 2563 percent=percent, 2564 rows=rows, 2565 size=size, 2566 seed=seed, 2567 kind=kind, 2568 ) 2569 2570 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2571 return list(iter(self._parse_pivot, None)) or None 2572 2573 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2574 return list(iter(self._parse_join, None)) or None 2575 2576 # https://duckdb.org/docs/sql/statements/pivot 2577 def _parse_simplified_pivot(self) -> exp.Pivot: 2578 def _parse_on() -> t.Optional[exp.Expression]: 2579 this = self._parse_bitwise() 2580 return self._parse_in(this) if self._match(TokenType.IN) else this 2581 2582 this = self._parse_table() 2583 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2584 using = self._match(TokenType.USING) and self._parse_csv( 2585 lambda: self._parse_alias(self._parse_function()) 2586 ) 2587 group = self._parse_group() 2588 return self.expression( 2589 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2590 ) 2591 2592 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2593 index = self._index 2594 2595 if self._match(TokenType.PIVOT): 2596 unpivot = False 2597 elif self._match(TokenType.UNPIVOT): 2598 unpivot = True 2599 else: 2600 return None 2601 2602 expressions = [] 2603 field = None 2604 2605 if not self._match(TokenType.L_PAREN): 2606 self._retreat(index) 2607 return None 2608 2609 if unpivot: 2610 expressions = self._parse_csv(self._parse_column) 2611 else: 2612 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2613 2614 if not expressions: 2615 self.raise_error("Failed to parse PIVOT's aggregation list") 2616 2617 if not self._match(TokenType.FOR): 2618 self.raise_error("Expecting FOR") 2619 2620 value = self._parse_column() 2621 2622 if not self._match(TokenType.IN): 2623 self.raise_error("Expecting IN") 2624 2625 field = self._parse_in(value, alias=True) 2626 2627 self._match_r_paren() 2628 2629 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2630 2631 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2632 pivot.set("alias", self._parse_table_alias()) 2633 2634 if not unpivot: 2635 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2636 2637 columns: t.List[exp.Expression] = [] 2638 for fld in pivot.args["field"].expressions: 2639 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2640 for name in names: 2641 if self.PREFIXED_PIVOT_COLUMNS: 2642 name = f"{name}_{field_name}" if name else field_name 2643 else: 2644 name = f"{field_name}_{name}" if name else field_name 2645 2646 columns.append(exp.to_identifier(name)) 2647 2648 pivot.set("columns", columns) 2649 2650 return pivot 2651 2652 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2653 return [agg.alias for agg in aggregations] 2654 2655 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2656 if not skip_where_token and not self._match(TokenType.WHERE): 2657 return None 2658 2659 return self.expression( 2660 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2661 ) 2662 2663 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2664 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2665 return None 2666 2667 elements = defaultdict(list) 2668 2669 if self._match(TokenType.ALL): 2670 return self.expression(exp.Group, all=True) 2671 2672 while True: 2673 expressions = self._parse_csv(self._parse_conjunction) 2674 if expressions: 2675 elements["expressions"].extend(expressions) 2676 2677 grouping_sets = self._parse_grouping_sets() 2678 if grouping_sets: 2679 elements["grouping_sets"].extend(grouping_sets) 2680 2681 rollup = None 2682 cube = None 2683 totals = None 2684 2685 with_ = self._match(TokenType.WITH) 2686 if self._match(TokenType.ROLLUP): 2687 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2688 elements["rollup"].extend(ensure_list(rollup)) 2689 2690 if self._match(TokenType.CUBE): 2691 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2692 elements["cube"].extend(ensure_list(cube)) 2693 2694 if self._match_text_seq("TOTALS"): 2695 totals = True 2696 elements["totals"] = True # type: ignore 2697 2698 if not (grouping_sets or rollup or cube or totals): 2699 break 2700 2701 return self.expression(exp.Group, **elements) # type: ignore 2702 2703 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2704 if not self._match(TokenType.GROUPING_SETS): 2705 return None 2706 2707 return self._parse_wrapped_csv(self._parse_grouping_set) 2708 2709 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2710 if self._match(TokenType.L_PAREN): 2711 grouping_set = self._parse_csv(self._parse_column) 2712 self._match_r_paren() 2713 return self.expression(exp.Tuple, expressions=grouping_set) 2714 2715 return self._parse_column() 2716 2717 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2718 if not skip_having_token and not self._match(TokenType.HAVING): 2719 return None 2720 return self.expression(exp.Having, this=self._parse_conjunction()) 2721 2722 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2723 if not self._match(TokenType.QUALIFY): 2724 return None 2725 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2726 2727 def _parse_order( 2728 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2729 ) -> t.Optional[exp.Expression]: 2730 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2731 return this 2732 2733 return self.expression( 2734 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2735 ) 2736 2737 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2738 if not self._match(token): 2739 return None 2740 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2741 2742 def _parse_ordered(self) -> exp.Ordered: 2743 this = self._parse_conjunction() 2744 self._match(TokenType.ASC) 2745 2746 is_desc = self._match(TokenType.DESC) 2747 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2748 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2749 desc = is_desc or False 2750 asc = not desc 2751 nulls_first = is_nulls_first or False 2752 explicitly_null_ordered = is_nulls_first or is_nulls_last 2753 2754 if ( 2755 not explicitly_null_ordered 2756 and ( 2757 (asc and self.NULL_ORDERING == "nulls_are_small") 2758 or (desc and self.NULL_ORDERING != "nulls_are_small") 2759 ) 2760 and self.NULL_ORDERING != "nulls_are_last" 2761 ): 2762 nulls_first = True 2763 2764 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2765 2766 def _parse_limit( 2767 self, this: t.Optional[exp.Expression] = None, top: bool = False 2768 ) -> t.Optional[exp.Expression]: 2769 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2770 comments = self._prev_comments 2771 limit_paren = self._match(TokenType.L_PAREN) 2772 expression = self._parse_number() if top else self._parse_term() 2773 2774 if self._match(TokenType.COMMA): 2775 offset = expression 2776 expression = self._parse_term() 2777 else: 2778 offset = None 2779 2780 limit_exp = self.expression( 2781 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2782 ) 2783 2784 if limit_paren: 2785 self._match_r_paren() 2786 2787 return limit_exp 2788 2789 if self._match(TokenType.FETCH): 2790 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2791 direction = self._prev.text if direction else "FIRST" 2792 2793 count = self._parse_number() 2794 percent = self._match(TokenType.PERCENT) 2795 2796 self._match_set((TokenType.ROW, TokenType.ROWS)) 2797 2798 only = self._match_text_seq("ONLY") 2799 with_ties = self._match_text_seq("WITH", "TIES") 2800 2801 if only and with_ties: 2802 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2803 2804 return self.expression( 2805 exp.Fetch, 2806 direction=direction, 2807 count=count, 2808 percent=percent, 2809 with_ties=with_ties, 2810 ) 2811 2812 return this 2813 2814 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2815 if not self._match(TokenType.OFFSET): 2816 return this 2817 2818 count = self._parse_number() 2819 self._match_set((TokenType.ROW, TokenType.ROWS)) 2820 return self.expression(exp.Offset, this=this, expression=count) 2821 2822 def _parse_locks(self) -> t.List[exp.Lock]: 2823 locks = [] 2824 while True: 2825 if self._match_text_seq("FOR", "UPDATE"): 2826 update = True 2827 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2828 "LOCK", "IN", "SHARE", "MODE" 2829 ): 2830 update = False 2831 else: 2832 break 2833 2834 expressions = None 2835 if self._match_text_seq("OF"): 2836 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2837 2838 wait: t.Optional[bool | exp.Expression] = None 2839 if self._match_text_seq("NOWAIT"): 2840 wait = True 2841 elif self._match_text_seq("WAIT"): 2842 wait = self._parse_primary() 2843 elif self._match_text_seq("SKIP", "LOCKED"): 2844 wait = False 2845 2846 locks.append( 2847 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2848 ) 2849 2850 return locks 2851 2852 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2853 if not self._match_set(self.SET_OPERATIONS): 2854 return this 2855 2856 token_type = self._prev.token_type 2857 2858 if token_type == TokenType.UNION: 2859 expression = exp.Union 2860 elif token_type == TokenType.EXCEPT: 2861 expression = exp.Except 2862 else: 2863 expression = exp.Intersect 2864 2865 return self.expression( 2866 expression, 2867 this=this, 2868 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2869 expression=self._parse_set_operations(self._parse_select(nested=True)), 2870 ) 2871 2872 def _parse_expression(self) -> t.Optional[exp.Expression]: 2873 return self._parse_alias(self._parse_conjunction()) 2874 2875 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2876 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2877 2878 def _parse_equality(self) -> t.Optional[exp.Expression]: 2879 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2880 2881 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2882 return self._parse_tokens(self._parse_range, self.COMPARISON) 2883 2884 def _parse_range(self) -> t.Optional[exp.Expression]: 2885 this = self._parse_bitwise() 2886 negate = self._match(TokenType.NOT) 2887 2888 if self._match_set(self.RANGE_PARSERS): 2889 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2890 if not expression: 2891 return this 2892 2893 this = expression 2894 elif self._match(TokenType.ISNULL): 2895 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2896 2897 # Postgres supports ISNULL and NOTNULL for conditions. 2898 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2899 if self._match(TokenType.NOTNULL): 2900 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2901 this = self.expression(exp.Not, this=this) 2902 2903 if negate: 2904 this = self.expression(exp.Not, this=this) 2905 2906 if self._match(TokenType.IS): 2907 this = self._parse_is(this) 2908 2909 return this 2910 2911 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2912 index = self._index - 1 2913 negate = self._match(TokenType.NOT) 2914 2915 if self._match_text_seq("DISTINCT", "FROM"): 2916 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2917 return self.expression(klass, this=this, expression=self._parse_expression()) 2918 2919 expression = self._parse_null() or self._parse_boolean() 2920 if not expression: 2921 self._retreat(index) 2922 return None 2923 2924 this = self.expression(exp.Is, this=this, expression=expression) 2925 return self.expression(exp.Not, this=this) if negate else this 2926 2927 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2928 unnest = self._parse_unnest(with_alias=False) 2929 if unnest: 2930 this = self.expression(exp.In, this=this, unnest=unnest) 2931 elif self._match(TokenType.L_PAREN): 2932 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2933 2934 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2935 this = self.expression(exp.In, this=this, query=expressions[0]) 2936 else: 2937 this = self.expression(exp.In, this=this, expressions=expressions) 2938 2939 self._match_r_paren(this) 2940 else: 2941 this = self.expression(exp.In, this=this, field=self._parse_field()) 2942 2943 return this 2944 2945 def _parse_between(self, this: exp.Expression) -> exp.Between: 2946 low = self._parse_bitwise() 2947 self._match(TokenType.AND) 2948 high = self._parse_bitwise() 2949 return self.expression(exp.Between, this=this, low=low, high=high) 2950 2951 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2952 if not self._match(TokenType.ESCAPE): 2953 return this 2954 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2955 2956 def _parse_interval(self) -> t.Optional[exp.Interval]: 2957 if not self._match(TokenType.INTERVAL): 2958 return None 2959 2960 if self._match(TokenType.STRING, advance=False): 2961 this = self._parse_primary() 2962 else: 2963 this = self._parse_term() 2964 2965 unit = self._parse_function() or self._parse_var() 2966 2967 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2968 # each INTERVAL expression into this canonical form so it's easy to transpile 2969 if this and this.is_number: 2970 this = exp.Literal.string(this.name) 2971 elif this and this.is_string: 2972 parts = this.name.split() 2973 2974 if len(parts) == 2: 2975 if unit: 2976 # this is not actually a unit, it's something else 2977 unit = None 2978 self._retreat(self._index - 1) 2979 else: 2980 this = exp.Literal.string(parts[0]) 2981 unit = self.expression(exp.Var, this=parts[1]) 2982 2983 return self.expression(exp.Interval, this=this, unit=unit) 2984 2985 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2986 this = self._parse_term() 2987 2988 while True: 2989 if self._match_set(self.BITWISE): 2990 this = self.expression( 2991 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2992 ) 2993 elif self._match_pair(TokenType.LT, TokenType.LT): 2994 this = self.expression( 2995 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2996 ) 2997 elif self._match_pair(TokenType.GT, TokenType.GT): 2998 this = self.expression( 2999 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3000 ) 3001 else: 3002 break 3003 3004 return this 3005 3006 def _parse_term(self) -> t.Optional[exp.Expression]: 3007 return self._parse_tokens(self._parse_factor, self.TERM) 3008 3009 def _parse_factor(self) -> t.Optional[exp.Expression]: 3010 return self._parse_tokens(self._parse_unary, self.FACTOR) 3011 3012 def _parse_unary(self) -> t.Optional[exp.Expression]: 3013 if self._match_set(self.UNARY_PARSERS): 3014 return self.UNARY_PARSERS[self._prev.token_type](self) 3015 return self._parse_at_time_zone(self._parse_type()) 3016 3017 def _parse_type(self) -> t.Optional[exp.Expression]: 3018 interval = self._parse_interval() 3019 if interval: 3020 return interval 3021 3022 index = self._index 3023 data_type = self._parse_types(check_func=True) 3024 this = self._parse_column() 3025 3026 if data_type: 3027 if isinstance(this, exp.Literal): 3028 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3029 if parser: 3030 return parser(self, this, data_type) 3031 return self.expression(exp.Cast, this=this, to=data_type) 3032 if not data_type.expressions: 3033 self._retreat(index) 3034 return self._parse_column() 3035 return self._parse_column_ops(data_type) 3036 3037 return this 3038 3039 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3040 this = self._parse_type() 3041 if not this: 3042 return None 3043 3044 return self.expression( 3045 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3046 ) 3047 3048 def _parse_types( 3049 self, check_func: bool = False, schema: bool = False 3050 ) -> t.Optional[exp.Expression]: 3051 index = self._index 3052 3053 prefix = self._match_text_seq("SYSUDTLIB", ".") 3054 3055 if not self._match_set(self.TYPE_TOKENS): 3056 return None 3057 3058 type_token = self._prev.token_type 3059 3060 if type_token == TokenType.PSEUDO_TYPE: 3061 return self.expression(exp.PseudoType, this=self._prev.text) 3062 3063 nested = type_token in self.NESTED_TYPE_TOKENS 3064 is_struct = type_token == TokenType.STRUCT 3065 expressions = None 3066 maybe_func = False 3067 3068 if self._match(TokenType.L_PAREN): 3069 if is_struct: 3070 expressions = self._parse_csv(self._parse_struct_types) 3071 elif nested: 3072 expressions = self._parse_csv( 3073 lambda: self._parse_types(check_func=check_func, schema=schema) 3074 ) 3075 elif type_token in self.ENUM_TYPE_TOKENS: 3076 expressions = self._parse_csv(self._parse_primary) 3077 else: 3078 expressions = self._parse_csv(self._parse_type_size) 3079 3080 if not expressions or not self._match(TokenType.R_PAREN): 3081 self._retreat(index) 3082 return None 3083 3084 maybe_func = True 3085 3086 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3087 this = exp.DataType( 3088 this=exp.DataType.Type.ARRAY, 3089 expressions=[ 3090 exp.DataType( 3091 this=exp.DataType.Type[type_token.value], 3092 expressions=expressions, 3093 nested=nested, 3094 ) 3095 ], 3096 nested=True, 3097 ) 3098 3099 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3100 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3101 3102 return this 3103 3104 if self._match(TokenType.L_BRACKET): 3105 self._retreat(index) 3106 return None 3107 3108 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3109 if nested and self._match(TokenType.LT): 3110 if is_struct: 3111 expressions = self._parse_csv(self._parse_struct_types) 3112 else: 3113 expressions = self._parse_csv( 3114 lambda: self._parse_types(check_func=check_func, schema=schema) 3115 ) 3116 3117 if not self._match(TokenType.GT): 3118 self.raise_error("Expecting >") 3119 3120 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3121 values = self._parse_csv(self._parse_conjunction) 3122 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3123 3124 value: t.Optional[exp.Expression] = None 3125 if type_token in self.TIMESTAMPS: 3126 if self._match_text_seq("WITH", "TIME", "ZONE"): 3127 maybe_func = False 3128 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3129 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3130 maybe_func = False 3131 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3132 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3133 maybe_func = False 3134 elif type_token == TokenType.INTERVAL: 3135 unit = self._parse_var() 3136 3137 if not unit: 3138 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3139 else: 3140 value = self.expression(exp.Interval, unit=unit) 3141 3142 if maybe_func and check_func: 3143 index2 = self._index 3144 peek = self._parse_string() 3145 3146 if not peek: 3147 self._retreat(index) 3148 return None 3149 3150 self._retreat(index2) 3151 3152 if value: 3153 return value 3154 3155 return exp.DataType( 3156 this=exp.DataType.Type[type_token.value], 3157 expressions=expressions, 3158 nested=nested, 3159 values=values, 3160 prefix=prefix, 3161 ) 3162 3163 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3164 this = self._parse_type() or self._parse_id_var() 3165 self._match(TokenType.COLON) 3166 return self._parse_column_def(this) 3167 3168 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3169 if not self._match_text_seq("AT", "TIME", "ZONE"): 3170 return this 3171 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3172 3173 def _parse_column(self) -> t.Optional[exp.Expression]: 3174 this = self._parse_field() 3175 if isinstance(this, exp.Identifier): 3176 this = self.expression(exp.Column, this=this) 3177 elif not this: 3178 return self._parse_bracket(this) 3179 return self._parse_column_ops(this) 3180 3181 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3182 this = self._parse_bracket(this) 3183 3184 while self._match_set(self.COLUMN_OPERATORS): 3185 op_token = self._prev.token_type 3186 op = self.COLUMN_OPERATORS.get(op_token) 3187 3188 if op_token == TokenType.DCOLON: 3189 field = self._parse_types() 3190 if not field: 3191 self.raise_error("Expected type") 3192 elif op and self._curr: 3193 self._advance() 3194 value = self._prev.text 3195 field = ( 3196 exp.Literal.number(value) 3197 if self._prev.token_type == TokenType.NUMBER 3198 else exp.Literal.string(value) 3199 ) 3200 else: 3201 field = self._parse_field(anonymous_func=True, any_token=True) 3202 3203 if isinstance(field, exp.Func): 3204 # bigquery allows function calls like x.y.count(...) 3205 # SAFE.SUBSTR(...) 3206 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3207 this = self._replace_columns_with_dots(this) 3208 3209 if op: 3210 this = op(self, this, field) 3211 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3212 this = self.expression( 3213 exp.Column, 3214 this=field, 3215 table=this.this, 3216 db=this.args.get("table"), 3217 catalog=this.args.get("db"), 3218 ) 3219 else: 3220 this = self.expression(exp.Dot, this=this, expression=field) 3221 this = self._parse_bracket(this) 3222 return this 3223 3224 def _parse_primary(self) -> t.Optional[exp.Expression]: 3225 if self._match_set(self.PRIMARY_PARSERS): 3226 token_type = self._prev.token_type 3227 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3228 3229 if token_type == TokenType.STRING: 3230 expressions = [primary] 3231 while self._match(TokenType.STRING): 3232 expressions.append(exp.Literal.string(self._prev.text)) 3233 3234 if len(expressions) > 1: 3235 return self.expression(exp.Concat, expressions=expressions) 3236 3237 return primary 3238 3239 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3240 return exp.Literal.number(f"0.{self._prev.text}") 3241 3242 if self._match(TokenType.L_PAREN): 3243 comments = self._prev_comments 3244 query = self._parse_select() 3245 3246 if query: 3247 expressions = [query] 3248 else: 3249 expressions = self._parse_expressions() 3250 3251 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3252 3253 if isinstance(this, exp.Subqueryable): 3254 this = self._parse_set_operations( 3255 self._parse_subquery(this=this, parse_alias=False) 3256 ) 3257 elif len(expressions) > 1: 3258 this = self.expression(exp.Tuple, expressions=expressions) 3259 else: 3260 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3261 3262 if this: 3263 this.add_comments(comments) 3264 3265 self._match_r_paren(expression=this) 3266 return this 3267 3268 return None 3269 3270 def _parse_field( 3271 self, 3272 any_token: bool = False, 3273 tokens: t.Optional[t.Collection[TokenType]] = None, 3274 anonymous_func: bool = False, 3275 ) -> t.Optional[exp.Expression]: 3276 return ( 3277 self._parse_primary() 3278 or self._parse_function(anonymous=anonymous_func) 3279 or self._parse_id_var(any_token=any_token, tokens=tokens) 3280 ) 3281 3282 def _parse_function( 3283 self, 3284 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3285 anonymous: bool = False, 3286 optional_parens: bool = True, 3287 ) -> t.Optional[exp.Expression]: 3288 if not self._curr: 3289 return None 3290 3291 token_type = self._curr.token_type 3292 3293 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3294 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3295 3296 if not self._next or self._next.token_type != TokenType.L_PAREN: 3297 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3298 self._advance() 3299 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3300 3301 return None 3302 3303 if token_type not in self.FUNC_TOKENS: 3304 return None 3305 3306 this = self._curr.text 3307 upper = this.upper() 3308 self._advance(2) 3309 3310 parser = self.FUNCTION_PARSERS.get(upper) 3311 3312 if parser and not anonymous: 3313 this = parser(self) 3314 else: 3315 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3316 3317 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3318 this = self.expression(subquery_predicate, this=self._parse_select()) 3319 self._match_r_paren() 3320 return this 3321 3322 if functions is None: 3323 functions = self.FUNCTIONS 3324 3325 function = functions.get(upper) 3326 3327 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3328 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3329 3330 if function and not anonymous: 3331 this = self.validate_expression(function(args), args) 3332 else: 3333 this = self.expression(exp.Anonymous, this=this, expressions=args) 3334 3335 self._match_r_paren(this) 3336 return self._parse_window(this) 3337 3338 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3339 return self._parse_column_def(self._parse_id_var()) 3340 3341 def _parse_user_defined_function( 3342 self, kind: t.Optional[TokenType] = None 3343 ) -> t.Optional[exp.Expression]: 3344 this = self._parse_id_var() 3345 3346 while self._match(TokenType.DOT): 3347 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3348 3349 if not self._match(TokenType.L_PAREN): 3350 return this 3351 3352 expressions = self._parse_csv(self._parse_function_parameter) 3353 self._match_r_paren() 3354 return self.expression( 3355 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3356 ) 3357 3358 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3359 literal = self._parse_primary() 3360 if literal: 3361 return self.expression(exp.Introducer, this=token.text, expression=literal) 3362 3363 return self.expression(exp.Identifier, this=token.text) 3364 3365 def _parse_session_parameter(self) -> exp.SessionParameter: 3366 kind = None 3367 this = self._parse_id_var() or self._parse_primary() 3368 3369 if this and self._match(TokenType.DOT): 3370 kind = this.name 3371 this = self._parse_var() or self._parse_primary() 3372 3373 return self.expression(exp.SessionParameter, this=this, kind=kind) 3374 3375 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3376 index = self._index 3377 3378 if self._match(TokenType.L_PAREN): 3379 expressions = self._parse_csv(self._parse_id_var) 3380 3381 if not self._match(TokenType.R_PAREN): 3382 self._retreat(index) 3383 else: 3384 expressions = [self._parse_id_var()] 3385 3386 if self._match_set(self.LAMBDAS): 3387 return self.LAMBDAS[self._prev.token_type](self, expressions) 3388 3389 self._retreat(index) 3390 3391 this: t.Optional[exp.Expression] 3392 3393 if self._match(TokenType.DISTINCT): 3394 this = self.expression( 3395 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3396 ) 3397 else: 3398 this = self._parse_select_or_expression(alias=alias) 3399 3400 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3401 3402 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3403 index = self._index 3404 3405 if not self.errors: 3406 try: 3407 if self._parse_select(nested=True): 3408 return this 3409 except ParseError: 3410 pass 3411 finally: 3412 self.errors.clear() 3413 self._retreat(index) 3414 3415 if not self._match(TokenType.L_PAREN): 3416 return this 3417 3418 args = self._parse_csv( 3419 lambda: self._parse_constraint() 3420 or self._parse_column_def(self._parse_field(any_token=True)) 3421 ) 3422 3423 self._match_r_paren() 3424 return self.expression(exp.Schema, this=this, expressions=args) 3425 3426 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3427 # column defs are not really columns, they're identifiers 3428 if isinstance(this, exp.Column): 3429 this = this.this 3430 3431 kind = self._parse_types(schema=True) 3432 3433 if self._match_text_seq("FOR", "ORDINALITY"): 3434 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3435 3436 constraints = [] 3437 while True: 3438 constraint = self._parse_column_constraint() 3439 if not constraint: 3440 break 3441 constraints.append(constraint) 3442 3443 if not kind and not constraints: 3444 return this 3445 3446 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3447 3448 def _parse_auto_increment( 3449 self, 3450 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3451 start = None 3452 increment = None 3453 3454 if self._match(TokenType.L_PAREN, advance=False): 3455 args = self._parse_wrapped_csv(self._parse_bitwise) 3456 start = seq_get(args, 0) 3457 increment = seq_get(args, 1) 3458 elif self._match_text_seq("START"): 3459 start = self._parse_bitwise() 3460 self._match_text_seq("INCREMENT") 3461 increment = self._parse_bitwise() 3462 3463 if start and increment: 3464 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3465 3466 return exp.AutoIncrementColumnConstraint() 3467 3468 def _parse_compress(self) -> exp.CompressColumnConstraint: 3469 if self._match(TokenType.L_PAREN, advance=False): 3470 return self.expression( 3471 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3472 ) 3473 3474 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3475 3476 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3477 if self._match_text_seq("BY", "DEFAULT"): 3478 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3479 this = self.expression( 3480 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3481 ) 3482 else: 3483 self._match_text_seq("ALWAYS") 3484 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3485 3486 self._match(TokenType.ALIAS) 3487 identity = self._match_text_seq("IDENTITY") 3488 3489 if self._match(TokenType.L_PAREN): 3490 if self._match_text_seq("START", "WITH"): 3491 this.set("start", self._parse_bitwise()) 3492 if self._match_text_seq("INCREMENT", "BY"): 3493 this.set("increment", self._parse_bitwise()) 3494 if self._match_text_seq("MINVALUE"): 3495 this.set("minvalue", self._parse_bitwise()) 3496 if self._match_text_seq("MAXVALUE"): 3497 this.set("maxvalue", self._parse_bitwise()) 3498 3499 if self._match_text_seq("CYCLE"): 3500 this.set("cycle", True) 3501 elif self._match_text_seq("NO", "CYCLE"): 3502 this.set("cycle", False) 3503 3504 if not identity: 3505 this.set("expression", self._parse_bitwise()) 3506 3507 self._match_r_paren() 3508 3509 return this 3510 3511 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3512 self._match_text_seq("LENGTH") 3513 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3514 3515 def _parse_not_constraint( 3516 self, 3517 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3518 if self._match_text_seq("NULL"): 3519 return self.expression(exp.NotNullColumnConstraint) 3520 if self._match_text_seq("CASESPECIFIC"): 3521 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3522 return None 3523 3524 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3525 if self._match(TokenType.CONSTRAINT): 3526 this = self._parse_id_var() 3527 else: 3528 this = None 3529 3530 if self._match_texts(self.CONSTRAINT_PARSERS): 3531 return self.expression( 3532 exp.ColumnConstraint, 3533 this=this, 3534 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3535 ) 3536 3537 return this 3538 3539 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3540 if not self._match(TokenType.CONSTRAINT): 3541 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3542 3543 this = self._parse_id_var() 3544 expressions = [] 3545 3546 while True: 3547 constraint = self._parse_unnamed_constraint() or self._parse_function() 3548 if not constraint: 3549 break 3550 expressions.append(constraint) 3551 3552 return self.expression(exp.Constraint, this=this, expressions=expressions) 3553 3554 def _parse_unnamed_constraint( 3555 self, constraints: t.Optional[t.Collection[str]] = None 3556 ) -> t.Optional[exp.Expression]: 3557 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3558 return None 3559 3560 constraint = self._prev.text.upper() 3561 if constraint not in self.CONSTRAINT_PARSERS: 3562 self.raise_error(f"No parser found for schema constraint {constraint}.") 3563 3564 return self.CONSTRAINT_PARSERS[constraint](self) 3565 3566 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3567 self._match_text_seq("KEY") 3568 return self.expression( 3569 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3570 ) 3571 3572 def _parse_key_constraint_options(self) -> t.List[str]: 3573 options = [] 3574 while True: 3575 if not self._curr: 3576 break 3577 3578 if self._match(TokenType.ON): 3579 action = None 3580 on = self._advance_any() and self._prev.text 3581 3582 if self._match_text_seq("NO", "ACTION"): 3583 action = "NO ACTION" 3584 elif self._match_text_seq("CASCADE"): 3585 action = "CASCADE" 3586 elif self._match_pair(TokenType.SET, TokenType.NULL): 3587 action = "SET NULL" 3588 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3589 action = "SET DEFAULT" 3590 else: 3591 self.raise_error("Invalid key constraint") 3592 3593 options.append(f"ON {on} {action}") 3594 elif self._match_text_seq("NOT", "ENFORCED"): 3595 options.append("NOT ENFORCED") 3596 elif self._match_text_seq("DEFERRABLE"): 3597 options.append("DEFERRABLE") 3598 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3599 options.append("INITIALLY DEFERRED") 3600 elif self._match_text_seq("NORELY"): 3601 options.append("NORELY") 3602 elif self._match_text_seq("MATCH", "FULL"): 3603 options.append("MATCH FULL") 3604 else: 3605 break 3606 3607 return options 3608 3609 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3610 if match and not self._match(TokenType.REFERENCES): 3611 return None 3612 3613 expressions = None 3614 this = self._parse_table(schema=True) 3615 options = self._parse_key_constraint_options() 3616 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3617 3618 def _parse_foreign_key(self) -> exp.ForeignKey: 3619 expressions = self._parse_wrapped_id_vars() 3620 reference = self._parse_references() 3621 options = {} 3622 3623 while self._match(TokenType.ON): 3624 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3625 self.raise_error("Expected DELETE or UPDATE") 3626 3627 kind = self._prev.text.lower() 3628 3629 if self._match_text_seq("NO", "ACTION"): 3630 action = "NO ACTION" 3631 elif self._match(TokenType.SET): 3632 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3633 action = "SET " + self._prev.text.upper() 3634 else: 3635 self._advance() 3636 action = self._prev.text.upper() 3637 3638 options[kind] = action 3639 3640 return self.expression( 3641 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3642 ) 3643 3644 def _parse_primary_key( 3645 self, wrapped_optional: bool = False, in_props: bool = False 3646 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3647 desc = ( 3648 self._match_set((TokenType.ASC, TokenType.DESC)) 3649 and self._prev.token_type == TokenType.DESC 3650 ) 3651 3652 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3653 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3654 3655 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3656 options = self._parse_key_constraint_options() 3657 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3658 3659 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3660 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3661 return this 3662 3663 bracket_kind = self._prev.token_type 3664 3665 if self._match(TokenType.COLON): 3666 expressions: t.List[t.Optional[exp.Expression]] = [ 3667 self.expression(exp.Slice, expression=self._parse_conjunction()) 3668 ] 3669 else: 3670 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3671 3672 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3673 if bracket_kind == TokenType.L_BRACE: 3674 this = self.expression(exp.Struct, expressions=expressions) 3675 elif not this or this.name.upper() == "ARRAY": 3676 this = self.expression(exp.Array, expressions=expressions) 3677 else: 3678 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3679 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3680 3681 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3682 self.raise_error("Expected ]") 3683 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3684 self.raise_error("Expected }") 3685 3686 self._add_comments(this) 3687 return self._parse_bracket(this) 3688 3689 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3690 if self._match(TokenType.COLON): 3691 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3692 return this 3693 3694 def _parse_case(self) -> t.Optional[exp.Expression]: 3695 ifs = [] 3696 default = None 3697 3698 expression = self._parse_conjunction() 3699 3700 while self._match(TokenType.WHEN): 3701 this = self._parse_conjunction() 3702 self._match(TokenType.THEN) 3703 then = self._parse_conjunction() 3704 ifs.append(self.expression(exp.If, this=this, true=then)) 3705 3706 if self._match(TokenType.ELSE): 3707 default = self._parse_conjunction() 3708 3709 if not self._match(TokenType.END): 3710 self.raise_error("Expected END after CASE", self._prev) 3711 3712 return self._parse_window( 3713 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3714 ) 3715 3716 def _parse_if(self) -> t.Optional[exp.Expression]: 3717 if self._match(TokenType.L_PAREN): 3718 args = self._parse_csv(self._parse_conjunction) 3719 this = self.validate_expression(exp.If.from_arg_list(args), args) 3720 self._match_r_paren() 3721 else: 3722 index = self._index - 1 3723 condition = self._parse_conjunction() 3724 3725 if not condition: 3726 self._retreat(index) 3727 return None 3728 3729 self._match(TokenType.THEN) 3730 true = self._parse_conjunction() 3731 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3732 self._match(TokenType.END) 3733 this = self.expression(exp.If, this=condition, true=true, false=false) 3734 3735 return self._parse_window(this) 3736 3737 def _parse_extract(self) -> exp.Extract: 3738 this = self._parse_function() or self._parse_var() or self._parse_type() 3739 3740 if self._match(TokenType.FROM): 3741 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3742 3743 if not self._match(TokenType.COMMA): 3744 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3745 3746 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3747 3748 def _parse_any_value(self) -> exp.AnyValue: 3749 this = self._parse_lambda() 3750 is_max = None 3751 having = None 3752 3753 if self._match(TokenType.HAVING): 3754 self._match_texts(("MAX", "MIN")) 3755 is_max = self._prev.text == "MAX" 3756 having = self._parse_column() 3757 3758 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3759 3760 def _parse_cast(self, strict: bool) -> exp.Expression: 3761 this = self._parse_conjunction() 3762 3763 if not self._match(TokenType.ALIAS): 3764 if self._match(TokenType.COMMA): 3765 return self.expression( 3766 exp.CastToStrType, this=this, expression=self._parse_string() 3767 ) 3768 else: 3769 self.raise_error("Expected AS after CAST") 3770 3771 fmt = None 3772 to = self._parse_types() 3773 3774 if not to: 3775 self.raise_error("Expected TYPE after CAST") 3776 elif to.this == exp.DataType.Type.CHAR: 3777 if self._match(TokenType.CHARACTER_SET): 3778 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3779 elif self._match(TokenType.FORMAT): 3780 fmt_string = self._parse_string() 3781 fmt = self._parse_at_time_zone(fmt_string) 3782 3783 if to.this in exp.DataType.TEMPORAL_TYPES: 3784 this = self.expression( 3785 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3786 this=this, 3787 format=exp.Literal.string( 3788 format_time( 3789 fmt_string.this if fmt_string else "", 3790 self.FORMAT_MAPPING or self.TIME_MAPPING, 3791 self.FORMAT_TRIE or self.TIME_TRIE, 3792 ) 3793 ), 3794 ) 3795 3796 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3797 this.set("zone", fmt.args["zone"]) 3798 3799 return this 3800 3801 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3802 3803 def _parse_concat(self) -> t.Optional[exp.Expression]: 3804 args = self._parse_csv(self._parse_conjunction) 3805 if self.CONCAT_NULL_OUTPUTS_STRING: 3806 args = [ 3807 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3808 for arg in args 3809 if arg 3810 ] 3811 3812 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3813 # we find such a call we replace it with its argument. 3814 if len(args) == 1: 3815 return args[0] 3816 3817 return self.expression( 3818 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3819 ) 3820 3821 def _parse_string_agg(self) -> exp.Expression: 3822 if self._match(TokenType.DISTINCT): 3823 args: t.List[t.Optional[exp.Expression]] = [ 3824 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3825 ] 3826 if self._match(TokenType.COMMA): 3827 args.extend(self._parse_csv(self._parse_conjunction)) 3828 else: 3829 args = self._parse_csv(self._parse_conjunction) 3830 3831 index = self._index 3832 if not self._match(TokenType.R_PAREN): 3833 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3834 return self.expression( 3835 exp.GroupConcat, 3836 this=seq_get(args, 0), 3837 separator=self._parse_order(this=seq_get(args, 1)), 3838 ) 3839 3840 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3841 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3842 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3843 if not self._match_text_seq("WITHIN", "GROUP"): 3844 self._retreat(index) 3845 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3846 3847 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3848 order = self._parse_order(this=seq_get(args, 0)) 3849 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3850 3851 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3852 this = self._parse_bitwise() 3853 3854 if self._match(TokenType.USING): 3855 to: t.Optional[exp.Expression] = self.expression( 3856 exp.CharacterSet, this=self._parse_var() 3857 ) 3858 elif self._match(TokenType.COMMA): 3859 to = self._parse_types() 3860 else: 3861 to = None 3862 3863 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3864 3865 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3866 """ 3867 There are generally two variants of the DECODE function: 3868 3869 - DECODE(bin, charset) 3870 - DECODE(expression, search, result [, search, result] ... [, default]) 3871 3872 The second variant will always be parsed into a CASE expression. Note that NULL 3873 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3874 instead of relying on pattern matching. 3875 """ 3876 args = self._parse_csv(self._parse_conjunction) 3877 3878 if len(args) < 3: 3879 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3880 3881 expression, *expressions = args 3882 if not expression: 3883 return None 3884 3885 ifs = [] 3886 for search, result in zip(expressions[::2], expressions[1::2]): 3887 if not search or not result: 3888 return None 3889 3890 if isinstance(search, exp.Literal): 3891 ifs.append( 3892 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3893 ) 3894 elif isinstance(search, exp.Null): 3895 ifs.append( 3896 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3897 ) 3898 else: 3899 cond = exp.or_( 3900 exp.EQ(this=expression.copy(), expression=search), 3901 exp.and_( 3902 exp.Is(this=expression.copy(), expression=exp.Null()), 3903 exp.Is(this=search.copy(), expression=exp.Null()), 3904 copy=False, 3905 ), 3906 copy=False, 3907 ) 3908 ifs.append(exp.If(this=cond, true=result)) 3909 3910 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3911 3912 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3913 self._match_text_seq("KEY") 3914 key = self._parse_field() 3915 self._match(TokenType.COLON) 3916 self._match_text_seq("VALUE") 3917 value = self._parse_field() 3918 3919 if not key and not value: 3920 return None 3921 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3922 3923 def _parse_json_object(self) -> exp.JSONObject: 3924 star = self._parse_star() 3925 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3926 3927 null_handling = None 3928 if self._match_text_seq("NULL", "ON", "NULL"): 3929 null_handling = "NULL ON NULL" 3930 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3931 null_handling = "ABSENT ON NULL" 3932 3933 unique_keys = None 3934 if self._match_text_seq("WITH", "UNIQUE"): 3935 unique_keys = True 3936 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3937 unique_keys = False 3938 3939 self._match_text_seq("KEYS") 3940 3941 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3942 format_json = self._match_text_seq("FORMAT", "JSON") 3943 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3944 3945 return self.expression( 3946 exp.JSONObject, 3947 expressions=expressions, 3948 null_handling=null_handling, 3949 unique_keys=unique_keys, 3950 return_type=return_type, 3951 format_json=format_json, 3952 encoding=encoding, 3953 ) 3954 3955 def _parse_logarithm(self) -> exp.Func: 3956 # Default argument order is base, expression 3957 args = self._parse_csv(self._parse_range) 3958 3959 if len(args) > 1: 3960 if not self.LOG_BASE_FIRST: 3961 args.reverse() 3962 return exp.Log.from_arg_list(args) 3963 3964 return self.expression( 3965 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3966 ) 3967 3968 def _parse_match_against(self) -> exp.MatchAgainst: 3969 expressions = self._parse_csv(self._parse_column) 3970 3971 self._match_text_seq(")", "AGAINST", "(") 3972 3973 this = self._parse_string() 3974 3975 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3976 modifier = "IN NATURAL LANGUAGE MODE" 3977 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3978 modifier = f"{modifier} WITH QUERY EXPANSION" 3979 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3980 modifier = "IN BOOLEAN MODE" 3981 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3982 modifier = "WITH QUERY EXPANSION" 3983 else: 3984 modifier = None 3985 3986 return self.expression( 3987 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3988 ) 3989 3990 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3991 def _parse_open_json(self) -> exp.OpenJSON: 3992 this = self._parse_bitwise() 3993 path = self._match(TokenType.COMMA) and self._parse_string() 3994 3995 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3996 this = self._parse_field(any_token=True) 3997 kind = self._parse_types() 3998 path = self._parse_string() 3999 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4000 4001 return self.expression( 4002 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4003 ) 4004 4005 expressions = None 4006 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4007 self._match_l_paren() 4008 expressions = self._parse_csv(_parse_open_json_column_def) 4009 4010 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4011 4012 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4013 args = self._parse_csv(self._parse_bitwise) 4014 4015 if self._match(TokenType.IN): 4016 return self.expression( 4017 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4018 ) 4019 4020 if haystack_first: 4021 haystack = seq_get(args, 0) 4022 needle = seq_get(args, 1) 4023 else: 4024 needle = seq_get(args, 0) 4025 haystack = seq_get(args, 1) 4026 4027 return self.expression( 4028 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4029 ) 4030 4031 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4032 args = self._parse_csv(self._parse_table) 4033 return exp.JoinHint(this=func_name.upper(), expressions=args) 4034 4035 def _parse_substring(self) -> exp.Substring: 4036 # Postgres supports the form: substring(string [from int] [for int]) 4037 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4038 4039 args = self._parse_csv(self._parse_bitwise) 4040 4041 if self._match(TokenType.FROM): 4042 args.append(self._parse_bitwise()) 4043 if self._match(TokenType.FOR): 4044 args.append(self._parse_bitwise()) 4045 4046 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4047 4048 def _parse_trim(self) -> exp.Trim: 4049 # https://www.w3resource.com/sql/character-functions/trim.php 4050 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4051 4052 position = None 4053 collation = None 4054 4055 if self._match_texts(self.TRIM_TYPES): 4056 position = self._prev.text.upper() 4057 4058 expression = self._parse_bitwise() 4059 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4060 this = self._parse_bitwise() 4061 else: 4062 this = expression 4063 expression = None 4064 4065 if self._match(TokenType.COLLATE): 4066 collation = self._parse_bitwise() 4067 4068 return self.expression( 4069 exp.Trim, this=this, position=position, expression=expression, collation=collation 4070 ) 4071 4072 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4073 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4074 4075 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4076 return self._parse_window(self._parse_id_var(), alias=True) 4077 4078 def _parse_respect_or_ignore_nulls( 4079 self, this: t.Optional[exp.Expression] 4080 ) -> t.Optional[exp.Expression]: 4081 if self._match_text_seq("IGNORE", "NULLS"): 4082 return self.expression(exp.IgnoreNulls, this=this) 4083 if self._match_text_seq("RESPECT", "NULLS"): 4084 return self.expression(exp.RespectNulls, this=this) 4085 return this 4086 4087 def _parse_window( 4088 self, this: t.Optional[exp.Expression], alias: bool = False 4089 ) -> t.Optional[exp.Expression]: 4090 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4091 self._match(TokenType.WHERE) 4092 this = self.expression( 4093 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4094 ) 4095 self._match_r_paren() 4096 4097 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4098 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4099 if self._match_text_seq("WITHIN", "GROUP"): 4100 order = self._parse_wrapped(self._parse_order) 4101 this = self.expression(exp.WithinGroup, this=this, expression=order) 4102 4103 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4104 # Some dialects choose to implement and some do not. 4105 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4106 4107 # There is some code above in _parse_lambda that handles 4108 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4109 4110 # The below changes handle 4111 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4112 4113 # Oracle allows both formats 4114 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4115 # and Snowflake chose to do the same for familiarity 4116 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4117 this = self._parse_respect_or_ignore_nulls(this) 4118 4119 # bigquery select from window x AS (partition by ...) 4120 if alias: 4121 over = None 4122 self._match(TokenType.ALIAS) 4123 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4124 return this 4125 else: 4126 over = self._prev.text.upper() 4127 4128 if not self._match(TokenType.L_PAREN): 4129 return self.expression( 4130 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4131 ) 4132 4133 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4134 4135 first = self._match(TokenType.FIRST) 4136 if self._match_text_seq("LAST"): 4137 first = False 4138 4139 partition = self._parse_partition_by() 4140 order = self._parse_order() 4141 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4142 4143 if kind: 4144 self._match(TokenType.BETWEEN) 4145 start = self._parse_window_spec() 4146 self._match(TokenType.AND) 4147 end = self._parse_window_spec() 4148 4149 spec = self.expression( 4150 exp.WindowSpec, 4151 kind=kind, 4152 start=start["value"], 4153 start_side=start["side"], 4154 end=end["value"], 4155 end_side=end["side"], 4156 ) 4157 else: 4158 spec = None 4159 4160 self._match_r_paren() 4161 4162 return self.expression( 4163 exp.Window, 4164 this=this, 4165 partition_by=partition, 4166 order=order, 4167 spec=spec, 4168 alias=window_alias, 4169 over=over, 4170 first=first, 4171 ) 4172 4173 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4174 self._match(TokenType.BETWEEN) 4175 4176 return { 4177 "value": ( 4178 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4179 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4180 or self._parse_bitwise() 4181 ), 4182 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4183 } 4184 4185 def _parse_alias( 4186 self, this: t.Optional[exp.Expression], explicit: bool = False 4187 ) -> t.Optional[exp.Expression]: 4188 any_token = self._match(TokenType.ALIAS) 4189 4190 if explicit and not any_token: 4191 return this 4192 4193 if self._match(TokenType.L_PAREN): 4194 aliases = self.expression( 4195 exp.Aliases, 4196 this=this, 4197 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4198 ) 4199 self._match_r_paren(aliases) 4200 return aliases 4201 4202 alias = self._parse_id_var(any_token) 4203 4204 if alias: 4205 return self.expression(exp.Alias, this=this, alias=alias) 4206 4207 return this 4208 4209 def _parse_id_var( 4210 self, 4211 any_token: bool = True, 4212 tokens: t.Optional[t.Collection[TokenType]] = None, 4213 ) -> t.Optional[exp.Expression]: 4214 identifier = self._parse_identifier() 4215 4216 if identifier: 4217 return identifier 4218 4219 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4220 quoted = self._prev.token_type == TokenType.STRING 4221 return exp.Identifier(this=self._prev.text, quoted=quoted) 4222 4223 return None 4224 4225 def _parse_string(self) -> t.Optional[exp.Expression]: 4226 if self._match(TokenType.STRING): 4227 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4228 return self._parse_placeholder() 4229 4230 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4231 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4232 4233 def _parse_number(self) -> t.Optional[exp.Expression]: 4234 if self._match(TokenType.NUMBER): 4235 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4236 return self._parse_placeholder() 4237 4238 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4239 if self._match(TokenType.IDENTIFIER): 4240 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4241 return self._parse_placeholder() 4242 4243 def _parse_var( 4244 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4245 ) -> t.Optional[exp.Expression]: 4246 if ( 4247 (any_token and self._advance_any()) 4248 or self._match(TokenType.VAR) 4249 or (self._match_set(tokens) if tokens else False) 4250 ): 4251 return self.expression(exp.Var, this=self._prev.text) 4252 return self._parse_placeholder() 4253 4254 def _advance_any(self) -> t.Optional[Token]: 4255 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4256 self._advance() 4257 return self._prev 4258 return None 4259 4260 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4261 return self._parse_var() or self._parse_string() 4262 4263 def _parse_null(self) -> t.Optional[exp.Expression]: 4264 if self._match(TokenType.NULL): 4265 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4266 return None 4267 4268 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4269 if self._match(TokenType.TRUE): 4270 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4271 if self._match(TokenType.FALSE): 4272 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4273 return None 4274 4275 def _parse_star(self) -> t.Optional[exp.Expression]: 4276 if self._match(TokenType.STAR): 4277 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4278 return None 4279 4280 def _parse_parameter(self) -> exp.Parameter: 4281 wrapped = self._match(TokenType.L_BRACE) 4282 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4283 self._match(TokenType.R_BRACE) 4284 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4285 4286 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4287 if self._match_set(self.PLACEHOLDER_PARSERS): 4288 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4289 if placeholder: 4290 return placeholder 4291 self._advance(-1) 4292 return None 4293 4294 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4295 if not self._match(TokenType.EXCEPT): 4296 return None 4297 if self._match(TokenType.L_PAREN, advance=False): 4298 return self._parse_wrapped_csv(self._parse_column) 4299 return self._parse_csv(self._parse_column) 4300 4301 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4302 if not self._match(TokenType.REPLACE): 4303 return None 4304 if self._match(TokenType.L_PAREN, advance=False): 4305 return self._parse_wrapped_csv(self._parse_expression) 4306 return self._parse_expressions() 4307 4308 def _parse_csv( 4309 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4310 ) -> t.List[t.Optional[exp.Expression]]: 4311 parse_result = parse_method() 4312 items = [parse_result] if parse_result is not None else [] 4313 4314 while self._match(sep): 4315 self._add_comments(parse_result) 4316 parse_result = parse_method() 4317 if parse_result is not None: 4318 items.append(parse_result) 4319 4320 return items 4321 4322 def _parse_tokens( 4323 self, parse_method: t.Callable, expressions: t.Dict 4324 ) -> t.Optional[exp.Expression]: 4325 this = parse_method() 4326 4327 while self._match_set(expressions): 4328 this = self.expression( 4329 expressions[self._prev.token_type], 4330 this=this, 4331 comments=self._prev_comments, 4332 expression=parse_method(), 4333 ) 4334 4335 return this 4336 4337 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4338 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4339 4340 def _parse_wrapped_csv( 4341 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4342 ) -> t.List[t.Optional[exp.Expression]]: 4343 return self._parse_wrapped( 4344 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4345 ) 4346 4347 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4348 wrapped = self._match(TokenType.L_PAREN) 4349 if not wrapped and not optional: 4350 self.raise_error("Expecting (") 4351 parse_result = parse_method() 4352 if wrapped: 4353 self._match_r_paren() 4354 return parse_result 4355 4356 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4357 return self._parse_csv(self._parse_expression) 4358 4359 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4360 return self._parse_select() or self._parse_set_operations( 4361 self._parse_expression() if alias else self._parse_conjunction() 4362 ) 4363 4364 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4365 return self._parse_query_modifiers( 4366 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4367 ) 4368 4369 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4370 this = None 4371 if self._match_texts(self.TRANSACTION_KIND): 4372 this = self._prev.text 4373 4374 self._match_texts({"TRANSACTION", "WORK"}) 4375 4376 modes = [] 4377 while True: 4378 mode = [] 4379 while self._match(TokenType.VAR): 4380 mode.append(self._prev.text) 4381 4382 if mode: 4383 modes.append(" ".join(mode)) 4384 if not self._match(TokenType.COMMA): 4385 break 4386 4387 return self.expression(exp.Transaction, this=this, modes=modes) 4388 4389 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4390 chain = None 4391 savepoint = None 4392 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4393 4394 self._match_texts({"TRANSACTION", "WORK"}) 4395 4396 if self._match_text_seq("TO"): 4397 self._match_text_seq("SAVEPOINT") 4398 savepoint = self._parse_id_var() 4399 4400 if self._match(TokenType.AND): 4401 chain = not self._match_text_seq("NO") 4402 self._match_text_seq("CHAIN") 4403 4404 if is_rollback: 4405 return self.expression(exp.Rollback, savepoint=savepoint) 4406 4407 return self.expression(exp.Commit, chain=chain) 4408 4409 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4410 if not self._match_text_seq("ADD"): 4411 return None 4412 4413 self._match(TokenType.COLUMN) 4414 exists_column = self._parse_exists(not_=True) 4415 expression = self._parse_column_def(self._parse_field(any_token=True)) 4416 4417 if expression: 4418 expression.set("exists", exists_column) 4419 4420 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4421 if self._match_texts(("FIRST", "AFTER")): 4422 position = self._prev.text 4423 column_position = self.expression( 4424 exp.ColumnPosition, this=self._parse_column(), position=position 4425 ) 4426 expression.set("position", column_position) 4427 4428 return expression 4429 4430 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4431 drop = self._match(TokenType.DROP) and self._parse_drop() 4432 if drop and not isinstance(drop, exp.Command): 4433 drop.set("kind", drop.args.get("kind", "COLUMN")) 4434 return drop 4435 4436 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4437 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4438 return self.expression( 4439 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4440 ) 4441 4442 def _parse_add_constraint(self) -> exp.AddConstraint: 4443 this = None 4444 kind = self._prev.token_type 4445 4446 if kind == TokenType.CONSTRAINT: 4447 this = self._parse_id_var() 4448 4449 if self._match_text_seq("CHECK"): 4450 expression = self._parse_wrapped(self._parse_conjunction) 4451 enforced = self._match_text_seq("ENFORCED") 4452 4453 return self.expression( 4454 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4455 ) 4456 4457 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4458 expression = self._parse_foreign_key() 4459 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4460 expression = self._parse_primary_key() 4461 else: 4462 expression = None 4463 4464 return self.expression(exp.AddConstraint, this=this, expression=expression) 4465 4466 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4467 index = self._index - 1 4468 4469 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4470 return self._parse_csv(self._parse_add_constraint) 4471 4472 self._retreat(index) 4473 return self._parse_csv(self._parse_add_column) 4474 4475 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4476 self._match(TokenType.COLUMN) 4477 column = self._parse_field(any_token=True) 4478 4479 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4480 return self.expression(exp.AlterColumn, this=column, drop=True) 4481 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4482 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4483 4484 self._match_text_seq("SET", "DATA") 4485 return self.expression( 4486 exp.AlterColumn, 4487 this=column, 4488 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4489 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4490 using=self._match(TokenType.USING) and self._parse_conjunction(), 4491 ) 4492 4493 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4494 index = self._index - 1 4495 4496 partition_exists = self._parse_exists() 4497 if self._match(TokenType.PARTITION, advance=False): 4498 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4499 4500 self._retreat(index) 4501 return self._parse_csv(self._parse_drop_column) 4502 4503 def _parse_alter_table_rename(self) -> exp.RenameTable: 4504 self._match_text_seq("TO") 4505 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4506 4507 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4508 start = self._prev 4509 4510 if not self._match(TokenType.TABLE): 4511 return self._parse_as_command(start) 4512 4513 exists = self._parse_exists() 4514 this = self._parse_table(schema=True) 4515 4516 if self._next: 4517 self._advance() 4518 4519 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4520 if parser: 4521 actions = ensure_list(parser(self)) 4522 4523 if not self._curr: 4524 return self.expression( 4525 exp.AlterTable, 4526 this=this, 4527 exists=exists, 4528 actions=actions, 4529 ) 4530 return self._parse_as_command(start) 4531 4532 def _parse_merge(self) -> exp.Merge: 4533 self._match(TokenType.INTO) 4534 target = self._parse_table() 4535 4536 self._match(TokenType.USING) 4537 using = self._parse_table() 4538 4539 self._match(TokenType.ON) 4540 on = self._parse_conjunction() 4541 4542 whens = [] 4543 while self._match(TokenType.WHEN): 4544 matched = not self._match(TokenType.NOT) 4545 self._match_text_seq("MATCHED") 4546 source = ( 4547 False 4548 if self._match_text_seq("BY", "TARGET") 4549 else self._match_text_seq("BY", "SOURCE") 4550 ) 4551 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4552 4553 self._match(TokenType.THEN) 4554 4555 if self._match(TokenType.INSERT): 4556 _this = self._parse_star() 4557 if _this: 4558 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4559 else: 4560 then = self.expression( 4561 exp.Insert, 4562 this=self._parse_value(), 4563 expression=self._match(TokenType.VALUES) and self._parse_value(), 4564 ) 4565 elif self._match(TokenType.UPDATE): 4566 expressions = self._parse_star() 4567 if expressions: 4568 then = self.expression(exp.Update, expressions=expressions) 4569 else: 4570 then = self.expression( 4571 exp.Update, 4572 expressions=self._match(TokenType.SET) 4573 and self._parse_csv(self._parse_equality), 4574 ) 4575 elif self._match(TokenType.DELETE): 4576 then = self.expression(exp.Var, this=self._prev.text) 4577 else: 4578 then = None 4579 4580 whens.append( 4581 self.expression( 4582 exp.When, 4583 matched=matched, 4584 source=source, 4585 condition=condition, 4586 then=then, 4587 ) 4588 ) 4589 4590 return self.expression( 4591 exp.Merge, 4592 this=target, 4593 using=using, 4594 on=on, 4595 expressions=whens, 4596 ) 4597 4598 def _parse_show(self) -> t.Optional[exp.Expression]: 4599 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4600 if parser: 4601 return parser(self) 4602 self._advance() 4603 return self.expression(exp.Show, this=self._prev.text.upper()) 4604 4605 def _parse_set_item_assignment( 4606 self, kind: t.Optional[str] = None 4607 ) -> t.Optional[exp.Expression]: 4608 index = self._index 4609 4610 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4611 return self._parse_set_transaction(global_=kind == "GLOBAL") 4612 4613 left = self._parse_primary() or self._parse_id_var() 4614 4615 if not self._match_texts(("=", "TO")): 4616 self._retreat(index) 4617 return None 4618 4619 right = self._parse_statement() or self._parse_id_var() 4620 this = self.expression(exp.EQ, this=left, expression=right) 4621 4622 return self.expression(exp.SetItem, this=this, kind=kind) 4623 4624 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4625 self._match_text_seq("TRANSACTION") 4626 characteristics = self._parse_csv( 4627 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4628 ) 4629 return self.expression( 4630 exp.SetItem, 4631 expressions=characteristics, 4632 kind="TRANSACTION", 4633 **{"global": global_}, # type: ignore 4634 ) 4635 4636 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4637 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4638 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4639 4640 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4641 index = self._index 4642 set_ = self.expression( 4643 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4644 ) 4645 4646 if self._curr: 4647 self._retreat(index) 4648 return self._parse_as_command(self._prev) 4649 4650 return set_ 4651 4652 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4653 for option in options: 4654 if self._match_text_seq(*option.split(" ")): 4655 return exp.var(option) 4656 return None 4657 4658 def _parse_as_command(self, start: Token) -> exp.Command: 4659 while self._curr: 4660 self._advance() 4661 text = self._find_sql(start, self._prev) 4662 size = len(start.text) 4663 return exp.Command(this=text[:size], expression=text[size:]) 4664 4665 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4666 settings = [] 4667 4668 self._match_l_paren() 4669 kind = self._parse_id_var() 4670 4671 if self._match(TokenType.L_PAREN): 4672 while True: 4673 key = self._parse_id_var() 4674 value = self._parse_primary() 4675 4676 if not key and value is None: 4677 break 4678 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4679 self._match(TokenType.R_PAREN) 4680 4681 self._match_r_paren() 4682 4683 return self.expression( 4684 exp.DictProperty, 4685 this=this, 4686 kind=kind.this if kind else None, 4687 settings=settings, 4688 ) 4689 4690 def _parse_dict_range(self, this: str) -> exp.DictRange: 4691 self._match_l_paren() 4692 has_min = self._match_text_seq("MIN") 4693 if has_min: 4694 min = self._parse_var() or self._parse_primary() 4695 self._match_text_seq("MAX") 4696 max = self._parse_var() or self._parse_primary() 4697 else: 4698 max = self._parse_var() or self._parse_primary() 4699 min = exp.Literal.number(0) 4700 self._match_r_paren() 4701 return self.expression(exp.DictRange, this=this, min=min, max=max) 4702 4703 def _find_parser( 4704 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4705 ) -> t.Optional[t.Callable]: 4706 if not self._curr: 4707 return None 4708 4709 index = self._index 4710 this = [] 4711 while True: 4712 # The current token might be multiple words 4713 curr = self._curr.text.upper() 4714 key = curr.split(" ") 4715 this.append(curr) 4716 4717 self._advance() 4718 result, trie = in_trie(trie, key) 4719 if result == TrieResult.FAILED: 4720 break 4721 4722 if result == TrieResult.EXISTS: 4723 subparser = parsers[" ".join(this)] 4724 return subparser 4725 4726 self._retreat(index) 4727 return None 4728 4729 def _match(self, token_type, advance=True, expression=None): 4730 if not self._curr: 4731 return None 4732 4733 if self._curr.token_type == token_type: 4734 if advance: 4735 self._advance() 4736 self._add_comments(expression) 4737 return True 4738 4739 return None 4740 4741 def _match_set(self, types, advance=True): 4742 if not self._curr: 4743 return None 4744 4745 if self._curr.token_type in types: 4746 if advance: 4747 self._advance() 4748 return True 4749 4750 return None 4751 4752 def _match_pair(self, token_type_a, token_type_b, advance=True): 4753 if not self._curr or not self._next: 4754 return None 4755 4756 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4757 if advance: 4758 self._advance(2) 4759 return True 4760 4761 return None 4762 4763 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4764 if not self._match(TokenType.L_PAREN, expression=expression): 4765 self.raise_error("Expecting (") 4766 4767 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4768 if not self._match(TokenType.R_PAREN, expression=expression): 4769 self.raise_error("Expecting )") 4770 4771 def _match_texts(self, texts, advance=True): 4772 if self._curr and self._curr.text.upper() in texts: 4773 if advance: 4774 self._advance() 4775 return True 4776 return False 4777 4778 def _match_text_seq(self, *texts, advance=True): 4779 index = self._index 4780 for text in texts: 4781 if self._curr and self._curr.text.upper() == text: 4782 self._advance() 4783 else: 4784 self._retreat(index) 4785 return False 4786 4787 if not advance: 4788 self._retreat(index) 4789 4790 return True 4791 4792 @t.overload 4793 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4794 ... 4795 4796 @t.overload 4797 def _replace_columns_with_dots( 4798 self, this: t.Optional[exp.Expression] 4799 ) -> t.Optional[exp.Expression]: 4800 ... 4801 4802 def _replace_columns_with_dots(self, this): 4803 if isinstance(this, exp.Dot): 4804 exp.replace_children(this, self._replace_columns_with_dots) 4805 elif isinstance(this, exp.Column): 4806 exp.replace_children(this, self._replace_columns_with_dots) 4807 table = this.args.get("table") 4808 this = ( 4809 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4810 ) 4811 4812 return this 4813 4814 def _replace_lambda( 4815 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4816 ) -> t.Optional[exp.Expression]: 4817 if not node: 4818 return node 4819 4820 for column in node.find_all(exp.Column): 4821 if column.parts[0].name in lambda_variables: 4822 dot_or_id = column.to_dot() if column.table else column.this 4823 parent = column.parent 4824 4825 while isinstance(parent, exp.Dot): 4826 if not isinstance(parent.parent, exp.Dot): 4827 parent.replace(dot_or_id) 4828 break 4829 parent = parent.parent 4830 else: 4831 if column is node: 4832 node = dot_or_id 4833 else: 4834 column.replace(dot_or_id) 4835 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset()
864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 )
Logs or raises any found errors, depending on the chosen error level setting.
964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.