sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
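
# Illustrative note (not part of the original module): these helpers receive the
# already-parsed argument list in function-call order, which for SQLite-style
# like(X, Y) means X is the pattern and Y the value being matched, e.g.
#
#     parse_like([exp.Literal.string("a%"), exp.column("x")])
#
# builds Like(this=x, expression='a%'), and a third argument adds an Escape wrapper.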


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
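
    # Typical flow (an illustrative sketch; sqlglot's top-level parse_one wraps this):
    #
    #     tokens = Tokenizer().tokenize("SELECT a FROM t")
    #     expressions = Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql="SELECT a FROM t")
    #     assert expressions[0].sql() == "SELECT a FROM t"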

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
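
    # Sketch of how these tables drive precedence climbing (illustrative): each
    # level parses the next-tighter level and folds left, so "a + b * c" resolves
    # FACTOR first (b * c -> exp.Mul) before TERM folds the sum, yielding
    # Add(this=a, expression=Mul(this=b, expression=c)).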

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
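
    # Illustrative examples of the operators above: "x::INT" becomes Cast (TryCast
    # when STRICT_CAST is off), "col -> '$.k'" becomes JSONExtract,
    # "col #>> '{k}'" becomes JSONBExtractScalar, and Postgres's "col ? 'k'"
    # becomes JSONBContains.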

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
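
    # Dispatch sketch (illustrative): _parse_statement looks at the leading token
    # and routes through this table, e.g. "CREATE TABLE t (x INT)" enters
    # _parse_create; tokens without an entry fall back to Tokenizer.COMMANDS and
    # are wrapped as an opaque exp.Command.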

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
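
    # Illustrative: binary_range_parser wires each keyword to its node and wraps
    # the result in _parse_escape, so "x LIKE 'a%' ESCAPE '\\'" parses to
    # Escape(this=Like(...)); BETWEEN, IN and IS use the dedicated methods above.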

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
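
        Example:
            >>> # Illustrative sketch; exp.Where is one of the keys in EXPRESSION_PARSERS.
            >>> parser = Parser()
            >>> tokens = Tokenizer().tokenize("WHERE x > 1")
            >>> where = parser.parse_into(exp.Where, tokens, sql="WHERE x > 1")[0]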
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
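
        Example:
            >>> # Illustrative: kwargs mirror the expression class's arg_types.
            >>> Parser().expression(exp.Not, this=exp.column("x")).sql()
            'NOT x'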
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
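
    # Backtracking sketch (illustrative): callers snapshot the cursor, attempt an
    # alternative, and rewind on failure:
    #
    #     index = self._index
    #     if not self._try_some_alternative():  # hypothetical helper
    #         self._retreat(index)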

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
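
    # Illustrative inputs handled above: "CREATE OR REPLACE VIEW v AS SELECT 1",
    # "CREATE INDEX i ON t (c)", and Snowflake-style clones such as
    # "CREATE TABLE t2 CLONE t1 AT (TIMESTAMP => '2023-01-01')".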

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
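
    # Illustrative Teradata-style inputs for the two methods above:
    # "WITH NO CONCURRENT ISOLATED LOADING FOR ALL" and "LOCKING ROW FOR ACCESS".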

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
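
    # Illustrative conflict clauses handled above: Postgres-style
    # "ON CONFLICT (id) DO UPDATE SET x = 1" / "ON CONFLICT DO NOTHING" and
    # MySQL-style "ON DUPLICATE KEY UPDATE x = 1".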
self._match(TokenType.RETURNING): 1750 return None 1751 1752 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1753 1754 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1755 if not self._match(TokenType.FORMAT): 1756 return None 1757 return self._parse_row_format() 1758 1759 def _parse_row_format( 1760 self, match_row: bool = False 1761 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1762 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1763 return None 1764 1765 if self._match_text_seq("SERDE"): 1766 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1767 1768 self._match_text_seq("DELIMITED") 1769 1770 kwargs = {} 1771 1772 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1773 kwargs["fields"] = self._parse_string() 1774 if self._match_text_seq("ESCAPED", "BY"): 1775 kwargs["escaped"] = self._parse_string() 1776 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1777 kwargs["collection_items"] = self._parse_string() 1778 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1779 kwargs["map_keys"] = self._parse_string() 1780 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1781 kwargs["lines"] = self._parse_string() 1782 if self._match_text_seq("NULL", "DEFINED", "AS"): 1783 kwargs["null"] = self._parse_string() 1784 1785 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1786 1787 def _parse_load(self) -> exp.LoadData | exp.Command: 1788 if self._match_text_seq("DATA"): 1789 local = self._match_text_seq("LOCAL") 1790 self._match_text_seq("INPATH") 1791 inpath = self._parse_string() 1792 overwrite = self._match(TokenType.OVERWRITE) 1793 self._match_pair(TokenType.INTO, TokenType.TABLE) 1794 1795 return self.expression( 1796 exp.LoadData, 1797 this=self._parse_table(schema=True), 1798 local=local, 1799 overwrite=overwrite, 1800 inpath=inpath, 1801 partition=self._parse_partition(), 1802 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1803 serde=self._match_text_seq("SERDE") and self._parse_string(), 1804 ) 1805 return self._parse_as_command(self._prev) 1806 1807 def _parse_delete(self) -> exp.Delete: 1808 self._match(TokenType.FROM) 1809 1810 return self.expression( 1811 exp.Delete, 1812 this=self._parse_table(), 1813 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1814 where=self._parse_where(), 1815 returning=self._parse_returning(), 1816 limit=self._parse_limit(), 1817 ) 1818 1819 def _parse_update(self) -> exp.Update: 1820 return self.expression( 1821 exp.Update, 1822 **{ # type: ignore 1823 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1824 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1825 "from": self._parse_from(modifiers=True), 1826 "where": self._parse_where(), 1827 "returning": self._parse_returning(), 1828 "limit": self._parse_limit(), 1829 }, 1830 ) 1831 1832 def _parse_uncache(self) -> exp.Uncache: 1833 if not self._match(TokenType.TABLE): 1834 self.raise_error("Expecting TABLE after UNCACHE") 1835 1836 return self.expression( 1837 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1838 ) 1839 1840 def _parse_cache(self) -> exp.Cache: 1841 lazy = self._match_text_seq("LAZY") 1842 self._match(TokenType.TABLE) 1843 table = self._parse_table(schema=True) 1844 1845 options = [] 1846 if 
self._match_text_seq("OPTIONS"): 1847 self._match_l_paren() 1848 k = self._parse_string() 1849 self._match(TokenType.EQ) 1850 v = self._parse_string() 1851 options = [k, v] 1852 self._match_r_paren() 1853 1854 self._match(TokenType.ALIAS) 1855 return self.expression( 1856 exp.Cache, 1857 this=table, 1858 lazy=lazy, 1859 options=options, 1860 expression=self._parse_select(nested=True), 1861 ) 1862 1863 def _parse_partition(self) -> t.Optional[exp.Partition]: 1864 if not self._match(TokenType.PARTITION): 1865 return None 1866 1867 return self.expression( 1868 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1869 ) 1870 1871 def _parse_value(self) -> exp.Tuple: 1872 if self._match(TokenType.L_PAREN): 1873 expressions = self._parse_csv(self._parse_conjunction) 1874 self._match_r_paren() 1875 return self.expression(exp.Tuple, expressions=expressions) 1876 1877 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1878 # Source: https://prestodb.io/docs/current/sql/values.html 1879 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1880 1881 def _parse_select( 1882 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1883 ) -> t.Optional[exp.Expression]: 1884 cte = self._parse_with() 1885 if cte: 1886 this = self._parse_statement() 1887 1888 if not this: 1889 self.raise_error("Failed to parse any statement following CTE") 1890 return cte 1891 1892 if "with" in this.arg_types: 1893 this.set("with", cte) 1894 else: 1895 self.raise_error(f"{this.key} does not support CTE") 1896 this = cte 1897 elif self._match(TokenType.SELECT): 1898 comments = self._prev_comments 1899 1900 hint = self._parse_hint() 1901 all_ = self._match(TokenType.ALL) 1902 distinct = self._match(TokenType.DISTINCT) 1903 1904 kind = ( 1905 self._match(TokenType.ALIAS) 1906 and self._match_texts(("STRUCT", "VALUE")) 1907 and self._prev.text 1908 ) 1909 1910 if distinct: 1911 distinct = self.expression( 1912 exp.Distinct, 1913 on=self._parse_value() if self._match(TokenType.ON) else None, 1914 ) 1915 1916 if all_ and distinct: 1917 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1918 1919 limit = self._parse_limit(top=True) 1920 expressions = self._parse_csv(self._parse_expression) 1921 1922 this = self.expression( 1923 exp.Select, 1924 kind=kind, 1925 hint=hint, 1926 distinct=distinct, 1927 expressions=expressions, 1928 limit=limit, 1929 ) 1930 this.comments = comments 1931 1932 into = self._parse_into() 1933 if into: 1934 this.set("into", into) 1935 1936 from_ = self._parse_from() 1937 if from_: 1938 this.set("from", from_) 1939 1940 this = self._parse_query_modifiers(this) 1941 elif (table or nested) and self._match(TokenType.L_PAREN): 1942 if self._match(TokenType.PIVOT): 1943 this = self._parse_simplified_pivot() 1944 elif self._match(TokenType.FROM): 1945 this = exp.select("*").from_( 1946 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1947 ) 1948 else: 1949 this = self._parse_table() if table else self._parse_select(nested=True) 1950 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1951 1952 self._match_r_paren() 1953 1954 # early return so that subquery unions aren't parsed again 1955 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1956 # Union ALL should be a property of the top select node, not the subquery 1957 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1958 elif self._match(TokenType.VALUES): 1959 this = self.expression( 1960 exp.Values, 1961 
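                # Illustrative note: per the Presto comment in _parse_value above,
                # VALUES 1, 2 yields one column and two rows, so each bare scalar
                # below becomes its own single-item exp.Tuple, e.g. (approximately)
                #   sqlglot.parse_one("SELECT * FROM (VALUES 1, 2)")
                # -> exp.Values(expressions=[Tuple([1]), Tuple([2])])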
expressions=self._parse_csv(self._parse_value), 1962 alias=self._parse_table_alias(), 1963 ) 1964 else: 1965 this = None 1966 1967 return self._parse_set_operations(this) 1968 1969 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1970 if not skip_with_token and not self._match(TokenType.WITH): 1971 return None 1972 1973 comments = self._prev_comments 1974 recursive = self._match(TokenType.RECURSIVE) 1975 1976 expressions = [] 1977 while True: 1978 expressions.append(self._parse_cte()) 1979 1980 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1981 break 1982 else: 1983 self._match(TokenType.WITH) 1984 1985 return self.expression( 1986 exp.With, comments=comments, expressions=expressions, recursive=recursive 1987 ) 1988 1989 def _parse_cte(self) -> exp.CTE: 1990 alias = self._parse_table_alias() 1991 if not alias or not alias.this: 1992 self.raise_error("Expected CTE to have alias") 1993 1994 self._match(TokenType.ALIAS) 1995 return self.expression( 1996 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1997 ) 1998 1999 def _parse_table_alias( 2000 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2001 ) -> t.Optional[exp.TableAlias]: 2002 any_token = self._match(TokenType.ALIAS) 2003 alias = ( 2004 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2005 or self._parse_string_as_identifier() 2006 ) 2007 2008 index = self._index 2009 if self._match(TokenType.L_PAREN): 2010 columns = self._parse_csv(self._parse_function_parameter) 2011 self._match_r_paren() if columns else self._retreat(index) 2012 else: 2013 columns = None 2014 2015 if not alias and not columns: 2016 return None 2017 2018 return self.expression(exp.TableAlias, this=alias, columns=columns) 2019 2020 def _parse_subquery( 2021 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2022 ) -> t.Optional[exp.Subquery]: 2023 if not this: 2024 return None 2025 2026 return self.expression( 2027 exp.Subquery, 2028 this=this, 2029 pivots=self._parse_pivots(), 2030 alias=self._parse_table_alias() if parse_alias else None, 2031 ) 2032 2033 def _parse_query_modifiers( 2034 self, this: t.Optional[exp.Expression] 2035 ) -> t.Optional[exp.Expression]: 2036 if isinstance(this, self.MODIFIABLES): 2037 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2038 expression = parser(self) 2039 2040 if expression: 2041 if key == "limit": 2042 offset = expression.args.pop("offset", None) 2043 if offset: 2044 this.set("offset", exp.Offset(expression=offset)) 2045 this.set(key, expression) 2046 return this 2047 2048 def _parse_hint(self) -> t.Optional[exp.Hint]: 2049 if self._match(TokenType.HINT): 2050 hints = self._parse_csv(self._parse_function) 2051 2052 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2053 self.raise_error("Expected */ after HINT") 2054 2055 return self.expression(exp.Hint, expressions=hints) 2056 2057 return None 2058 2059 def _parse_into(self) -> t.Optional[exp.Into]: 2060 if not self._match(TokenType.INTO): 2061 return None 2062 2063 temp = self._match(TokenType.TEMPORARY) 2064 unlogged = self._match_text_seq("UNLOGGED") 2065 self._match(TokenType.TABLE) 2066 2067 return self.expression( 2068 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2069 ) 2070 2071 def _parse_from( 2072 self, modifiers: bool = False, skip_from_token: bool = False 2073 ) -> t.Optional[exp.From]: 2074 if not skip_from_token and not self._match(TokenType.FROM): 2075 return None 
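        # Illustrative note: the modifiers=True path is what _parse_update relies on
        # for Postgres-style UPDATE t SET x = 1 FROM u WHERE t.id = u.id, where the
        # FROM source can itself be followed by joins and other query modifiers.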
2076 2077 comments = self._prev_comments 2078 this = self._parse_table() 2079 2080 return self.expression( 2081 exp.From, 2082 comments=comments, 2083 this=self._parse_query_modifiers(this) if modifiers else this, 2084 ) 2085 2086 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2087 if not self._match(TokenType.MATCH_RECOGNIZE): 2088 return None 2089 2090 self._match_l_paren() 2091 2092 partition = self._parse_partition_by() 2093 order = self._parse_order() 2094 measures = ( 2095 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2096 ) 2097 2098 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2099 rows = exp.var("ONE ROW PER MATCH") 2100 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2101 text = "ALL ROWS PER MATCH" 2102 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2103 text += " SHOW EMPTY MATCHES" 2104 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2105 text += " OMIT EMPTY MATCHES" 2106 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2107 text += " WITH UNMATCHED ROWS" 2108 rows = exp.var(text) 2109 else: 2110 rows = None 2111 2112 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2113 text = "AFTER MATCH SKIP" 2114 if self._match_text_seq("PAST", "LAST", "ROW"): 2115 text += " PAST LAST ROW" 2116 elif self._match_text_seq("TO", "NEXT", "ROW"): 2117 text += " TO NEXT ROW" 2118 elif self._match_text_seq("TO", "FIRST"): 2119 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2120 elif self._match_text_seq("TO", "LAST"): 2121 text += f" TO LAST {self._advance_any().text}" # type: ignore 2122 after = exp.var(text) 2123 else: 2124 after = None 2125 2126 if self._match_text_seq("PATTERN"): 2127 self._match_l_paren() 2128 2129 if not self._curr: 2130 self.raise_error("Expecting )", self._curr) 2131 2132 paren = 1 2133 start = self._curr 2134 2135 while self._curr and paren > 0: 2136 if self._curr.token_type == TokenType.L_PAREN: 2137 paren += 1 2138 if self._curr.token_type == TokenType.R_PAREN: 2139 paren -= 1 2140 2141 end = self._prev 2142 self._advance() 2143 2144 if paren > 0: 2145 self.raise_error("Expecting )", self._curr) 2146 2147 pattern = exp.var(self._find_sql(start, end)) 2148 else: 2149 pattern = None 2150 2151 define = ( 2152 self._parse_csv( 2153 lambda: self.expression( 2154 exp.Alias, 2155 alias=self._parse_id_var(any_token=True), 2156 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2157 ) 2158 ) 2159 if self._match_text_seq("DEFINE") 2160 else None 2161 ) 2162 2163 self._match_r_paren() 2164 2165 return self.expression( 2166 exp.MatchRecognize, 2167 partition_by=partition, 2168 order=order, 2169 measures=measures, 2170 rows=rows, 2171 after=after, 2172 pattern=pattern, 2173 define=define, 2174 alias=self._parse_table_alias(), 2175 ) 2176 2177 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2178 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2179 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2180 2181 if outer_apply or cross_apply: 2182 this = self._parse_select(table=True) 2183 view = None 2184 outer = not cross_apply 2185 elif self._match(TokenType.LATERAL): 2186 this = self._parse_select(table=True) 2187 view = self._match(TokenType.VIEW) 2188 outer = self._match(TokenType.OUTER) 2189 else: 2190 return None 2191 2192 if not this: 2193 this = self._parse_function() or self._parse_id_var(any_token=False) 2194 while self._match(TokenType.DOT): 2195 this = exp.Dot( 2196 this=this, 2197
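                    # Illustrative note: this loop assembles dotted names after
                    # LATERAL, so a qualified table function such as
                    #   LATERAL db.explode(xs) ...
                    # ends up as a nested exp.Dot chain (Hive's LATERAL VIEW
                    # EXPLODE(xs) takes the plain _parse_function branch above).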
expression=self._parse_function() or self._parse_id_var(any_token=False), 2198 ) 2199 2200 if view: 2201 table = self._parse_id_var(any_token=False) 2202 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2203 table_alias: t.Optional[exp.TableAlias] = self.expression( 2204 exp.TableAlias, this=table, columns=columns 2205 ) 2206 elif isinstance(this, exp.Subquery) and this.alias: 2207 # Ensures parity between the Subquery's and the Lateral's "alias" args 2208 table_alias = this.args["alias"].copy() 2209 else: 2210 table_alias = self._parse_table_alias() 2211 2212 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2213 2214 def _parse_join_parts( 2215 self, 2216 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2217 return ( 2218 self._match_set(self.JOIN_METHODS) and self._prev, 2219 self._match_set(self.JOIN_SIDES) and self._prev, 2220 self._match_set(self.JOIN_KINDS) and self._prev, 2221 ) 2222 2223 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2224 if self._match(TokenType.COMMA): 2225 return self.expression(exp.Join, this=self._parse_table()) 2226 2227 index = self._index 2228 method, side, kind = self._parse_join_parts() 2229 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2230 join = self._match(TokenType.JOIN) 2231 2232 if not skip_join_token and not join: 2233 self._retreat(index) 2234 kind = None 2235 method = None 2236 side = None 2237 2238 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2239 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2240 2241 if not skip_join_token and not join and not outer_apply and not cross_apply: 2242 return None 2243 2244 if outer_apply: 2245 side = Token(TokenType.LEFT, "LEFT") 2246 2247 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2248 2249 if method: 2250 kwargs["method"] = method.text 2251 if side: 2252 kwargs["side"] = side.text 2253 if kind: 2254 kwargs["kind"] = kind.text 2255 if hint: 2256 kwargs["hint"] = hint 2257 2258 if self._match(TokenType.ON): 2259 kwargs["on"] = self._parse_conjunction() 2260 elif self._match(TokenType.USING): 2261 kwargs["using"] = self._parse_wrapped_id_vars() 2262 2263 return self.expression(exp.Join, **kwargs) 2264 2265 def _parse_index( 2266 self, 2267 index: t.Optional[exp.Expression] = None, 2268 ) -> t.Optional[exp.Index]: 2269 if index: 2270 unique = None 2271 primary = None 2272 amp = None 2273 2274 self._match(TokenType.ON) 2275 self._match(TokenType.TABLE) # hive 2276 table = self._parse_table_parts(schema=True) 2277 else: 2278 unique = self._match(TokenType.UNIQUE) 2279 primary = self._match_text_seq("PRIMARY") 2280 amp = self._match_text_seq("AMP") 2281 2282 if not self._match(TokenType.INDEX): 2283 return None 2284 2285 index = self._parse_id_var() 2286 table = None 2287 2288 using = self._parse_field() if self._match(TokenType.USING) else None 2289 2290 if self._match(TokenType.L_PAREN, advance=False): 2291 columns = self._parse_wrapped_csv(self._parse_ordered) 2292 else: 2293 columns = None 2294 2295 return self.expression( 2296 exp.Index, 2297 this=index, 2298 table=table, 2299 using=using, 2300 columns=columns, 2301 unique=unique, 2302 primary=primary, 2303 amp=amp, 2304 partition_by=self._parse_partition_by(), 2305 ) 2306 2307 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2308 hints: t.List[exp.Expression] = [] 2309 if self._match_pair(TokenType.WITH, 
TokenType.L_PAREN): 2310 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2311 hints.append( 2312 self.expression( 2313 exp.WithTableHint, 2314 expressions=self._parse_csv( 2315 lambda: self._parse_function() or self._parse_var(any_token=True) 2316 ), 2317 ) 2318 ) 2319 self._match_r_paren() 2320 else: 2321 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2322 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2323 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2324 2325 self._match_texts({"INDEX", "KEY"}) 2326 if self._match(TokenType.FOR): 2327 hint.set("target", self._advance_any() and self._prev.text.upper()) 2328 2329 hint.set("expressions", self._parse_wrapped_id_vars()) 2330 hints.append(hint) 2331 2332 return hints or None 2333 2334 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2335 return ( 2336 (not schema and self._parse_function(optional_parens=False)) 2337 or self._parse_id_var(any_token=False) 2338 or self._parse_string_as_identifier() 2339 or self._parse_placeholder() 2340 ) 2341 2342 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2343 catalog = None 2344 db = None 2345 table = self._parse_table_part(schema=schema) 2346 2347 while self._match(TokenType.DOT): 2348 if catalog: 2349 # This allows nesting the table in arbitrarily many dot expressions if needed 2350 table = self.expression( 2351 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2352 ) 2353 else: 2354 catalog = db 2355 db = table 2356 table = self._parse_table_part(schema=schema) 2357 2358 if not table: 2359 self.raise_error(f"Expected table name but got {self._curr}") 2360 2361 return self.expression( 2362 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2363 ) 2364 2365 def _parse_table( 2366 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2367 ) -> t.Optional[exp.Expression]: 2368 lateral = self._parse_lateral() 2369 if lateral: 2370 return lateral 2371 2372 unnest = self._parse_unnest() 2373 if unnest: 2374 return unnest 2375 2376 values = self._parse_derived_table_values() 2377 if values: 2378 return values 2379 2380 subquery = self._parse_select(table=True) 2381 if subquery: 2382 if not subquery.args.get("pivots"): 2383 subquery.set("pivots", self._parse_pivots()) 2384 return subquery 2385 2386 this: exp.Expression = self._parse_table_parts(schema=schema) 2387 2388 if schema: 2389 return self._parse_schema(this=this) 2390 2391 if self.ALIAS_POST_TABLESAMPLE: 2392 table_sample = self._parse_table_sample() 2393 2394 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2395 if alias: 2396 this.set("alias", alias) 2397 2398 if not this.args.get("pivots"): 2399 this.set("pivots", self._parse_pivots()) 2400 2401 this.set("hints", self._parse_table_hints()) 2402 2403 if not self.ALIAS_POST_TABLESAMPLE: 2404 table_sample = self._parse_table_sample() 2405 2406 if table_sample: 2407 table_sample.set("this", this) 2408 this = table_sample 2409 2410 return this 2411 2412 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2413 if not self._match(TokenType.UNNEST): 2414 return None 2415 2416 expressions = self._parse_wrapped_csv(self._parse_type) 2417 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2418 2419 alias = self._parse_table_alias() if with_alias else None 2420 2421 if alias and self.UNNEST_COLUMN_ONLY: 2422 if 
alias.args.get("columns"): 2423 self.raise_error("Unexpected extra column alias in unnest.") 2424 2425 alias.set("columns", [alias.this]) 2426 alias.set("this", None) 2427 2428 offset = None 2429 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2430 self._match(TokenType.ALIAS) 2431 offset = self._parse_id_var() or exp.to_identifier("offset") 2432 2433 return self.expression( 2434 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2435 ) 2436 2437 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2438 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2439 if not is_derived and not self._match(TokenType.VALUES): 2440 return None 2441 2442 expressions = self._parse_csv(self._parse_value) 2443 alias = self._parse_table_alias() 2444 2445 if is_derived: 2446 self._match_r_paren() 2447 2448 return self.expression( 2449 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2450 ) 2451 2452 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2453 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2454 as_modifier and self._match_text_seq("USING", "SAMPLE") 2455 ): 2456 return None 2457 2458 bucket_numerator = None 2459 bucket_denominator = None 2460 bucket_field = None 2461 percent = None 2462 rows = None 2463 size = None 2464 seed = None 2465 2466 kind = ( 2467 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2468 ) 2469 method = self._parse_var(tokens=(TokenType.ROW,)) 2470 2471 self._match(TokenType.L_PAREN) 2472 2473 num = self._parse_number() 2474 2475 if self._match_text_seq("BUCKET"): 2476 bucket_numerator = self._parse_number() 2477 self._match_text_seq("OUT", "OF") 2478 bucket_denominator = self._parse_number() 2479 self._match(TokenType.ON) 2480 bucket_field = self._parse_field() 2481 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2482 percent = num 2483 elif self._match(TokenType.ROWS): 2484 rows = num 2485 else: 2486 size = num 2487 2488 self._match(TokenType.R_PAREN) 2489 2490 if self._match(TokenType.L_PAREN): 2491 method = self._parse_var() 2492 seed = self._match(TokenType.COMMA) and self._parse_number() 2493 self._match_r_paren() 2494 elif self._match_texts(("SEED", "REPEATABLE")): 2495 seed = self._parse_wrapped(self._parse_number) 2496 2497 return self.expression( 2498 exp.TableSample, 2499 method=method, 2500 bucket_numerator=bucket_numerator, 2501 bucket_denominator=bucket_denominator, 2502 bucket_field=bucket_field, 2503 percent=percent, 2504 rows=rows, 2505 size=size, 2506 seed=seed, 2507 kind=kind, 2508 ) 2509 2510 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2511 return list(iter(self._parse_pivot, None)) 2512 2513 # https://duckdb.org/docs/sql/statements/pivot 2514 def _parse_simplified_pivot(self) -> exp.Pivot: 2515 def _parse_on() -> t.Optional[exp.Expression]: 2516 this = self._parse_bitwise() 2517 return self._parse_in(this) if self._match(TokenType.IN) else this 2518 2519 this = self._parse_table() 2520 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2521 using = self._match(TokenType.USING) and self._parse_csv( 2522 lambda: self._parse_alias(self._parse_function()) 2523 ) 2524 group = self._parse_group() 2525 return self.expression( 2526 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2527 ) 2528 2529 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2530 index = self._index 2531 2532 if
self._match(TokenType.PIVOT): 2533 unpivot = False 2534 elif self._match(TokenType.UNPIVOT): 2535 unpivot = True 2536 else: 2537 return None 2538 2539 expressions = [] 2540 field = None 2541 2542 if not self._match(TokenType.L_PAREN): 2543 self._retreat(index) 2544 return None 2545 2546 if unpivot: 2547 expressions = self._parse_csv(self._parse_column) 2548 else: 2549 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2550 2551 if not expressions: 2552 self.raise_error("Failed to parse PIVOT's aggregation list") 2553 2554 if not self._match(TokenType.FOR): 2555 self.raise_error("Expecting FOR") 2556 2557 value = self._parse_column() 2558 2559 if not self._match(TokenType.IN): 2560 self.raise_error("Expecting IN") 2561 2562 field = self._parse_in(value, alias=True) 2563 2564 self._match_r_paren() 2565 2566 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2567 2568 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2569 pivot.set("alias", self._parse_table_alias()) 2570 2571 if not unpivot: 2572 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2573 2574 columns: t.List[exp.Expression] = [] 2575 for fld in pivot.args["field"].expressions: 2576 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2577 for name in names: 2578 if self.PREFIXED_PIVOT_COLUMNS: 2579 name = f"{name}_{field_name}" if name else field_name 2580 else: 2581 name = f"{field_name}_{name}" if name else field_name 2582 2583 columns.append(exp.to_identifier(name)) 2584 2585 pivot.set("columns", columns) 2586 2587 return pivot 2588 2589 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2590 return [agg.alias for agg in aggregations] 2591 2592 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2593 if not skip_where_token and not self._match(TokenType.WHERE): 2594 return None 2595 2596 return self.expression( 2597 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2598 ) 2599 2600 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2601 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2602 return None 2603 2604 elements = defaultdict(list) 2605 2606 while True: 2607 expressions = self._parse_csv(self._parse_conjunction) 2608 if expressions: 2609 elements["expressions"].extend(expressions) 2610 2611 grouping_sets = self._parse_grouping_sets() 2612 if grouping_sets: 2613 elements["grouping_sets"].extend(grouping_sets) 2614 2615 rollup = None 2616 cube = None 2617 totals = None 2618 2619 with_ = self._match(TokenType.WITH) 2620 if self._match(TokenType.ROLLUP): 2621 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2622 elements["rollup"].extend(ensure_list(rollup)) 2623 2624 if self._match(TokenType.CUBE): 2625 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2626 elements["cube"].extend(ensure_list(cube)) 2627 2628 if self._match_text_seq("TOTALS"): 2629 totals = True 2630 elements["totals"] = True # type: ignore 2631 2632 if not (grouping_sets or rollup or cube or totals): 2633 break 2634 2635 return self.expression(exp.Group, **elements) # type: ignore 2636 2637 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2638 if not self._match(TokenType.GROUPING_SETS): 2639 return None 2640 2641 return self._parse_wrapped_csv(self._parse_grouping_set) 2642 2643 def _parse_grouping_set(self) -> 
t.Optional[exp.Expression]: 2644 if self._match(TokenType.L_PAREN): 2645 grouping_set = self._parse_csv(self._parse_column) 2646 self._match_r_paren() 2647 return self.expression(exp.Tuple, expressions=grouping_set) 2648 2649 return self._parse_column() 2650 2651 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2652 if not skip_having_token and not self._match(TokenType.HAVING): 2653 return None 2654 return self.expression(exp.Having, this=self._parse_conjunction()) 2655 2656 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2657 if not self._match(TokenType.QUALIFY): 2658 return None 2659 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2660 2661 def _parse_order( 2662 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2663 ) -> t.Optional[exp.Expression]: 2664 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2665 return this 2666 2667 return self.expression( 2668 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2669 ) 2670 2671 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2672 if not self._match(token): 2673 return None 2674 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2675 2676 def _parse_ordered(self) -> exp.Ordered: 2677 this = self._parse_conjunction() 2678 self._match(TokenType.ASC) 2679 2680 is_desc = self._match(TokenType.DESC) 2681 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2682 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2683 desc = is_desc or False 2684 asc = not desc 2685 nulls_first = is_nulls_first or False 2686 explicitly_null_ordered = is_nulls_first or is_nulls_last 2687 2688 if ( 2689 not explicitly_null_ordered 2690 and ( 2691 (asc and self.NULL_ORDERING == "nulls_are_small") 2692 or (desc and self.NULL_ORDERING != "nulls_are_small") 2693 ) 2694 and self.NULL_ORDERING != "nulls_are_last" 2695 ): 2696 nulls_first = True 2697 2698 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2699 2700 def _parse_limit( 2701 self, this: t.Optional[exp.Expression] = None, top: bool = False 2702 ) -> t.Optional[exp.Expression]: 2703 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2704 limit_paren = self._match(TokenType.L_PAREN) 2705 expression = self._parse_number() if top else self._parse_term() 2706 2707 if self._match(TokenType.COMMA): 2708 offset = expression 2709 expression = self._parse_term() 2710 else: 2711 offset = None 2712 2713 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2714 2715 if limit_paren: 2716 self._match_r_paren() 2717 2718 return limit_exp 2719 2720 if self._match(TokenType.FETCH): 2721 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2722 direction = self._prev.text if direction else "FIRST" 2723 2724 count = self._parse_number() 2725 percent = self._match(TokenType.PERCENT) 2726 2727 self._match_set((TokenType.ROW, TokenType.ROWS)) 2728 2729 only = self._match_text_seq("ONLY") 2730 with_ties = self._match_text_seq("WITH", "TIES") 2731 2732 if only and with_ties: 2733 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2734 2735 return self.expression( 2736 exp.Fetch, 2737 direction=direction, 2738 count=count, 2739 percent=percent, 2740 with_ties=with_ties, 2741 ) 2742 2743 return this 2744 2745 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2746 if not 
self._match(TokenType.OFFSET): 2747 return this 2748 2749 count = self._parse_number() 2750 self._match_set((TokenType.ROW, TokenType.ROWS)) 2751 return self.expression(exp.Offset, this=this, expression=count) 2752 2753 def _parse_locks(self) -> t.List[exp.Lock]: 2754 locks = [] 2755 while True: 2756 if self._match_text_seq("FOR", "UPDATE"): 2757 update = True 2758 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2759 "LOCK", "IN", "SHARE", "MODE" 2760 ): 2761 update = False 2762 else: 2763 break 2764 2765 expressions = None 2766 if self._match_text_seq("OF"): 2767 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2768 2769 wait: t.Optional[bool | exp.Expression] = None 2770 if self._match_text_seq("NOWAIT"): 2771 wait = True 2772 elif self._match_text_seq("WAIT"): 2773 wait = self._parse_primary() 2774 elif self._match_text_seq("SKIP", "LOCKED"): 2775 wait = False 2776 2777 locks.append( 2778 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2779 ) 2780 2781 return locks 2782 2783 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2784 if not self._match_set(self.SET_OPERATIONS): 2785 return this 2786 2787 token_type = self._prev.token_type 2788 2789 if token_type == TokenType.UNION: 2790 expression = exp.Union 2791 elif token_type == TokenType.EXCEPT: 2792 expression = exp.Except 2793 else: 2794 expression = exp.Intersect 2795 2796 return self.expression( 2797 expression, 2798 this=this, 2799 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2800 expression=self._parse_set_operations(self._parse_select(nested=True)), 2801 ) 2802 2803 def _parse_expression(self) -> t.Optional[exp.Expression]: 2804 return self._parse_alias(self._parse_conjunction()) 2805 2806 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2807 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2808 2809 def _parse_equality(self) -> t.Optional[exp.Expression]: 2810 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2811 2812 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2813 return self._parse_tokens(self._parse_range, self.COMPARISON) 2814 2815 def _parse_range(self) -> t.Optional[exp.Expression]: 2816 this = self._parse_bitwise() 2817 negate = self._match(TokenType.NOT) 2818 2819 if self._match_set(self.RANGE_PARSERS): 2820 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2821 if not expression: 2822 return this 2823 2824 this = expression 2825 elif self._match(TokenType.ISNULL): 2826 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2827 2828 # Postgres supports ISNULL and NOTNULL for conditions. 
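        # Illustrative note: "x ISNULL" above becomes, roughly,
        #   exp.Is(this=<x>, expression=exp.Null())
        # and "x NOTNULL" below is the same node wrapped in exp.Not.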
2829 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2830 if self._match(TokenType.NOTNULL): 2831 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2832 this = self.expression(exp.Not, this=this) 2833 2834 if negate: 2835 this = self.expression(exp.Not, this=this) 2836 2837 if self._match(TokenType.IS): 2838 this = self._parse_is(this) 2839 2840 return this 2841 2842 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2843 index = self._index - 1 2844 negate = self._match(TokenType.NOT) 2845 2846 if self._match_text_seq("DISTINCT", "FROM"): 2847 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2848 return self.expression(klass, this=this, expression=self._parse_expression()) 2849 2850 expression = self._parse_null() or self._parse_boolean() 2851 if not expression: 2852 self._retreat(index) 2853 return None 2854 2855 this = self.expression(exp.Is, this=this, expression=expression) 2856 return self.expression(exp.Not, this=this) if negate else this 2857 2858 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2859 unnest = self._parse_unnest(with_alias=False) 2860 if unnest: 2861 this = self.expression(exp.In, this=this, unnest=unnest) 2862 elif self._match(TokenType.L_PAREN): 2863 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2864 2865 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2866 this = self.expression(exp.In, this=this, query=expressions[0]) 2867 else: 2868 this = self.expression(exp.In, this=this, expressions=expressions) 2869 2870 self._match_r_paren(this) 2871 else: 2872 this = self.expression(exp.In, this=this, field=self._parse_field()) 2873 2874 return this 2875 2876 def _parse_between(self, this: exp.Expression) -> exp.Between: 2877 low = self._parse_bitwise() 2878 self._match(TokenType.AND) 2879 high = self._parse_bitwise() 2880 return self.expression(exp.Between, this=this, low=low, high=high) 2881 2882 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2883 if not self._match(TokenType.ESCAPE): 2884 return this 2885 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2886 2887 def _parse_interval(self) -> t.Optional[exp.Interval]: 2888 if not self._match(TokenType.INTERVAL): 2889 return None 2890 2891 if self._match(TokenType.STRING, advance=False): 2892 this = self._parse_primary() 2893 else: 2894 this = self._parse_term() 2895 2896 unit = self._parse_function() or self._parse_var() 2897 2898 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2899 # each INTERVAL expression into this canonical form so it's easy to transpile 2900 if this and this.is_number: 2901 this = exp.Literal.string(this.name) 2902 elif this and this.is_string: 2903 parts = this.name.split() 2904 2905 if len(parts) == 2: 2906 if unit: 2907 # this is not actually a unit, it's something else 2908 unit = None 2909 self._retreat(self._index - 1) 2910 else: 2911 this = exp.Literal.string(parts[0]) 2912 unit = self.expression(exp.Var, this=parts[1]) 2913 2914 return self.expression(exp.Interval, this=this, unit=unit) 2915 2916 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2917 this = self._parse_term() 2918 2919 while True: 2920 if self._match_set(self.BITWISE): 2921 this = self.expression( 2922 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2923 ) 2924 elif self._match_pair(TokenType.LT, TokenType.LT): 2925 
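            # Illustrative note: "<<" reaches the parser as two adjacent LT tokens,
            # so SELECT 1 << 2 is reassembled here into, approximately,
            #   exp.BitwiseLeftShift(this=<1>, expression=<2>)
            # (and ">>" below is handled the same way via two GT tokens).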
this = self.expression( 2926 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2927 ) 2928 elif self._match_pair(TokenType.GT, TokenType.GT): 2929 this = self.expression( 2930 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2931 ) 2932 else: 2933 break 2934 2935 return this 2936 2937 def _parse_term(self) -> t.Optional[exp.Expression]: 2938 return self._parse_tokens(self._parse_factor, self.TERM) 2939 2940 def _parse_factor(self) -> t.Optional[exp.Expression]: 2941 return self._parse_tokens(self._parse_unary, self.FACTOR) 2942 2943 def _parse_unary(self) -> t.Optional[exp.Expression]: 2944 if self._match_set(self.UNARY_PARSERS): 2945 return self.UNARY_PARSERS[self._prev.token_type](self) 2946 return self._parse_at_time_zone(self._parse_type()) 2947 2948 def _parse_type(self) -> t.Optional[exp.Expression]: 2949 interval = self._parse_interval() 2950 if interval: 2951 return interval 2952 2953 index = self._index 2954 data_type = self._parse_types(check_func=True) 2955 this = self._parse_column() 2956 2957 if data_type: 2958 if isinstance(this, exp.Literal): 2959 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2960 if parser: 2961 return parser(self, this, data_type) 2962 return self.expression(exp.Cast, this=this, to=data_type) 2963 if not data_type.expressions: 2964 self._retreat(index) 2965 return self._parse_column() 2966 return self._parse_column_ops(data_type) 2967 2968 return this 2969 2970 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2971 this = self._parse_type() 2972 if not this: 2973 return None 2974 2975 return self.expression( 2976 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2977 ) 2978 2979 def _parse_types( 2980 self, check_func: bool = False, schema: bool = False 2981 ) -> t.Optional[exp.Expression]: 2982 index = self._index 2983 2984 prefix = self._match_text_seq("SYSUDTLIB", ".") 2985 2986 if not self._match_set(self.TYPE_TOKENS): 2987 return None 2988 2989 type_token = self._prev.token_type 2990 2991 if type_token == TokenType.PSEUDO_TYPE: 2992 return self.expression(exp.PseudoType, this=self._prev.text) 2993 2994 nested = type_token in self.NESTED_TYPE_TOKENS 2995 is_struct = type_token == TokenType.STRUCT 2996 expressions = None 2997 maybe_func = False 2998 2999 if self._match(TokenType.L_PAREN): 3000 if is_struct: 3001 expressions = self._parse_csv(self._parse_struct_types) 3002 elif nested: 3003 expressions = self._parse_csv( 3004 lambda: self._parse_types(check_func=check_func, schema=schema) 3005 ) 3006 elif type_token in self.ENUM_TYPE_TOKENS: 3007 expressions = self._parse_csv(self._parse_primary) 3008 else: 3009 expressions = self._parse_csv(self._parse_type_size) 3010 3011 if not expressions or not self._match(TokenType.R_PAREN): 3012 self._retreat(index) 3013 return None 3014 3015 maybe_func = True 3016 3017 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3018 this = exp.DataType( 3019 this=exp.DataType.Type.ARRAY, 3020 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3021 nested=True, 3022 ) 3023 3024 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3025 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3026 3027 return this 3028 3029 if self._match(TokenType.L_BRACKET): 3030 self._retreat(index) 3031 return None 3032 3033 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3034 if nested and self._match(TokenType.LT): 3035 if is_struct: 3036 expressions = 
self._parse_csv(self._parse_struct_types) 3037 else: 3038 expressions = self._parse_csv( 3039 lambda: self._parse_types(check_func=check_func, schema=schema) 3040 ) 3041 3042 if not self._match(TokenType.GT): 3043 self.raise_error("Expecting >") 3044 3045 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3046 values = self._parse_csv(self._parse_conjunction) 3047 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3048 3049 value: t.Optional[exp.Expression] = None 3050 if type_token in self.TIMESTAMPS: 3051 if self._match_text_seq("WITH", "TIME", "ZONE"): 3052 maybe_func = False 3053 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3054 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3055 maybe_func = False 3056 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3057 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3058 maybe_func = False 3059 elif type_token == TokenType.INTERVAL: 3060 unit = self._parse_var() 3061 3062 if not unit: 3063 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3064 else: 3065 value = self.expression(exp.Interval, unit=unit) 3066 3067 if maybe_func and check_func: 3068 index2 = self._index 3069 peek = self._parse_string() 3070 3071 if not peek: 3072 self._retreat(index) 3073 return None 3074 3075 self._retreat(index2) 3076 3077 if value: 3078 return value 3079 3080 return exp.DataType( 3081 this=exp.DataType.Type[type_token.value.upper()], 3082 expressions=expressions, 3083 nested=nested, 3084 values=values, 3085 prefix=prefix, 3086 ) 3087 3088 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3089 this = self._parse_type() or self._parse_id_var() 3090 self._match(TokenType.COLON) 3091 return self._parse_column_def(this) 3092 3093 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3094 if not self._match_text_seq("AT", "TIME", "ZONE"): 3095 return this 3096 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3097 3098 def _parse_column(self) -> t.Optional[exp.Expression]: 3099 this = self._parse_field() 3100 if isinstance(this, exp.Identifier): 3101 this = self.expression(exp.Column, this=this) 3102 elif not this: 3103 return self._parse_bracket(this) 3104 return self._parse_column_ops(this) 3105 3106 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3107 this = self._parse_bracket(this) 3108 3109 while self._match_set(self.COLUMN_OPERATORS): 3110 op_token = self._prev.token_type 3111 op = self.COLUMN_OPERATORS.get(op_token) 3112 3113 if op_token == TokenType.DCOLON: 3114 field = self._parse_types() 3115 if not field: 3116 self.raise_error("Expected type") 3117 elif op and self._curr: 3118 self._advance() 3119 value = self._prev.text 3120 field = ( 3121 exp.Literal.number(value) 3122 if self._prev.token_type == TokenType.NUMBER 3123 else exp.Literal.string(value) 3124 ) 3125 else: 3126 field = self._parse_field(anonymous_func=True, any_token=True) 3127 3128 if isinstance(field, exp.Func): 3129 # bigquery allows function calls like x.y.count(...) 3130 # SAFE.SUBSTR(...) 
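                # Illustrative note (approximate): in SELECT SAFE.SUBSTR('a', 1),
                # "SAFE" is first parsed as a bare identifier and SUBSTR(...) then
                # arrives as the field here, so the chain is rewritten as a dotted
                # function reference rather than a column access.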
3131 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3132 this = self._replace_columns_with_dots(this) 3133 3134 if op: 3135 this = op(self, this, field) 3136 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3137 this = self.expression( 3138 exp.Column, 3139 this=field, 3140 table=this.this, 3141 db=this.args.get("table"), 3142 catalog=this.args.get("db"), 3143 ) 3144 else: 3145 this = self.expression(exp.Dot, this=this, expression=field) 3146 this = self._parse_bracket(this) 3147 return this 3148 3149 def _parse_primary(self) -> t.Optional[exp.Expression]: 3150 if self._match_set(self.PRIMARY_PARSERS): 3151 token_type = self._prev.token_type 3152 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3153 3154 if token_type == TokenType.STRING: 3155 expressions = [primary] 3156 while self._match(TokenType.STRING): 3157 expressions.append(exp.Literal.string(self._prev.text)) 3158 3159 if len(expressions) > 1: 3160 return self.expression(exp.Concat, expressions=expressions) 3161 3162 return primary 3163 3164 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3165 return exp.Literal.number(f"0.{self._prev.text}") 3166 3167 if self._match(TokenType.L_PAREN): 3168 comments = self._prev_comments 3169 query = self._parse_select() 3170 3171 if query: 3172 expressions = [query] 3173 else: 3174 expressions = self._parse_csv(self._parse_expression) 3175 3176 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3177 3178 if isinstance(this, exp.Subqueryable): 3179 this = self._parse_set_operations( 3180 self._parse_subquery(this=this, parse_alias=False) 3181 ) 3182 elif len(expressions) > 1: 3183 this = self.expression(exp.Tuple, expressions=expressions) 3184 else: 3185 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3186 3187 if this: 3188 this.add_comments(comments) 3189 3190 self._match_r_paren(expression=this) 3191 return this 3192 3193 return None 3194 3195 def _parse_field( 3196 self, 3197 any_token: bool = False, 3198 tokens: t.Optional[t.Collection[TokenType]] = None, 3199 anonymous_func: bool = False, 3200 ) -> t.Optional[exp.Expression]: 3201 return ( 3202 self._parse_primary() 3203 or self._parse_function(anonymous=anonymous_func) 3204 or self._parse_id_var(any_token=any_token, tokens=tokens) 3205 ) 3206 3207 def _parse_function( 3208 self, 3209 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3210 anonymous: bool = False, 3211 optional_parens: bool = True, 3212 ) -> t.Optional[exp.Expression]: 3213 if not self._curr: 3214 return None 3215 3216 token_type = self._curr.token_type 3217 3218 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3219 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3220 3221 if not self._next or self._next.token_type != TokenType.L_PAREN: 3222 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3223 self._advance() 3224 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3225 3226 return None 3227 3228 if token_type not in self.FUNC_TOKENS: 3229 return None 3230 3231 this = self._curr.text 3232 upper = this.upper() 3233 self._advance(2) 3234 3235 parser = self.FUNCTION_PARSERS.get(upper) 3236 3237 if parser and not anonymous: 3238 this = parser(self) 3239 else: 3240 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3241 3242 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3243 this = self.expression(subquery_predicate, this=self._parse_select()) 
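                # Illustrative note: this is the path taken by subquery predicates,
                # e.g. EXISTS (SELECT 1 FROM t) parses to, roughly,
                #   exp.Exists(this=<Select>)
                # and x = ANY (SELECT ...) wraps exp.Any the same way.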
3244 self._match_r_paren() 3245 return this 3246 3247 if functions is None: 3248 functions = self.FUNCTIONS 3249 3250 function = functions.get(upper) 3251 3252 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3253 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3254 3255 if function and not anonymous: 3256 this = self.validate_expression(function(args), args) 3257 else: 3258 this = self.expression(exp.Anonymous, this=this, expressions=args) 3259 3260 self._match_r_paren(this) 3261 return self._parse_window(this) 3262 3263 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3264 return self._parse_column_def(self._parse_id_var()) 3265 3266 def _parse_user_defined_function( 3267 self, kind: t.Optional[TokenType] = None 3268 ) -> t.Optional[exp.Expression]: 3269 this = self._parse_id_var() 3270 3271 while self._match(TokenType.DOT): 3272 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3273 3274 if not self._match(TokenType.L_PAREN): 3275 return this 3276 3277 expressions = self._parse_csv(self._parse_function_parameter) 3278 self._match_r_paren() 3279 return self.expression( 3280 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3281 ) 3282 3283 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3284 literal = self._parse_primary() 3285 if literal: 3286 return self.expression(exp.Introducer, this=token.text, expression=literal) 3287 3288 return self.expression(exp.Identifier, this=token.text) 3289 3290 def _parse_session_parameter(self) -> exp.SessionParameter: 3291 kind = None 3292 this = self._parse_id_var() or self._parse_primary() 3293 3294 if this and self._match(TokenType.DOT): 3295 kind = this.name 3296 this = self._parse_var() or self._parse_primary() 3297 3298 return self.expression(exp.SessionParameter, this=this, kind=kind) 3299 3300 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3301 index = self._index 3302 3303 if self._match(TokenType.L_PAREN): 3304 expressions = self._parse_csv(self._parse_id_var) 3305 3306 if not self._match(TokenType.R_PAREN): 3307 self._retreat(index) 3308 else: 3309 expressions = [self._parse_id_var()] 3310 3311 if self._match_set(self.LAMBDAS): 3312 return self.LAMBDAS[self._prev.token_type](self, expressions) 3313 3314 self._retreat(index) 3315 3316 this: t.Optional[exp.Expression] 3317 3318 if self._match(TokenType.DISTINCT): 3319 this = self.expression( 3320 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3321 ) 3322 else: 3323 this = self._parse_select_or_expression(alias=alias) 3324 3325 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3326 3327 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3328 index = self._index 3329 3330 if not self.errors: 3331 try: 3332 if self._parse_select(nested=True): 3333 return this 3334 except ParseError: 3335 pass 3336 finally: 3337 self.errors.clear() 3338 self._retreat(index) 3339 3340 if not self._match(TokenType.L_PAREN): 3341 return this 3342 3343 args = self._parse_csv( 3344 lambda: self._parse_constraint() 3345 or self._parse_column_def(self._parse_field(any_token=True)) 3346 ) 3347 3348 self._match_r_paren() 3349 return self.expression(exp.Schema, this=this, expressions=args) 3350 3351 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3352 # column defs are not really columns, they're identifiers 3353 if isinstance(this, 
exp.Column): 3354 this = this.this 3355 3356 kind = self._parse_types(schema=True) 3357 3358 if self._match_text_seq("FOR", "ORDINALITY"): 3359 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3360 3361 constraints = [] 3362 while True: 3363 constraint = self._parse_column_constraint() 3364 if not constraint: 3365 break 3366 constraints.append(constraint) 3367 3368 if not kind and not constraints: 3369 return this 3370 3371 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3372 3373 def _parse_auto_increment( 3374 self, 3375 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3376 start = None 3377 increment = None 3378 3379 if self._match(TokenType.L_PAREN, advance=False): 3380 args = self._parse_wrapped_csv(self._parse_bitwise) 3381 start = seq_get(args, 0) 3382 increment = seq_get(args, 1) 3383 elif self._match_text_seq("START"): 3384 start = self._parse_bitwise() 3385 self._match_text_seq("INCREMENT") 3386 increment = self._parse_bitwise() 3387 3388 if start and increment: 3389 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3390 3391 return exp.AutoIncrementColumnConstraint() 3392 3393 def _parse_compress(self) -> exp.CompressColumnConstraint: 3394 if self._match(TokenType.L_PAREN, advance=False): 3395 return self.expression( 3396 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3397 ) 3398 3399 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3400 3401 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3402 if self._match_text_seq("BY", "DEFAULT"): 3403 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3404 this = self.expression( 3405 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3406 ) 3407 else: 3408 self._match_text_seq("ALWAYS") 3409 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3410 3411 self._match(TokenType.ALIAS) 3412 identity = self._match_text_seq("IDENTITY") 3413 3414 if self._match(TokenType.L_PAREN): 3415 if self._match_text_seq("START", "WITH"): 3416 this.set("start", self._parse_bitwise()) 3417 if self._match_text_seq("INCREMENT", "BY"): 3418 this.set("increment", self._parse_bitwise()) 3419 if self._match_text_seq("MINVALUE"): 3420 this.set("minvalue", self._parse_bitwise()) 3421 if self._match_text_seq("MAXVALUE"): 3422 this.set("maxvalue", self._parse_bitwise()) 3423 3424 if self._match_text_seq("CYCLE"): 3425 this.set("cycle", True) 3426 elif self._match_text_seq("NO", "CYCLE"): 3427 this.set("cycle", False) 3428 3429 if not identity: 3430 this.set("expression", self._parse_bitwise()) 3431 3432 self._match_r_paren() 3433 3434 return this 3435 3436 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3437 self._match_text_seq("LENGTH") 3438 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3439 3440 def _parse_not_constraint( 3441 self, 3442 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3443 if self._match_text_seq("NULL"): 3444 return self.expression(exp.NotNullColumnConstraint) 3445 if self._match_text_seq("CASESPECIFIC"): 3446 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3447 return None 3448 3449 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3450 if self._match(TokenType.CONSTRAINT): 3451 this = self._parse_id_var() 3452 else: 3453 this = None 3454 3455 if 
self._match_texts(self.CONSTRAINT_PARSERS): 3456 return self.expression( 3457 exp.ColumnConstraint, 3458 this=this, 3459 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3460 ) 3461 3462 return this 3463 3464 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3465 if not self._match(TokenType.CONSTRAINT): 3466 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3467 3468 this = self._parse_id_var() 3469 expressions = [] 3470 3471 while True: 3472 constraint = self._parse_unnamed_constraint() or self._parse_function() 3473 if not constraint: 3474 break 3475 expressions.append(constraint) 3476 3477 return self.expression(exp.Constraint, this=this, expressions=expressions) 3478 3479 def _parse_unnamed_constraint( 3480 self, constraints: t.Optional[t.Collection[str]] = None 3481 ) -> t.Optional[exp.Expression]: 3482 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3483 return None 3484 3485 constraint = self._prev.text.upper() 3486 if constraint not in self.CONSTRAINT_PARSERS: 3487 self.raise_error(f"No parser found for schema constraint {constraint}.") 3488 3489 return self.CONSTRAINT_PARSERS[constraint](self) 3490 3491 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3492 self._match_text_seq("KEY") 3493 return self.expression( 3494 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3495 ) 3496 3497 def _parse_key_constraint_options(self) -> t.List[str]: 3498 options = [] 3499 while True: 3500 if not self._curr: 3501 break 3502 3503 if self._match(TokenType.ON): 3504 action = None 3505 on = self._advance_any() and self._prev.text 3506 3507 if self._match_text_seq("NO", "ACTION"): 3508 action = "NO ACTION" 3509 elif self._match_text_seq("CASCADE"): 3510 action = "CASCADE" 3511 elif self._match_pair(TokenType.SET, TokenType.NULL): 3512 action = "SET NULL" 3513 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3514 action = "SET DEFAULT" 3515 else: 3516 self.raise_error("Invalid key constraint") 3517 3518 options.append(f"ON {on} {action}") 3519 elif self._match_text_seq("NOT", "ENFORCED"): 3520 options.append("NOT ENFORCED") 3521 elif self._match_text_seq("DEFERRABLE"): 3522 options.append("DEFERRABLE") 3523 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3524 options.append("INITIALLY DEFERRED") 3525 elif self._match_text_seq("NORELY"): 3526 options.append("NORELY") 3527 elif self._match_text_seq("MATCH", "FULL"): 3528 options.append("MATCH FULL") 3529 else: 3530 break 3531 3532 return options 3533 3534 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3535 if match and not self._match(TokenType.REFERENCES): 3536 return None 3537 3538 expressions = None 3539 this = self._parse_id_var() 3540 3541 if self._match(TokenType.L_PAREN, advance=False): 3542 expressions = self._parse_wrapped_id_vars() 3543 3544 options = self._parse_key_constraint_options() 3545 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3546 3547 def _parse_foreign_key(self) -> exp.ForeignKey: 3548 expressions = self._parse_wrapped_id_vars() 3549 reference = self._parse_references() 3550 options = {} 3551 3552 while self._match(TokenType.ON): 3553 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3554 self.raise_error("Expected DELETE or UPDATE") 3555 3556 kind = self._prev.text.lower() 3557 3558 if self._match_text_seq("NO", "ACTION"): 3559 action = "NO ACTION" 3560 elif self._match(TokenType.SET): 3561 
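                # Illustrative note: for FOREIGN KEY (x) REFERENCES t (y)
                # ON DELETE SET NULL, the matched NULL/DEFAULT token is folded into
                # the action below, yielding options {"delete": "SET NULL"} on the
                # resulting exp.ForeignKey.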
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3562 action = "SET " + self._prev.text.upper() 3563 else: 3564 self._advance() 3565 action = self._prev.text.upper() 3566 3567 options[kind] = action 3568 3569 return self.expression( 3570 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3571 ) 3572 3573 def _parse_primary_key( 3574 self, wrapped_optional: bool = False, in_props: bool = False 3575 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3576 desc = ( 3577 self._match_set((TokenType.ASC, TokenType.DESC)) 3578 and self._prev.token_type == TokenType.DESC 3579 ) 3580 3581 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3582 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3583 3584 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3585 options = self._parse_key_constraint_options() 3586 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3587 3588 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3589 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3590 return this 3591 3592 bracket_kind = self._prev.token_type 3593 3594 if self._match(TokenType.COLON): 3595 expressions: t.List[t.Optional[exp.Expression]] = [ 3596 self.expression(exp.Slice, expression=self._parse_conjunction()) 3597 ] 3598 else: 3599 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3600 3601 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3602 if bracket_kind == TokenType.L_BRACE: 3603 this = self.expression(exp.Struct, expressions=expressions) 3604 elif not this or this.name.upper() == "ARRAY": 3605 this = self.expression(exp.Array, expressions=expressions) 3606 else: 3607 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3608 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3609 3610 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3611 self.raise_error("Expected ]") 3612 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3613 self.raise_error("Expected }") 3614 3615 self._add_comments(this) 3616 return self._parse_bracket(this) 3617 3618 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3619 if self._match(TokenType.COLON): 3620 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3621 return this 3622 3623 def _parse_case(self) -> t.Optional[exp.Expression]: 3624 ifs = [] 3625 default = None 3626 3627 expression = self._parse_conjunction() 3628 3629 while self._match(TokenType.WHEN): 3630 this = self._parse_conjunction() 3631 self._match(TokenType.THEN) 3632 then = self._parse_conjunction() 3633 ifs.append(self.expression(exp.If, this=this, true=then)) 3634 3635 if self._match(TokenType.ELSE): 3636 default = self._parse_conjunction() 3637 3638 if not self._match(TokenType.END): 3639 self.raise_error("Expected END after CASE", self._prev) 3640 3641 return self._parse_window( 3642 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3643 ) 3644 3645 def _parse_if(self) -> t.Optional[exp.Expression]: 3646 if self._match(TokenType.L_PAREN): 3647 args = self._parse_csv(self._parse_conjunction) 3648 this = self.validate_expression(exp.If.from_arg_list(args), args) 3649 self._match_r_paren() 3650 else: 3651 index = self._index - 1 3652 condition = self._parse_conjunction() 3653 
3654 if not condition: 3655 self._retreat(index) 3656 return None 3657 3658 self._match(TokenType.THEN) 3659 true = self._parse_conjunction() 3660 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3661 self._match(TokenType.END) 3662 this = self.expression(exp.If, this=condition, true=true, false=false) 3663 3664 return self._parse_window(this) 3665 3666 def _parse_extract(self) -> exp.Extract: 3667 this = self._parse_function() or self._parse_var() or self._parse_type() 3668 3669 if self._match(TokenType.FROM): 3670 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3671 3672 if not self._match(TokenType.COMMA): 3673 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3674 3675 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3676 3677 def _parse_any_value(self) -> exp.AnyValue: 3678 this = self._parse_lambda() 3679 is_max = None 3680 having = None 3681 3682 if self._match(TokenType.HAVING): 3683 self._match_texts(("MAX", "MIN")) 3684 is_max = self._prev.text == "MAX" 3685 having = self._parse_column() 3686 3687 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3688 3689 def _parse_cast(self, strict: bool) -> exp.Expression: 3690 this = self._parse_conjunction() 3691 3692 if not self._match(TokenType.ALIAS): 3693 if self._match(TokenType.COMMA): 3694 return self.expression( 3695 exp.CastToStrType, this=this, expression=self._parse_string() 3696 ) 3697 else: 3698 self.raise_error("Expected AS after CAST") 3699 3700 fmt = None 3701 to = self._parse_types() 3702 3703 if not to: 3704 self.raise_error("Expected TYPE after CAST") 3705 elif to.this == exp.DataType.Type.CHAR: 3706 if self._match(TokenType.CHARACTER_SET): 3707 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3708 elif self._match(TokenType.FORMAT): 3709 fmt = self._parse_at_time_zone(self._parse_string()) 3710 3711 if to.this in exp.DataType.TEMPORAL_TYPES: 3712 return self.expression( 3713 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3714 this=this, 3715 format=exp.Literal.string( 3716 format_time( 3717 fmt.this if fmt else "", 3718 self.FORMAT_MAPPING or self.TIME_MAPPING, 3719 self.FORMAT_TRIE or self.TIME_TRIE, 3720 ) 3721 ), 3722 ) 3723 3724 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3725 3726 def _parse_concat(self) -> t.Optional[exp.Expression]: 3727 args = self._parse_csv(self._parse_conjunction) 3728 if self.CONCAT_NULL_OUTPUTS_STRING: 3729 args = [ 3730 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3731 for arg in args 3732 if arg 3733 ] 3734 3735 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3736 # we find such a call we replace it with its argument. 
3737 if len(args) == 1: 3738 return args[0] 3739 3740 return self.expression( 3741 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3742 ) 3743 3744 def _parse_string_agg(self) -> exp.Expression: 3745 if self._match(TokenType.DISTINCT): 3746 args: t.List[t.Optional[exp.Expression]] = [ 3747 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3748 ] 3749 if self._match(TokenType.COMMA): 3750 args.extend(self._parse_csv(self._parse_conjunction)) 3751 else: 3752 args = self._parse_csv(self._parse_conjunction) 3753 3754 index = self._index 3755 if not self._match(TokenType.R_PAREN): 3756 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3757 return self.expression( 3758 exp.GroupConcat, 3759 this=seq_get(args, 0), 3760 separator=self._parse_order(this=seq_get(args, 1)), 3761 ) 3762 3763 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3764 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3765 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3766 if not self._match_text_seq("WITHIN", "GROUP"): 3767 self._retreat(index) 3768 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3769 3770 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3771 order = self._parse_order(this=seq_get(args, 0)) 3772 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3773 3774 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3775 this = self._parse_bitwise() 3776 3777 if self._match(TokenType.USING): 3778 to: t.Optional[exp.Expression] = self.expression( 3779 exp.CharacterSet, this=self._parse_var() 3780 ) 3781 elif self._match(TokenType.COMMA): 3782 to = self._parse_types() 3783 else: 3784 to = None 3785 3786 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3787 3788 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3789 """ 3790 There are generally two variants of the DECODE function: 3791 3792 - DECODE(bin, charset) 3793 - DECODE(expression, search, result [, search, result] ... [, default]) 3794 3795 The second variant will always be parsed into a CASE expression. Note that NULL 3796 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3797 instead of relying on pattern matching. 
3798 """ 3799 args = self._parse_csv(self._parse_conjunction) 3800 3801 if len(args) < 3: 3802 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3803 3804 expression, *expressions = args 3805 if not expression: 3806 return None 3807 3808 ifs = [] 3809 for search, result in zip(expressions[::2], expressions[1::2]): 3810 if not search or not result: 3811 return None 3812 3813 if isinstance(search, exp.Literal): 3814 ifs.append( 3815 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3816 ) 3817 elif isinstance(search, exp.Null): 3818 ifs.append( 3819 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3820 ) 3821 else: 3822 cond = exp.or_( 3823 exp.EQ(this=expression.copy(), expression=search), 3824 exp.and_( 3825 exp.Is(this=expression.copy(), expression=exp.Null()), 3826 exp.Is(this=search.copy(), expression=exp.Null()), 3827 copy=False, 3828 ), 3829 copy=False, 3830 ) 3831 ifs.append(exp.If(this=cond, true=result)) 3832 3833 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3834 3835 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3836 self._match_text_seq("KEY") 3837 key = self._parse_field() 3838 self._match(TokenType.COLON) 3839 self._match_text_seq("VALUE") 3840 value = self._parse_field() 3841 3842 if not key and not value: 3843 return None 3844 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3845 3846 def _parse_json_object(self) -> exp.JSONObject: 3847 star = self._parse_star() 3848 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3849 3850 null_handling = None 3851 if self._match_text_seq("NULL", "ON", "NULL"): 3852 null_handling = "NULL ON NULL" 3853 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3854 null_handling = "ABSENT ON NULL" 3855 3856 unique_keys = None 3857 if self._match_text_seq("WITH", "UNIQUE"): 3858 unique_keys = True 3859 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3860 unique_keys = False 3861 3862 self._match_text_seq("KEYS") 3863 3864 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3865 format_json = self._match_text_seq("FORMAT", "JSON") 3866 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3867 3868 return self.expression( 3869 exp.JSONObject, 3870 expressions=expressions, 3871 null_handling=null_handling, 3872 unique_keys=unique_keys, 3873 return_type=return_type, 3874 format_json=format_json, 3875 encoding=encoding, 3876 ) 3877 3878 def _parse_logarithm(self) -> exp.Func: 3879 # Default argument order is base, expression 3880 args = self._parse_csv(self._parse_range) 3881 3882 if len(args) > 1: 3883 if not self.LOG_BASE_FIRST: 3884 args.reverse() 3885 return exp.Log.from_arg_list(args) 3886 3887 return self.expression( 3888 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3889 ) 3890 3891 def _parse_match_against(self) -> exp.MatchAgainst: 3892 expressions = self._parse_csv(self._parse_column) 3893 3894 self._match_text_seq(")", "AGAINST", "(") 3895 3896 this = self._parse_string() 3897 3898 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3899 modifier = "IN NATURAL LANGUAGE MODE" 3900 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3901 modifier = f"{modifier} WITH QUERY EXPANSION" 3902 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3903 modifier = "IN BOOLEAN MODE" 3904 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3905 modifier = "WITH QUERY EXPANSION" 3906 
else: 3907 modifier = None 3908 3909 return self.expression( 3910 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3911 ) 3912 3913 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3914 def _parse_open_json(self) -> exp.OpenJSON: 3915 this = self._parse_bitwise() 3916 path = self._match(TokenType.COMMA) and self._parse_string() 3917 3918 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3919 this = self._parse_field(any_token=True) 3920 kind = self._parse_types() 3921 path = self._parse_string() 3922 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3923 3924 return self.expression( 3925 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3926 ) 3927 3928 expressions = None 3929 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3930 self._match_l_paren() 3931 expressions = self._parse_csv(_parse_open_json_column_def) 3932 3933 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3934 3935 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3936 args = self._parse_csv(self._parse_bitwise) 3937 3938 if self._match(TokenType.IN): 3939 return self.expression( 3940 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3941 ) 3942 3943 if haystack_first: 3944 haystack = seq_get(args, 0) 3945 needle = seq_get(args, 1) 3946 else: 3947 needle = seq_get(args, 0) 3948 haystack = seq_get(args, 1) 3949 3950 return self.expression( 3951 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3952 ) 3953 3954 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3955 args = self._parse_csv(self._parse_table) 3956 return exp.JoinHint(this=func_name.upper(), expressions=args) 3957 3958 def _parse_substring(self) -> exp.Substring: 3959 # Postgres supports the form: substring(string [from int] [for int]) 3960 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3961 3962 args = self._parse_csv(self._parse_bitwise) 3963 3964 if self._match(TokenType.FROM): 3965 args.append(self._parse_bitwise()) 3966 if self._match(TokenType.FOR): 3967 args.append(self._parse_bitwise()) 3968 3969 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3970 3971 def _parse_trim(self) -> exp.Trim: 3972 # https://www.w3resource.com/sql/character-functions/trim.php 3973 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3974 3975 position = None 3976 collation = None 3977 3978 if self._match_texts(self.TRIM_TYPES): 3979 position = self._prev.text.upper() 3980 3981 expression = self._parse_bitwise() 3982 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3983 this = self._parse_bitwise() 3984 else: 3985 this = expression 3986 expression = None 3987 3988 if self._match(TokenType.COLLATE): 3989 collation = self._parse_bitwise() 3990 3991 return self.expression( 3992 exp.Trim, this=this, position=position, expression=expression, collation=collation 3993 ) 3994 3995 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3996 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3997 3998 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3999 return self._parse_window(self._parse_id_var(), alias=True) 4000 4001 def _parse_respect_or_ignore_nulls( 4002 self, this: t.Optional[exp.Expression] 4003 ) -> t.Optional[exp.Expression]: 4004 if self._match_text_seq("IGNORE", "NULLS"): 4005 return 
self.expression(exp.IgnoreNulls, this=this) 4006 if self._match_text_seq("RESPECT", "NULLS"): 4007 return self.expression(exp.RespectNulls, this=this) 4008 return this 4009 4010 def _parse_window( 4011 self, this: t.Optional[exp.Expression], alias: bool = False 4012 ) -> t.Optional[exp.Expression]: 4013 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4014 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4015 self._match_r_paren() 4016 4017 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4018 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4019 if self._match_text_seq("WITHIN", "GROUP"): 4020 order = self._parse_wrapped(self._parse_order) 4021 this = self.expression(exp.WithinGroup, this=this, expression=order) 4022 4023 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4024 # Some dialects choose to implement and some do not. 4025 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4026 4027 # There is some code above in _parse_lambda that handles 4028 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4029 4030 # The below changes handle 4031 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4032 4033 # Oracle allows both formats 4034 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4035 # and Snowflake chose to do the same for familiarity 4036 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4037 this = self._parse_respect_or_ignore_nulls(this) 4038 4039 # bigquery select from window x AS (partition by ...) 4040 if alias: 4041 over = None 4042 self._match(TokenType.ALIAS) 4043 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4044 return this 4045 else: 4046 over = self._prev.text.upper() 4047 4048 if not self._match(TokenType.L_PAREN): 4049 return self.expression( 4050 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4051 ) 4052 4053 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4054 4055 first = self._match(TokenType.FIRST) 4056 if self._match_text_seq("LAST"): 4057 first = False 4058 4059 partition = self._parse_partition_by() 4060 order = self._parse_order() 4061 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4062 4063 if kind: 4064 self._match(TokenType.BETWEEN) 4065 start = self._parse_window_spec() 4066 self._match(TokenType.AND) 4067 end = self._parse_window_spec() 4068 4069 spec = self.expression( 4070 exp.WindowSpec, 4071 kind=kind, 4072 start=start["value"], 4073 start_side=start["side"], 4074 end=end["value"], 4075 end_side=end["side"], 4076 ) 4077 else: 4078 spec = None 4079 4080 self._match_r_paren() 4081 4082 return self.expression( 4083 exp.Window, 4084 this=this, 4085 partition_by=partition, 4086 order=order, 4087 spec=spec, 4088 alias=window_alias, 4089 over=over, 4090 first=first, 4091 ) 4092 4093 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4094 self._match(TokenType.BETWEEN) 4095 4096 return { 4097 "value": ( 4098 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4099 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4100 or self._parse_bitwise() 4101 ), 4102 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4103 } 4104 4105 def _parse_alias( 4106 self, this: t.Optional[exp.Expression], explicit: bool = False 4107 ) -> t.Optional[exp.Expression]: 4108 
any_token = self._match(TokenType.ALIAS) 4109 4110 if explicit and not any_token: 4111 return this 4112 4113 if self._match(TokenType.L_PAREN): 4114 aliases = self.expression( 4115 exp.Aliases, 4116 this=this, 4117 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4118 ) 4119 self._match_r_paren(aliases) 4120 return aliases 4121 4122 alias = self._parse_id_var(any_token) 4123 4124 if alias: 4125 return self.expression(exp.Alias, this=this, alias=alias) 4126 4127 return this 4128 4129 def _parse_id_var( 4130 self, 4131 any_token: bool = True, 4132 tokens: t.Optional[t.Collection[TokenType]] = None, 4133 ) -> t.Optional[exp.Expression]: 4134 identifier = self._parse_identifier() 4135 4136 if identifier: 4137 return identifier 4138 4139 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4140 quoted = self._prev.token_type == TokenType.STRING 4141 return exp.Identifier(this=self._prev.text, quoted=quoted) 4142 4143 return None 4144 4145 def _parse_string(self) -> t.Optional[exp.Expression]: 4146 if self._match(TokenType.STRING): 4147 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4148 return self._parse_placeholder() 4149 4150 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4151 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4152 4153 def _parse_number(self) -> t.Optional[exp.Expression]: 4154 if self._match(TokenType.NUMBER): 4155 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4156 return self._parse_placeholder() 4157 4158 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4159 if self._match(TokenType.IDENTIFIER): 4160 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4161 return self._parse_placeholder() 4162 4163 def _parse_var( 4164 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4165 ) -> t.Optional[exp.Expression]: 4166 if ( 4167 (any_token and self._advance_any()) 4168 or self._match(TokenType.VAR) 4169 or (self._match_set(tokens) if tokens else False) 4170 ): 4171 return self.expression(exp.Var, this=self._prev.text) 4172 return self._parse_placeholder() 4173 4174 def _advance_any(self) -> t.Optional[Token]: 4175 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4176 self._advance() 4177 return self._prev 4178 return None 4179 4180 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4181 return self._parse_var() or self._parse_string() 4182 4183 def _parse_null(self) -> t.Optional[exp.Expression]: 4184 if self._match(TokenType.NULL): 4185 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4186 return None 4187 4188 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4189 if self._match(TokenType.TRUE): 4190 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4191 if self._match(TokenType.FALSE): 4192 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4193 return None 4194 4195 def _parse_star(self) -> t.Optional[exp.Expression]: 4196 if self._match(TokenType.STAR): 4197 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4198 return None 4199 4200 def _parse_parameter(self) -> exp.Parameter: 4201 wrapped = self._match(TokenType.L_BRACE) 4202 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4203 self._match(TokenType.R_BRACE) 4204 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4205 4206 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
4207 if self._match_set(self.PLACEHOLDER_PARSERS): 4208 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4209 if placeholder: 4210 return placeholder 4211 self._advance(-1) 4212 return None 4213 4214 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4215 if not self._match(TokenType.EXCEPT): 4216 return None 4217 if self._match(TokenType.L_PAREN, advance=False): 4218 return self._parse_wrapped_csv(self._parse_column) 4219 return self._parse_csv(self._parse_column) 4220 4221 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4222 if not self._match(TokenType.REPLACE): 4223 return None 4224 if self._match(TokenType.L_PAREN, advance=False): 4225 return self._parse_wrapped_csv(self._parse_expression) 4226 return self._parse_csv(self._parse_expression) 4227 4228 def _parse_csv( 4229 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4230 ) -> t.List[t.Optional[exp.Expression]]: 4231 parse_result = parse_method() 4232 items = [parse_result] if parse_result is not None else [] 4233 4234 while self._match(sep): 4235 self._add_comments(parse_result) 4236 parse_result = parse_method() 4237 if parse_result is not None: 4238 items.append(parse_result) 4239 4240 return items 4241 4242 def _parse_tokens( 4243 self, parse_method: t.Callable, expressions: t.Dict 4244 ) -> t.Optional[exp.Expression]: 4245 this = parse_method() 4246 4247 while self._match_set(expressions): 4248 this = self.expression( 4249 expressions[self._prev.token_type], 4250 this=this, 4251 comments=self._prev_comments, 4252 expression=parse_method(), 4253 ) 4254 4255 return this 4256 4257 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4258 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4259 4260 def _parse_wrapped_csv( 4261 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4262 ) -> t.List[t.Optional[exp.Expression]]: 4263 return self._parse_wrapped( 4264 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4265 ) 4266 4267 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4268 wrapped = self._match(TokenType.L_PAREN) 4269 if not wrapped and not optional: 4270 self.raise_error("Expecting (") 4271 parse_result = parse_method() 4272 if wrapped: 4273 self._match_r_paren() 4274 return parse_result 4275 4276 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4277 return self._parse_select() or self._parse_set_operations( 4278 self._parse_expression() if alias else self._parse_conjunction() 4279 ) 4280 4281 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4282 return self._parse_query_modifiers( 4283 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4284 ) 4285 4286 def _parse_transaction(self) -> exp.Transaction: 4287 this = None 4288 if self._match_texts(self.TRANSACTION_KIND): 4289 this = self._prev.text 4290 4291 self._match_texts({"TRANSACTION", "WORK"}) 4292 4293 modes = [] 4294 while True: 4295 mode = [] 4296 while self._match(TokenType.VAR): 4297 mode.append(self._prev.text) 4298 4299 if mode: 4300 modes.append(" ".join(mode)) 4301 if not self._match(TokenType.COMMA): 4302 break 4303 4304 return self.expression(exp.Transaction, this=this, modes=modes) 4305 4306 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4307 chain = None 4308 savepoint = None 4309 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4310 4311 self._match_texts({"TRANSACTION", "WORK"}) 4312 4313 if self._match_text_seq("TO"): 4314 self._match_text_seq("SAVEPOINT") 4315 savepoint = self._parse_id_var() 4316 4317 if self._match(TokenType.AND): 4318 chain = not self._match_text_seq("NO") 4319 self._match_text_seq("CHAIN") 4320 4321 if is_rollback: 4322 return self.expression(exp.Rollback, savepoint=savepoint) 4323 4324 return self.expression(exp.Commit, chain=chain) 4325 4326 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4327 if not self._match_text_seq("ADD"): 4328 return None 4329 4330 self._match(TokenType.COLUMN) 4331 exists_column = self._parse_exists(not_=True) 4332 expression = self._parse_column_def(self._parse_field(any_token=True)) 4333 4334 if expression: 4335 expression.set("exists", exists_column) 4336 4337 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4338 if self._match_texts(("FIRST", "AFTER")): 4339 position = self._prev.text 4340 column_position = self.expression( 4341 exp.ColumnPosition, this=self._parse_column(), position=position 4342 ) 4343 expression.set("position", column_position) 4344 4345 return expression 4346 4347 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4348 drop = self._match(TokenType.DROP) and self._parse_drop() 4349 if drop and not isinstance(drop, exp.Command): 4350 drop.set("kind", drop.args.get("kind", "COLUMN")) 4351 return drop 4352 4353 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4354 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4355 return self.expression( 4356 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4357 ) 4358 4359 def _parse_add_constraint(self) -> exp.AddConstraint: 4360 this = None 4361 kind = self._prev.token_type 4362 4363 if kind == TokenType.CONSTRAINT: 4364 this = self._parse_id_var() 4365 4366 if self._match_text_seq("CHECK"): 4367 expression = self._parse_wrapped(self._parse_conjunction) 4368 enforced = self._match_text_seq("ENFORCED") 4369 4370 return self.expression( 4371 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4372 ) 4373 4374 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4375 expression = self._parse_foreign_key() 4376 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4377 expression = self._parse_primary_key() 4378 else: 4379 expression = None 4380 4381 return self.expression(exp.AddConstraint, this=this, expression=expression) 4382 4383 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4384 index = self._index - 1 4385 4386 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4387 return self._parse_csv(self._parse_add_constraint) 4388 4389 self._retreat(index) 4390 return self._parse_csv(self._parse_add_column) 4391 4392 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4393 self._match(TokenType.COLUMN) 4394 column = self._parse_field(any_token=True) 4395 4396 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4397 return self.expression(exp.AlterColumn, this=column, drop=True) 4398 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4399 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4400 4401 self._match_text_seq("SET", "DATA") 4402 return self.expression( 4403 exp.AlterColumn, 4404 this=column, 4405 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4406 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4407 using=self._match(TokenType.USING) and self._parse_conjunction(), 4408 ) 4409 4410 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4411 index = self._index - 1 4412 4413 partition_exists = self._parse_exists() 4414 if self._match(TokenType.PARTITION, advance=False): 4415 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4416 4417 self._retreat(index) 4418 return self._parse_csv(self._parse_drop_column) 4419 4420 def _parse_alter_table_rename(self) -> exp.RenameTable: 4421 self._match_text_seq("TO") 4422 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4423 4424 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4425 start = self._prev 4426 4427 if not self._match(TokenType.TABLE): 4428 return self._parse_as_command(start) 4429 4430 exists = self._parse_exists() 4431 this = self._parse_table(schema=True) 4432 4433 if self._next: 4434 self._advance() 4435 4436 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4437 if parser: 4438 actions = ensure_list(parser(self)) 4439 4440 if not self._curr: 4441 return self.expression( 4442 exp.AlterTable, 4443 this=this, 4444 exists=exists, 4445 actions=actions, 4446 ) 4447 return self._parse_as_command(start) 4448 4449 def _parse_merge(self) -> exp.Merge: 4450 self._match(TokenType.INTO) 4451 target = self._parse_table() 4452 4453 self._match(TokenType.USING) 4454 using = self._parse_table() 4455 4456 self._match(TokenType.ON) 4457 on = self._parse_conjunction() 4458 4459 whens = [] 4460 while self._match(TokenType.WHEN): 4461 matched = not self._match(TokenType.NOT) 4462 self._match_text_seq("MATCHED") 4463 source = ( 4464 False 4465 if self._match_text_seq("BY", "TARGET") 4466 else self._match_text_seq("BY", "SOURCE") 4467 ) 4468 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4469 4470 self._match(TokenType.THEN) 4471 4472 if self._match(TokenType.INSERT): 4473 _this = self._parse_star() 4474 if _this: 4475 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4476 else: 4477 then = self.expression( 4478 exp.Insert, 4479 this=self._parse_value(), 4480 expression=self._match(TokenType.VALUES) and self._parse_value(), 4481 ) 4482 elif self._match(TokenType.UPDATE): 4483 expressions = self._parse_star() 4484 if expressions: 4485 then = self.expression(exp.Update, expressions=expressions) 4486 else: 4487 then = self.expression( 4488 exp.Update, 4489 expressions=self._match(TokenType.SET) 4490 and self._parse_csv(self._parse_equality), 4491 ) 4492 elif self._match(TokenType.DELETE): 4493 then = self.expression(exp.Var, this=self._prev.text) 4494 else: 4495 then = None 4496 4497 whens.append( 4498 self.expression( 4499 exp.When, 4500 matched=matched, 4501 source=source, 4502 condition=condition, 4503 then=then, 4504 ) 4505 ) 4506 4507 return self.expression( 4508 exp.Merge, 4509 this=target, 4510 using=using, 4511 on=on, 4512 expressions=whens, 4513 ) 4514 4515 def _parse_show(self) -> t.Optional[exp.Expression]: 4516 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4517 if parser: 4518 return parser(self) 4519 self._advance() 4520 return self.expression(exp.Show, this=self._prev.text.upper()) 4521 4522 def _parse_set_item_assignment( 4523 self, kind: t.Optional[str] = None 4524 ) -> t.Optional[exp.Expression]: 4525 index = self._index 4526 4527 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4528 return self._parse_set_transaction(global_=kind == "GLOBAL") 4529 4530 left = self._parse_primary() or self._parse_id_var() 4531 4532 if not self._match_texts(("=", "TO")): 4533 self._retreat(index) 4534 return None 4535 4536 right = self._parse_statement() or self._parse_id_var() 4537 this = self.expression(exp.EQ, this=left, expression=right) 4538 4539 return self.expression(exp.SetItem, this=this, kind=kind) 4540 4541 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4542 self._match_text_seq("TRANSACTION") 4543 characteristics = self._parse_csv( 4544 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4545 ) 4546 return self.expression( 4547 exp.SetItem, 4548 expressions=characteristics, 4549 kind="TRANSACTION", 4550 **{"global": global_}, # type: ignore 4551 ) 4552 4553 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4554 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4555 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4556 4557 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4558 index = self._index 4559 set_ = self.expression( 4560 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4561 ) 4562 4563 if self._curr: 4564 self._retreat(index) 4565 return self._parse_as_command(self._prev) 4566 4567 return set_ 4568 4569 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4570 for option in options: 4571 if self._match_text_seq(*option.split(" ")): 4572 return exp.var(option) 4573 return None 4574 4575 def _parse_as_command(self, start: Token) -> exp.Command: 4576 while self._curr: 4577 self._advance() 4578 text = self._find_sql(start, self._prev) 4579 size = len(start.text) 4580 return exp.Command(this=text[:size], expression=text[size:]) 4581 4582 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4583 settings = [] 4584 4585 self._match_l_paren() 4586 kind = self._parse_id_var() 4587 4588 if self._match(TokenType.L_PAREN): 4589 while True: 4590 key = self._parse_id_var() 4591 value = self._parse_primary() 4592 4593 if not key and value is None: 4594 break 4595 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4596 self._match(TokenType.R_PAREN) 4597 4598 self._match_r_paren() 4599 4600 return self.expression( 4601 exp.DictProperty, 4602 this=this, 4603 kind=kind.this if kind else None, 4604 settings=settings, 4605 ) 4606 4607 def _parse_dict_range(self, this: str) -> exp.DictRange: 4608 self._match_l_paren() 4609 has_min = self._match_text_seq("MIN") 4610 if has_min: 4611 min = self._parse_var() or self._parse_primary() 4612 self._match_text_seq("MAX") 4613 max = self._parse_var() or self._parse_primary() 4614 else: 4615 max = self._parse_var() or self._parse_primary() 4616 min = exp.Literal.number(0) 4617 self._match_r_paren() 4618 return self.expression(exp.DictRange, this=this, min=min, max=max) 4619 4620 def _find_parser( 4621 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4622 ) -> t.Optional[t.Callable]: 4623 if not self._curr: 4624 return None 4625 4626 index = self._index 4627 this = [] 4628 while True: 4629 # The current token might be multiple words 4630 curr = self._curr.text.upper() 4631 key = curr.split(" ") 4632 this.append(curr) 4633 4634 self._advance() 4635 result, trie = in_trie(trie, key) 4636 if result == TrieResult.FAILED: 4637 break 4638 4639 if result == TrieResult.EXISTS: 4640 
subparser = parsers[" ".join(this)] 4641 return subparser 4642 4643 self._retreat(index) 4644 return None 4645 4646 def _match(self, token_type, advance=True, expression=None): 4647 if not self._curr: 4648 return None 4649 4650 if self._curr.token_type == token_type: 4651 if advance: 4652 self._advance() 4653 self._add_comments(expression) 4654 return True 4655 4656 return None 4657 4658 def _match_set(self, types, advance=True): 4659 if not self._curr: 4660 return None 4661 4662 if self._curr.token_type in types: 4663 if advance: 4664 self._advance() 4665 return True 4666 4667 return None 4668 4669 def _match_pair(self, token_type_a, token_type_b, advance=True): 4670 if not self._curr or not self._next: 4671 return None 4672 4673 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4674 if advance: 4675 self._advance(2) 4676 return True 4677 4678 return None 4679 4680 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4681 if not self._match(TokenType.L_PAREN, expression=expression): 4682 self.raise_error("Expecting (") 4683 4684 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4685 if not self._match(TokenType.R_PAREN, expression=expression): 4686 self.raise_error("Expecting )") 4687 4688 def _match_texts(self, texts, advance=True): 4689 if self._curr and self._curr.text.upper() in texts: 4690 if advance: 4691 self._advance() 4692 return True 4693 return False 4694 4695 def _match_text_seq(self, *texts, advance=True): 4696 index = self._index 4697 for text in texts: 4698 if self._curr and self._curr.text.upper() == text: 4699 self._advance() 4700 else: 4701 self._retreat(index) 4702 return False 4703 4704 if not advance: 4705 self._retreat(index) 4706 4707 return True 4708 4709 @t.overload 4710 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4711 ... 4712 4713 @t.overload 4714 def _replace_columns_with_dots( 4715 self, this: t.Optional[exp.Expression] 4716 ) -> t.Optional[exp.Expression]: 4717 ... 4718 4719 def _replace_columns_with_dots(self, this): 4720 if isinstance(this, exp.Dot): 4721 exp.replace_children(this, self._replace_columns_with_dots) 4722 elif isinstance(this, exp.Column): 4723 exp.replace_children(this, self._replace_columns_with_dots) 4724 table = this.args.get("table") 4725 this = ( 4726 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4727 ) 4728 4729 return this 4730 4731 def _replace_lambda( 4732 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4733 ) -> t.Optional[exp.Expression]: 4734 if not node: 4735 return node 4736 4737 for column in node.find_all(exp.Column): 4738 if column.parts[0].name in lambda_variables: 4739 dot_or_id = column.to_dot() if column.table else column.this 4740 parent = column.parent 4741 4742 while isinstance(parent, exp.Dot): 4743 if not isinstance(parent.parent, exp.Dot): 4744 parent.replace(dot_or_id) 4745 break 4746 parent = parent.parent 4747 else: 4748 if column is node: 4749 node = dot_or_id 4750 else: 4751 column.replace(dot_or_id) 4752 return node
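
The listing above closes out the module source. The short sketches below exercise a few of the private parsing paths shown here through sqlglot's public entry points; they are illustrative examples written against this version of the API (sample table and column names are made up), not part of the module itself. First, the Tokenizer/Parser pair can be driven by hand; sqlglot.parse_one wraps this same flow and adds dialect handling:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a AS x FROM t WHERE b = 1"
raw_tokens = Tokenizer().tokenize(sql)           # token stream for the statement
(expression,) = Parser().parse(raw_tokens, sql)  # one expression per statement

assert isinstance(expression, exp.Select)
print(expression.sql())  # SELECT a AS x FROM t WHERE b = 1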
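
_parse_string_agg folds the Postgres STRING_AGG form and the WITHIN GROUP form into the same exp.GroupConcat node, which is what allows such calls to be re-rendered in dialects that spell the aggregate GROUP_CONCAT. A sketch (the exact rendered string may vary by generator version):

import sqlglot

print(sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")[0])
# expected shape: SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t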
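
Likewise, the DECODE-to-CASE rewrite described in _parse_decode's docstring can be observed end to end: literal searches become equality branches, a NULL search becomes an IS NULL test, and a trailing odd argument becomes the ELSE default:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT DECODE(a, 1, 'one', NULL, 'missing', 'other') FROM t", read="oracle"
    )[0]
)
# expected shape:
# SELECT CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'missing' ELSE 'other' END FROM t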
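
_parse_trim splits TRIM(LEADING 'x' FROM y) into three pieces, which land in the position, expression, and this args of exp.Trim:

from sqlglot import exp, parse_one

trim = parse_one("SELECT TRIM(LEADING 'x' FROM y)").find(exp.Trim)
print(trim.args["position"])          # LEADING
print(trim.args["expression"].sql())  # 'x' (the characters to strip)
print(trim.args["this"].sql())        # y (the string being trimmed)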
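
_parse_window_spec records frame bounds either as the literal strings "UNBOUNDED" / "CURRENT ROW" or as a parsed expression, with the PRECEDING/FOLLOWING side kept separately. Inspecting a parsed window shows where each piece ends up:

from sqlglot import exp, parse_one

window = parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
).find(exp.Window)

spec = window.args["spec"]
print(spec.args["kind"])                            # ROWS
print(spec.args["start"], spec.args["start_side"])  # UNBOUNDED PRECEDING
print(spec.args["end"])                             # CURRENT ROW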
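
_parse_placeholder also backs named bind parameters: with the default tokenizer, a colon followed by a number or a bare word is folded into an exp.Placeholder (a lone ? produces the anonymous form). A small check, assuming the default dialect:

from sqlglot import exp, parse_one

stmt = parse_one("SELECT * FROM t WHERE id = :id")
print(stmt.find(exp.Placeholder).sql())  # :id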
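
_parse_alter only commits to an exp.AlterTable when an action parser consumed the whole statement; anything it cannot fully parse falls back to an opaque exp.Command. For a supported action, the parsed pieces sit in the actions list:

from sqlglot import exp, parse_one

alter = parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(alter, exp.AlterTable)
for action in alter.args["actions"]:
    print(type(action).__name__, action.sql())  # ColumnDef c INT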
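
_parse_merge collects each WHEN branch into an exp.When whose then arg is an exp.Update, an exp.Insert, or a bare DELETE var; walking the expressions list shows the shape:

from sqlglot import exp, parse_one

merge = parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
for when in merge.args["expressions"]:
    print(when.args["matched"], type(when.args["then"]).__name__)
# True Update
# False Insert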
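
Finally, the incremental trie walk in _find_parser (over tries the _Parser metaclass builds from the SHOW_PARSERS and SET_PARSERS keys) can be reproduced directly with sqlglot.trie; the two keys below are made-up stand-ins:

from sqlglot.trie import TrieResult, in_trie, new_trie

# Multi-word keys are split on spaces, exactly as the _Parser metaclass does.
trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))

result, node = in_trie(trie, ["SHOW"])
print(result is TrieResult.PREFIX)  # True: a longer key may still match
result, _ = in_trie(node, ["TABLES"])
print(result is TrieResult.EXISTS)  # True: a full key matched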
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 
SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 
TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, 
TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda 
self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: 
self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 
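# Dialect parsers extend this dispatch table rather than replace it: each value
# receives the parser instance and returns a column-constraint node. A minimal
# sketch of the pattern (the MyParser name and the "UNIQUE KEY" spelling are
# illustrative assumptions, not part of this module):
#
#     class MyParser(Parser):
#         CONSTRAINT_PARSERS = {
#             **Parser.CONSTRAINT_PARSERS,
#             # Reuse an existing handler for a dialect-specific keyword.
#             "UNIQUE KEY": lambda self: self._parse_unique(),
#         }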
688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 "joins": lambda self: list(iter(self._parse_join, None)), 742 "laterals": lambda self: list(iter(self._parse_lateral, None)), 743 "match": lambda self: self._parse_match_recognize(), 744 "where": lambda self: self._parse_where(), 745 "group": lambda self: self._parse_group(), 746 "having": lambda self: self._parse_having(), 747 "qualify": lambda self: self._parse_qualify(), 748 "windows": lambda self: self._parse_window_clause(), 749 "order": lambda self: self._parse_order(), 750 "limit": lambda self: self._parse_limit(), 751 "offset": lambda self: self._parse_offset(), 752 "locks": lambda self: self._parse_locks(), 753 "sample": lambda self: self._parse_table_sample(as_modifier=True), 754 } 755 756 SET_PARSERS = { 757 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 758 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 759 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 760 "TRANSACTION": lambda self: self._parse_set_transaction(), 761 } 762 763 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 764 765 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 766 767 MODIFIABLES = (exp.Subquery, exp.Subqueryable, 
exp.Table) 768 769 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 770 771 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 772 773 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 774 TRANSACTION_CHARACTERISTICS = { 775 "ISOLATION LEVEL REPEATABLE READ", 776 "ISOLATION LEVEL READ COMMITTED", 777 "ISOLATION LEVEL READ UNCOMMITTED", 778 "ISOLATION LEVEL SERIALIZABLE", 779 "READ WRITE", 780 "READ ONLY", 781 } 782 783 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 784 785 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 786 787 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 788 789 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 790 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 791 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 792 793 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 794 795 STRICT_CAST = True 796 797 # A NULL arg in CONCAT yields NULL by default 798 CONCAT_NULL_OUTPUTS_STRING = False 799 800 PREFIXED_PIVOT_COLUMNS = False 801 IDENTIFY_PIVOT_STRINGS = False 802 803 LOG_BASE_FIRST = True 804 LOG_DEFAULTS_TO_LN = False 805 806 __slots__ = ( 807 "error_level", 808 "error_message_context", 809 "max_errors", 810 "sql", 811 "errors", 812 "_tokens", 813 "_index", 814 "_curr", 815 "_next", 816 "_prev", 817 "_prev_comments", 818 ) 819 820 # Autofilled 821 INDEX_OFFSET: int = 0 822 UNNEST_COLUMN_ONLY: bool = False 823 ALIAS_POST_TABLESAMPLE: bool = False 824 STRICT_STRING_CONCAT = False 825 NULL_ORDERING: str = "nulls_are_small" 826 SHOW_TRIE: t.Dict = {} 827 SET_TRIE: t.Dict = {} 828 FORMAT_MAPPING: t.Dict[str, str] = {} 829 FORMAT_TRIE: t.Dict = {} 830 TIME_MAPPING: t.Dict[str, str] = {} 831 TIME_TRIE: t.Dict = {} 832 833 def __init__( 834 self, 835 error_level: t.Optional[ErrorLevel] = None, 836 error_message_context: int = 100, 837 max_errors: int = 3, 838 ): 839 self.error_level = error_level or ErrorLevel.IMMEDIATE 840 self.error_message_context = error_message_context 841 self.max_errors = max_errors 842 self.reset() 843 844 def reset(self): 845 self.sql = "" 846 self.errors = [] 847 self._tokens = [] 848 self._index = 0 849 self._curr = None 850 self._next = None 851 self._prev = None 852 self._prev_comments = None 853 854 def parse( 855 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 856 ) -> t.List[t.Optional[exp.Expression]]: 857 """ 858 Parses a list of tokens and returns a list of syntax trees, one tree 859 per parsed SQL statement. 860 861 Args: 862 raw_tokens: The list of tokens. 863 sql: The original SQL string, used to produce helpful debug messages. 864 865 Returns: 866 The list of the produced syntax trees. 867 """ 868 return self._parse( 869 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 870 ) 871 872 def parse_into( 873 self, 874 expression_types: exp.IntoType, 875 raw_tokens: t.List[Token], 876 sql: t.Optional[str] = None, 877 ) -> t.List[t.Optional[exp.Expression]]: 878 """ 879 Parses a list of tokens into a given Expression type. If a collection of Expression 880 types is given instead, this method will try to parse the token list into each one 881 of them, stopping at the first for which the parsing succeeds. 882 883 Args: 884 expression_types: The expression type(s) to try and parse the token list into. 885 raw_tokens: The list of tokens. 886 sql: The original SQL string, used to produce helpful debug messages. 
887 888 Returns: 889 The target Expression. 890 """ 891 errors = [] 892 for expression_type in ensure_list(expression_types): 893 parser = self.EXPRESSION_PARSERS.get(expression_type) 894 if not parser: 895 raise TypeError(f"No parser registered for {expression_type}") 896 897 try: 898 return self._parse(parser, raw_tokens, sql) 899 except ParseError as e: 900 e.errors[0]["into_expression"] = expression_type 901 errors.append(e) 902 903 raise ParseError( 904 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 905 errors=merge_errors(errors), 906 ) from errors[-1] 907 908 def _parse( 909 self, 910 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 911 raw_tokens: t.List[Token], 912 sql: t.Optional[str] = None, 913 ) -> t.List[t.Optional[exp.Expression]]: 914 self.reset() 915 self.sql = sql or "" 916 917 total = len(raw_tokens) 918 chunks: t.List[t.List[Token]] = [[]] 919 920 for i, token in enumerate(raw_tokens): 921 if token.token_type == TokenType.SEMICOLON: 922 if i < total - 1: 923 chunks.append([]) 924 else: 925 chunks[-1].append(token) 926 927 expressions = [] 928 929 for tokens in chunks: 930 self._index = -1 931 self._tokens = tokens 932 self._advance() 933 934 expressions.append(parse_method(self)) 935 936 if self._index < len(self._tokens): 937 self.raise_error("Invalid expression / Unexpected token") 938 939 self.check_errors() 940 941 return expressions 942 943 def check_errors(self) -> None: 944 """Logs or raises any found errors, depending on the chosen error level setting.""" 945 if self.error_level == ErrorLevel.WARN: 946 for error in self.errors: 947 logger.error(str(error)) 948 elif self.error_level == ErrorLevel.RAISE and self.errors: 949 raise ParseError( 950 concat_messages(self.errors, self.max_errors), 951 errors=merge_errors(self.errors), 952 ) 953 954 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 955 """ 956 Appends an error to the list of recorded errors or raises it, depending on the chosen 957 error level setting. 958 """ 959 token = token or self._curr or self._prev or Token.string("") 960 start = token.start 961 end = token.end + 1 962 start_context = self.sql[max(start - self.error_message_context, 0) : start] 963 highlight = self.sql[start:end] 964 end_context = self.sql[end : end + self.error_message_context] 965 966 error = ParseError.new( 967 f"{message}. Line {token.line}, Col: {token.col}.\n" 968 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 969 description=message, 970 line=token.line, 971 col=token.col, 972 start_context=start_context, 973 highlight=highlight, 974 end_context=end_context, 975 ) 976 977 if self.error_level == ErrorLevel.IMMEDIATE: 978 raise error 979 980 self.errors.append(error) 981 982 def expression( 983 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 984 ) -> E: 985 """ 986 Creates a new, validated Expression. 987 988 Args: 989 exp_class: The expression class to instantiate. 990 comments: An optional list of comments to attach to the expression. 991 kwargs: The arguments to set for the expression along with their respective values. 992 993 Returns: 994 The target expression.
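Example:
    An illustrative call mirroring the UNARY_PARSERS entry for NOT above (not an
    additional API): self.expression(exp.Not, this=self._parse_equality()) builds
    the exp.Not node, attaches any pending token comments when none are passed
    explicitly, and validates mandatory arguments before returning it.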
995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance) 999 1000 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1001 if expression and self._prev_comments: 1002 expression.add_comments(self._prev_comments) 1003 self._prev_comments = None 1004 1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression 1021 1022 def _find_sql(self, start: Token, end: Token) -> str: 1023 return self.sql[start.start : end.end + 1] 1024 1025 def _advance(self, times: int = 1) -> None: 1026 self._index += times 1027 self._curr = seq_get(self._tokens, self._index) 1028 self._next = seq_get(self._tokens, self._index + 1) 1029 1030 if self._index > 0: 1031 self._prev = self._tokens[self._index - 1] 1032 self._prev_comments = self._prev.comments 1033 else: 1034 self._prev = None 1035 self._prev_comments = None 1036 1037 def _retreat(self, index: int) -> None: 1038 if index != self._index: 1039 self._advance(index - self._index) 1040 1041 def _parse_command(self) -> exp.Command: 1042 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1043 1044 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1045 start = self._prev 1046 exists = self._parse_exists() if allow_exists else None 1047 1048 self._match(TokenType.ON) 1049 1050 kind = self._match_set(self.CREATABLES) and self._prev 1051 if not kind: 1052 return self._parse_as_command(start) 1053 1054 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1055 this = self._parse_user_defined_function(kind=kind.token_type) 1056 elif kind.token_type == TokenType.TABLE: 1057 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1058 elif kind.token_type == TokenType.COLUMN: 1059 this = self._parse_column() 1060 else: 1061 this = self._parse_id_var() 1062 1063 self._match(TokenType.IS) 1064 1065 return self.expression( 1066 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1067 ) 1068 1069 def _parse_to_table( 1070 self, 1071 ) -> exp.ToTableProperty: 1072 table = self._parse_table_parts(schema=True) 1073 return self.expression(exp.ToTableProperty, this=table) 1074 1075 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1076 def _parse_ttl(self) -> exp.Expression: 1077 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1078 this = self._parse_bitwise() 1079 1080 if self._match_text_seq("DELETE"): 1081 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1082 if self._match_text_seq("RECOMPRESS"): 1083 return self.expression( 1084 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1085 ) 1086 if self._match_text_seq("TO", "DISK"): 1087 return self.expression( 1088 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1089 ) 1090 if self._match_text_seq("TO", "VOLUME"): 1091 return self.expression( 1092 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1093 ) 1094 1095 return this 1096 1097 expressions = self._parse_csv(_parse_ttl_action) 1098 where = self._parse_where() 1099 group = self._parse_group() 1100 1101 aggregates = None 1102 if group and self._match(TokenType.SET): 1103 aggregates = self._parse_csv(self._parse_set_item) 1104 1105 return self.expression( 1106 exp.MergeTreeTTL, 1107 expressions=expressions, 1108 where=where, 1109 group=group, 1110 aggregates=aggregates, 1111 ) 1112 1113 def _parse_statement(self) -> t.Optional[exp.Expression]: 1114 if self._curr is None: 1115 return None 1116 1117 if self._match_set(self.STATEMENT_PARSERS): 1118 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1119 1120 if self._match_set(Tokenizer.COMMANDS): 1121 return self._parse_command() 1122 1123 expression = self._parse_expression() 1124 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1125 return self._parse_query_modifiers(expression) 1126 1127 def _parse_drop(self) -> exp.Drop | exp.Command: 1128 start = self._prev 1129 temporary = self._match(TokenType.TEMPORARY) 1130 materialized = self._match_text_seq("MATERIALIZED") 1131 1132 kind = self._match_set(self.CREATABLES) and self._prev.text 1133 if not kind: 1134 return self._parse_as_command(start) 1135 1136 return self.expression( 1137 exp.Drop, 1138 exists=self._parse_exists(), 1139 this=self._parse_table(schema=True), 1140 kind=kind, 1141 temporary=temporary, 1142 materialized=materialized, 1143 cascade=self._match_text_seq("CASCADE"), 1144 constraints=self._match_text_seq("CONSTRAINTS"), 1145 purge=self._match_text_seq("PURGE"), 1146 ) 1147 1148 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1149 return ( 1150 self._match(TokenType.IF) 1151 and (not not_ or self._match(TokenType.NOT)) 1152 and self._match(TokenType.EXISTS) 1153 ) 1154 1155 def _parse_create(self) -> exp.Create | exp.Command: 1156 # Note: this can't be None because we've matched a statement parser 1157 start = self._prev 1158 replace = start.text.upper() == "REPLACE" or self._match_pair( 1159 TokenType.OR, TokenType.REPLACE 1160 ) 1161 unique = self._match(TokenType.UNIQUE) 1162 1163 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1164 self._advance() 1165 1166 properties = None 1167 create_token = self._match_set(self.CREATABLES) and self._prev 1168 1169 if not create_token: 1170 # exp.Properties.Location.POST_CREATE 1171 properties = self._parse_properties() 1172 create_token = self._match_set(self.CREATABLES) and self._prev 1173 1174 if not properties or not create_token: 1175 return self._parse_as_command(start) 1176 1177 exists = self._parse_exists(not_=True) 1178 this = None 1179 expression = None 1180 indexes = None 1181 no_schema_binding = None 1182 begin = None 1183 clone = None 1184 1185 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1186 nonlocal properties 1187 if properties and temp_props: 1188 properties.expressions.extend(temp_props.expressions) 1189 elif temp_props: 1190 properties = temp_props 1191 1192 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1193 this = self._parse_user_defined_function(kind=create_token.token_type) 1194 1195 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1196 extend_props(self._parse_properties()) 1197 1198 self._match(TokenType.ALIAS) 1199 begin = self._match(TokenType.BEGIN) 1200 return_ = self._match_text_seq("RETURN") 1201 expression 
= self._parse_statement() 1202 1203 if return_: 1204 expression = self.expression(exp.Return, this=expression) 1205 elif create_token.token_type == TokenType.INDEX: 1206 this = self._parse_index(index=self._parse_id_var()) 1207 elif create_token.token_type in self.DB_CREATABLES: 1208 table_parts = self._parse_table_parts(schema=True) 1209 1210 # exp.Properties.Location.POST_NAME 1211 self._match(TokenType.COMMA) 1212 extend_props(self._parse_properties(before=True)) 1213 1214 this = self._parse_schema(this=table_parts) 1215 1216 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1217 extend_props(self._parse_properties()) 1218 1219 self._match(TokenType.ALIAS) 1220 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1221 # exp.Properties.Location.POST_ALIAS 1222 extend_props(self._parse_properties()) 1223 1224 expression = self._parse_ddl_select() 1225 1226 if create_token.token_type == TokenType.TABLE: 1227 indexes = [] 1228 while True: 1229 index = self._parse_index() 1230 1231 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1232 extend_props(self._parse_properties()) 1233 1234 if not index: 1235 break 1236 else: 1237 self._match(TokenType.COMMA) 1238 indexes.append(index) 1239 elif create_token.token_type == TokenType.VIEW: 1240 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1241 no_schema_binding = True 1242 1243 if self._match_text_seq("CLONE"): 1244 clone = self._parse_table(schema=True) 1245 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1246 clone_kind = ( 1247 self._match(TokenType.L_PAREN) 1248 and self._match_texts(self.CLONE_KINDS) 1249 and self._prev.text.upper() 1250 ) 1251 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1252 self._match(TokenType.R_PAREN) 1253 clone = self.expression( 1254 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1255 ) 1256 1257 return self.expression( 1258 exp.Create, 1259 this=this, 1260 kind=create_token.text, 1261 replace=replace, 1262 unique=unique, 1263 expression=expression, 1264 exists=exists, 1265 properties=properties, 1266 indexes=indexes, 1267 no_schema_binding=no_schema_binding, 1268 begin=begin, 1269 clone=clone, 1270 ) 1271 1272 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1273 # only used for teradata currently 1274 self._match(TokenType.COMMA) 1275 1276 kwargs = { 1277 "no": self._match_text_seq("NO"), 1278 "dual": self._match_text_seq("DUAL"), 1279 "before": self._match_text_seq("BEFORE"), 1280 "default": self._match_text_seq("DEFAULT"), 1281 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1282 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1283 "after": self._match_text_seq("AFTER"), 1284 "minimum": self._match_texts(("MIN", "MINIMUM")), 1285 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1286 } 1287 1288 if self._match_texts(self.PROPERTY_PARSERS): 1289 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1290 try: 1291 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1292 except TypeError: 1293 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1294 1295 return None 1296 1297 def _parse_property(self) -> t.Optional[exp.Expression]: 1298 if self._match_texts(self.PROPERTY_PARSERS): 1299 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1300 1301 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1302 return self._parse_character_set(default=True) 1303 1304 if self._match_text_seq("COMPOUND", "SORTKEY"): 1305 return 
self._parse_sortkey(compound=True) 1306 1307 if self._match_text_seq("SQL", "SECURITY"): 1308 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1309 1310 assignment = self._match_pair( 1311 TokenType.VAR, TokenType.EQ, advance=False 1312 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1313 1314 if assignment: 1315 key = self._parse_var_or_string() 1316 self._match(TokenType.EQ) 1317 return self.expression(exp.Property, this=key, value=self._parse_column()) 1318 1319 return None 1320 1321 def _parse_stored(self) -> exp.FileFormatProperty: 1322 self._match(TokenType.ALIAS) 1323 1324 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1325 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1326 1327 return self.expression( 1328 exp.FileFormatProperty, 1329 this=self.expression( 1330 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1331 ) 1332 if input_format or output_format 1333 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1334 ) 1335 1336 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1337 self._match(TokenType.EQ) 1338 self._match(TokenType.ALIAS) 1339 return self.expression(exp_class, this=self._parse_field()) 1340 1341 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1342 properties = [] 1343 while True: 1344 if before: 1345 prop = self._parse_property_before() 1346 else: 1347 prop = self._parse_property() 1348 1349 if not prop: 1350 break 1351 for p in ensure_list(prop): 1352 properties.append(p) 1353 1354 if properties: 1355 return self.expression(exp.Properties, expressions=properties) 1356 1357 return None 1358 1359 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1360 return self.expression( 1361 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1362 ) 1363 1364 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1365 if self._index >= 2: 1366 pre_volatile_token = self._tokens[self._index - 2] 1367 else: 1368 pre_volatile_token = None 1369 1370 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1371 return exp.VolatileProperty() 1372 1373 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1374 1375 def _parse_with_property( 1376 self, 1377 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1378 self._match(TokenType.WITH) 1379 if self._match(TokenType.L_PAREN, advance=False): 1380 return self._parse_wrapped_csv(self._parse_property) 1381 1382 if self._match_text_seq("JOURNAL"): 1383 return self._parse_withjournaltable() 1384 1385 if self._match_text_seq("DATA"): 1386 return self._parse_withdata(no=False) 1387 elif self._match_text_seq("NO", "DATA"): 1388 return self._parse_withdata(no=True) 1389 1390 if not self._next: 1391 return None 1392 1393 return self._parse_withisolatedloading() 1394 1395 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1396 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1397 self._match(TokenType.EQ) 1398 1399 user = self._parse_id_var() 1400 self._match(TokenType.PARAMETER) 1401 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1402 1403 if not user or not host: 1404 return None 1405 1406 return exp.DefinerProperty(this=f"{user}@{host}") 1407 1408 def _parse_withjournaltable(self) -> 
exp.WithJournalTableProperty: 1409 self._match(TokenType.TABLE) 1410 self._match(TokenType.EQ) 1411 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1412 1413 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1414 return self.expression(exp.LogProperty, no=no) 1415 1416 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1417 return self.expression(exp.JournalProperty, **kwargs) 1418 1419 def _parse_checksum(self) -> exp.ChecksumProperty: 1420 self._match(TokenType.EQ) 1421 1422 on = None 1423 if self._match(TokenType.ON): 1424 on = True 1425 elif self._match_text_seq("OFF"): 1426 on = False 1427 1428 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1429 1430 def _parse_cluster(self) -> exp.Cluster: 1431 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1432 1433 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1434 self._match_text_seq("BY") 1435 1436 self._match_l_paren() 1437 expressions = self._parse_csv(self._parse_column) 1438 self._match_r_paren() 1439 1440 if self._match_text_seq("SORTED", "BY"): 1441 self._match_l_paren() 1442 sorted_by = self._parse_csv(self._parse_ordered) 1443 self._match_r_paren() 1444 else: 1445 sorted_by = None 1446 1447 self._match(TokenType.INTO) 1448 buckets = self._parse_number() 1449 self._match_text_seq("BUCKETS") 1450 1451 return self.expression( 1452 exp.ClusteredByProperty, 1453 expressions=expressions, 1454 sorted_by=sorted_by, 1455 buckets=buckets, 1456 ) 1457 1458 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1459 if not self._match_text_seq("GRANTS"): 1460 self._retreat(self._index - 1) 1461 return None 1462 1463 return self.expression(exp.CopyGrantsProperty) 1464 1465 def _parse_freespace(self) -> exp.FreespaceProperty: 1466 self._match(TokenType.EQ) 1467 return self.expression( 1468 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1469 ) 1470 1471 def _parse_mergeblockratio( 1472 self, no: bool = False, default: bool = False 1473 ) -> exp.MergeBlockRatioProperty: 1474 if self._match(TokenType.EQ): 1475 return self.expression( 1476 exp.MergeBlockRatioProperty, 1477 this=self._parse_number(), 1478 percent=self._match(TokenType.PERCENT), 1479 ) 1480 1481 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1482 1483 def _parse_datablocksize( 1484 self, 1485 default: t.Optional[bool] = None, 1486 minimum: t.Optional[bool] = None, 1487 maximum: t.Optional[bool] = None, 1488 ) -> exp.DataBlocksizeProperty: 1489 self._match(TokenType.EQ) 1490 size = self._parse_number() 1491 1492 units = None 1493 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1494 units = self._prev.text 1495 1496 return self.expression( 1497 exp.DataBlocksizeProperty, 1498 size=size, 1499 units=units, 1500 default=default, 1501 minimum=minimum, 1502 maximum=maximum, 1503 ) 1504 1505 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1506 self._match(TokenType.EQ) 1507 always = self._match_text_seq("ALWAYS") 1508 manual = self._match_text_seq("MANUAL") 1509 never = self._match_text_seq("NEVER") 1510 default = self._match_text_seq("DEFAULT") 1511 1512 autotemp = None 1513 if self._match_text_seq("AUTOTEMP"): 1514 autotemp = self._parse_schema() 1515 1516 return self.expression( 1517 exp.BlockCompressionProperty, 1518 always=always, 1519 manual=manual, 1520 never=never, 1521 default=default, 1522 autotemp=autotemp, 1523 ) 1524 1525 def 
_parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1526 no = self._match_text_seq("NO") 1527 concurrent = self._match_text_seq("CONCURRENT") 1528 self._match_text_seq("ISOLATED", "LOADING") 1529 for_all = self._match_text_seq("FOR", "ALL") 1530 for_insert = self._match_text_seq("FOR", "INSERT") 1531 for_none = self._match_text_seq("FOR", "NONE") 1532 return self.expression( 1533 exp.IsolatedLoadingProperty, 1534 no=no, 1535 concurrent=concurrent, 1536 for_all=for_all, 1537 for_insert=for_insert, 1538 for_none=for_none, 1539 ) 1540 1541 def _parse_locking(self) -> exp.LockingProperty: 1542 if self._match(TokenType.TABLE): 1543 kind = "TABLE" 1544 elif self._match(TokenType.VIEW): 1545 kind = "VIEW" 1546 elif self._match(TokenType.ROW): 1547 kind = "ROW" 1548 elif self._match_text_seq("DATABASE"): 1549 kind = "DATABASE" 1550 else: 1551 kind = None 1552 1553 if kind in ("DATABASE", "TABLE", "VIEW"): 1554 this = self._parse_table_parts() 1555 else: 1556 this = None 1557 1558 if self._match(TokenType.FOR): 1559 for_or_in = "FOR" 1560 elif self._match(TokenType.IN): 1561 for_or_in = "IN" 1562 else: 1563 for_or_in = None 1564 1565 if self._match_text_seq("ACCESS"): 1566 lock_type = "ACCESS" 1567 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1568 lock_type = "EXCLUSIVE" 1569 elif self._match_text_seq("SHARE"): 1570 lock_type = "SHARE" 1571 elif self._match_text_seq("READ"): 1572 lock_type = "READ" 1573 elif self._match_text_seq("WRITE"): 1574 lock_type = "WRITE" 1575 elif self._match_text_seq("CHECKSUM"): 1576 lock_type = "CHECKSUM" 1577 else: 1578 lock_type = None 1579 1580 override = self._match_text_seq("OVERRIDE") 1581 1582 return self.expression( 1583 exp.LockingProperty, 1584 this=this, 1585 kind=kind, 1586 for_or_in=for_or_in, 1587 lock_type=lock_type, 1588 override=override, 1589 ) 1590 1591 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1592 if self._match(TokenType.PARTITION_BY): 1593 return self._parse_csv(self._parse_conjunction) 1594 return [] 1595 1596 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1597 self._match(TokenType.EQ) 1598 return self.expression( 1599 exp.PartitionedByProperty, 1600 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1601 ) 1602 1603 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1604 if self._match_text_seq("AND", "STATISTICS"): 1605 statistics = True 1606 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1607 statistics = False 1608 else: 1609 statistics = None 1610 1611 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1612 1613 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1614 if self._match_text_seq("PRIMARY", "INDEX"): 1615 return exp.NoPrimaryIndexProperty() 1616 return None 1617 1618 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1619 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1620 return exp.OnCommitProperty() 1621 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1622 return exp.OnCommitProperty(delete=True) 1623 return None 1624 1625 def _parse_distkey(self) -> exp.DistKeyProperty: 1626 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1627 1628 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1629 table = self._parse_table(schema=True) 1630 1631 options = [] 1632 while self._match_texts(("INCLUDING", "EXCLUDING")): 1633 this = self._prev.text.upper() 1634 1635 id_var = self._parse_id_var() 1636 if not id_var: 1637 
return None 1638 1639 options.append( 1640 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1641 ) 1642 1643 return self.expression(exp.LikeProperty, this=table, expressions=options) 1644 1645 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1646 return self.expression( 1647 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1648 ) 1649 1650 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1651 self._match(TokenType.EQ) 1652 return self.expression( 1653 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1654 ) 1655 1656 def _parse_returns(self) -> exp.ReturnsProperty: 1657 value: t.Optional[exp.Expression] 1658 is_table = self._match(TokenType.TABLE) 1659 1660 if is_table: 1661 if self._match(TokenType.LT): 1662 value = self.expression( 1663 exp.Schema, 1664 this="TABLE", 1665 expressions=self._parse_csv(self._parse_struct_types), 1666 ) 1667 if not self._match(TokenType.GT): 1668 self.raise_error("Expecting >") 1669 else: 1670 value = self._parse_schema(exp.var("TABLE")) 1671 else: 1672 value = self._parse_types() 1673 1674 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1675 1676 def _parse_describe(self) -> exp.Describe: 1677 kind = self._match_set(self.CREATABLES) and self._prev.text 1678 this = self._parse_table() 1679 return self.expression(exp.Describe, this=this, kind=kind) 1680 1681 def _parse_insert(self) -> exp.Insert: 1682 overwrite = self._match(TokenType.OVERWRITE) 1683 local = self._match_text_seq("LOCAL") 1684 alternative = None 1685 1686 if self._match_text_seq("DIRECTORY"): 1687 this: t.Optional[exp.Expression] = self.expression( 1688 exp.Directory, 1689 this=self._parse_var_or_string(), 1690 local=local, 1691 row_format=self._parse_row_format(match_row=True), 1692 ) 1693 else: 1694 if self._match(TokenType.OR): 1695 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1696 1697 self._match(TokenType.INTO) 1698 self._match(TokenType.TABLE) 1699 this = self._parse_table(schema=True) 1700 1701 return self.expression( 1702 exp.Insert, 1703 this=this, 1704 exists=self._parse_exists(), 1705 partition=self._parse_partition(), 1706 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1707 and self._parse_conjunction(), 1708 expression=self._parse_ddl_select(), 1709 conflict=self._parse_on_conflict(), 1710 returning=self._parse_returning(), 1711 overwrite=overwrite, 1712 alternative=alternative, 1713 ) 1714 1715 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1716 conflict = self._match_text_seq("ON", "CONFLICT") 1717 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1718 1719 if not conflict and not duplicate: 1720 return None 1721 1722 nothing = None 1723 expressions = None 1724 key = None 1725 constraint = None 1726 1727 if conflict: 1728 if self._match_text_seq("ON", "CONSTRAINT"): 1729 constraint = self._parse_id_var() 1730 else: 1731 key = self._parse_csv(self._parse_value) 1732 1733 self._match_text_seq("DO") 1734 if self._match_text_seq("NOTHING"): 1735 nothing = True 1736 else: 1737 self._match(TokenType.UPDATE) 1738 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1739 1740 return self.expression( 1741 exp.OnConflict, 1742 duplicate=duplicate, 1743 expressions=expressions, 1744 nothing=nothing, 1745 key=key, 1746 constraint=constraint, 1747 ) 1748 1749 def _parse_returning(self) -> t.Optional[exp.Returning]: 1750 if not 
self._match(TokenType.RETURNING): 1751 return None 1752 1753 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1754 1755 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1756 if not self._match(TokenType.FORMAT): 1757 return None 1758 return self._parse_row_format() 1759 1760 def _parse_row_format( 1761 self, match_row: bool = False 1762 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1763 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1764 return None 1765 1766 if self._match_text_seq("SERDE"): 1767 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1768 1769 self._match_text_seq("DELIMITED") 1770 1771 kwargs = {} 1772 1773 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1774 kwargs["fields"] = self._parse_string() 1775 if self._match_text_seq("ESCAPED", "BY"): 1776 kwargs["escaped"] = self._parse_string() 1777 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1778 kwargs["collection_items"] = self._parse_string() 1779 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1780 kwargs["map_keys"] = self._parse_string() 1781 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1782 kwargs["lines"] = self._parse_string() 1783 if self._match_text_seq("NULL", "DEFINED", "AS"): 1784 kwargs["null"] = self._parse_string() 1785 1786 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1787 1788 def _parse_load(self) -> exp.LoadData | exp.Command: 1789 if self._match_text_seq("DATA"): 1790 local = self._match_text_seq("LOCAL") 1791 self._match_text_seq("INPATH") 1792 inpath = self._parse_string() 1793 overwrite = self._match(TokenType.OVERWRITE) 1794 self._match_pair(TokenType.INTO, TokenType.TABLE) 1795 1796 return self.expression( 1797 exp.LoadData, 1798 this=self._parse_table(schema=True), 1799 local=local, 1800 overwrite=overwrite, 1801 inpath=inpath, 1802 partition=self._parse_partition(), 1803 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1804 serde=self._match_text_seq("SERDE") and self._parse_string(), 1805 ) 1806 return self._parse_as_command(self._prev) 1807 1808 def _parse_delete(self) -> exp.Delete: 1809 self._match(TokenType.FROM) 1810 1811 return self.expression( 1812 exp.Delete, 1813 this=self._parse_table(), 1814 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1815 where=self._parse_where(), 1816 returning=self._parse_returning(), 1817 limit=self._parse_limit(), 1818 ) 1819 1820 def _parse_update(self) -> exp.Update: 1821 return self.expression( 1822 exp.Update, 1823 **{ # type: ignore 1824 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1825 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1826 "from": self._parse_from(modifiers=True), 1827 "where": self._parse_where(), 1828 "returning": self._parse_returning(), 1829 "limit": self._parse_limit(), 1830 }, 1831 ) 1832 1833 def _parse_uncache(self) -> exp.Uncache: 1834 if not self._match(TokenType.TABLE): 1835 self.raise_error("Expecting TABLE after UNCACHE") 1836 1837 return self.expression( 1838 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1839 ) 1840 1841 def _parse_cache(self) -> exp.Cache: 1842 lazy = self._match_text_seq("LAZY") 1843 self._match(TokenType.TABLE) 1844 table = self._parse_table(schema=True) 1845 1846 options = [] 1847 if 
self._match_text_seq("OPTIONS"): 1848 self._match_l_paren() 1849 k = self._parse_string() 1850 self._match(TokenType.EQ) 1851 v = self._parse_string() 1852 options = [k, v] 1853 self._match_r_paren() 1854 1855 self._match(TokenType.ALIAS) 1856 return self.expression( 1857 exp.Cache, 1858 this=table, 1859 lazy=lazy, 1860 options=options, 1861 expression=self._parse_select(nested=True), 1862 ) 1863 1864 def _parse_partition(self) -> t.Optional[exp.Partition]: 1865 if not self._match(TokenType.PARTITION): 1866 return None 1867 1868 return self.expression( 1869 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1870 ) 1871 1872 def _parse_value(self) -> exp.Tuple: 1873 if self._match(TokenType.L_PAREN): 1874 expressions = self._parse_csv(self._parse_conjunction) 1875 self._match_r_paren() 1876 return self.expression(exp.Tuple, expressions=expressions) 1877 1878 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1879 # Source: https://prestodb.io/docs/current/sql/values.html 1880 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1881 1882 def _parse_select( 1883 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1884 ) -> t.Optional[exp.Expression]: 1885 cte = self._parse_with() 1886 if cte: 1887 this = self._parse_statement() 1888 1889 if not this: 1890 self.raise_error("Failed to parse any statement following CTE") 1891 return cte 1892 1893 if "with" in this.arg_types: 1894 this.set("with", cte) 1895 else: 1896 self.raise_error(f"{this.key} does not support CTE") 1897 this = cte 1898 elif self._match(TokenType.SELECT): 1899 comments = self._prev_comments 1900 1901 hint = self._parse_hint() 1902 all_ = self._match(TokenType.ALL) 1903 distinct = self._match(TokenType.DISTINCT) 1904 1905 kind = ( 1906 self._match(TokenType.ALIAS) 1907 and self._match_texts(("STRUCT", "VALUE")) 1908 and self._prev.text 1909 ) 1910 1911 if distinct: 1912 distinct = self.expression( 1913 exp.Distinct, 1914 on=self._parse_value() if self._match(TokenType.ON) else None, 1915 ) 1916 1917 if all_ and distinct: 1918 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1919 1920 limit = self._parse_limit(top=True) 1921 expressions = self._parse_csv(self._parse_expression) 1922 1923 this = self.expression( 1924 exp.Select, 1925 kind=kind, 1926 hint=hint, 1927 distinct=distinct, 1928 expressions=expressions, 1929 limit=limit, 1930 ) 1931 this.comments = comments 1932 1933 into = self._parse_into() 1934 if into: 1935 this.set("into", into) 1936 1937 from_ = self._parse_from() 1938 if from_: 1939 this.set("from", from_) 1940 1941 this = self._parse_query_modifiers(this) 1942 elif (table or nested) and self._match(TokenType.L_PAREN): 1943 if self._match(TokenType.PIVOT): 1944 this = self._parse_simplified_pivot() 1945 elif self._match(TokenType.FROM): 1946 this = exp.select("*").from_( 1947 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1948 ) 1949 else: 1950 this = self._parse_table() if table else self._parse_select(nested=True) 1951 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1952 1953 self._match_r_paren() 1954 1955 # early return so that subquery unions aren't parsed again 1956 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1957 # Union ALL should be a property of the top select node, not the subquery 1958 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1959 elif self._match(TokenType.VALUES): 1960 this = self.expression( 1961 exp.Values, 1962 
expressions=self._parse_csv(self._parse_value), 1963 alias=self._parse_table_alias(), 1964 ) 1965 else: 1966 this = None 1967 1968 return self._parse_set_operations(this) 1969 1970 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1971 if not skip_with_token and not self._match(TokenType.WITH): 1972 return None 1973 1974 comments = self._prev_comments 1975 recursive = self._match(TokenType.RECURSIVE) 1976 1977 expressions = [] 1978 while True: 1979 expressions.append(self._parse_cte()) 1980 1981 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1982 break 1983 else: 1984 self._match(TokenType.WITH) 1985 1986 return self.expression( 1987 exp.With, comments=comments, expressions=expressions, recursive=recursive 1988 ) 1989 1990 def _parse_cte(self) -> exp.CTE: 1991 alias = self._parse_table_alias() 1992 if not alias or not alias.this: 1993 self.raise_error("Expected CTE to have alias") 1994 1995 self._match(TokenType.ALIAS) 1996 return self.expression( 1997 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1998 ) 1999 2000 def _parse_table_alias( 2001 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2002 ) -> t.Optional[exp.TableAlias]: 2003 any_token = self._match(TokenType.ALIAS) 2004 alias = ( 2005 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2006 or self._parse_string_as_identifier() 2007 ) 2008 2009 index = self._index 2010 if self._match(TokenType.L_PAREN): 2011 columns = self._parse_csv(self._parse_function_parameter) 2012 self._match_r_paren() if columns else self._retreat(index) 2013 else: 2014 columns = None 2015 2016 if not alias and not columns: 2017 return None 2018 2019 return self.expression(exp.TableAlias, this=alias, columns=columns) 2020 2021 def _parse_subquery( 2022 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2023 ) -> t.Optional[exp.Subquery]: 2024 if not this: 2025 return None 2026 2027 return self.expression( 2028 exp.Subquery, 2029 this=this, 2030 pivots=self._parse_pivots(), 2031 alias=self._parse_table_alias() if parse_alias else None, 2032 ) 2033 2034 def _parse_query_modifiers( 2035 self, this: t.Optional[exp.Expression] 2036 ) -> t.Optional[exp.Expression]: 2037 if isinstance(this, self.MODIFIABLES): 2038 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2039 expression = parser(self) 2040 2041 if expression: 2042 if key == "limit": 2043 offset = expression.args.pop("offset", None) 2044 if offset: 2045 this.set("offset", exp.Offset(expression=offset)) 2046 this.set(key, expression) 2047 return this 2048 2049 def _parse_hint(self) -> t.Optional[exp.Hint]: 2050 if self._match(TokenType.HINT): 2051 hints = self._parse_csv(self._parse_function) 2052 2053 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2054 self.raise_error("Expected */ after HINT") 2055 2056 return self.expression(exp.Hint, expressions=hints) 2057 2058 return None 2059 2060 def _parse_into(self) -> t.Optional[exp.Into]: 2061 if not self._match(TokenType.INTO): 2062 return None 2063 2064 temp = self._match(TokenType.TEMPORARY) 2065 unlogged = self._match_text_seq("UNLOGGED") 2066 self._match(TokenType.TABLE) 2067 2068 return self.expression( 2069 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2070 ) 2071 2072 def _parse_from( 2073 self, modifiers: bool = False, skip_from_token: bool = False 2074 ) -> t.Optional[exp.From]: 2075 if not skip_from_token and not self._match(TokenType.FROM): 2076 return None 
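# The guard above is the idiom used by most optional-clause parsers in this
# module: try to consume the introducing token, return None if it is absent
# (self._match only advances the cursor on success), and only then build the
# node. A hedged sketch of the same shape for a made-up clause keyword (the
# "FOO" keyword is illustrative; exp.Var and the helpers are real):
#
#     def _parse_foo(self) -> t.Optional[exp.Expression]:
#         if not self._match_text_seq("FOO"):
#             return None
#         return self.expression(exp.Var, this=self._prev.text)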
2077 2078 comments = self._prev_comments 2079 this = self._parse_table() 2080 2081 return self.expression( 2082 exp.From, 2083 comments=comments, 2084 this=self._parse_query_modifiers(this) if modifiers else this, 2085 ) 2086 2087 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2088 if not self._match(TokenType.MATCH_RECOGNIZE): 2089 return None 2090 2091 self._match_l_paren() 2092 2093 partition = self._parse_partition_by() 2094 order = self._parse_order() 2095 measures = ( 2096 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2097 ) 2098 2099 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2100 rows = exp.var("ONE ROW PER MATCH") 2101 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2102 text = "ALL ROWS PER MATCH" 2103 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2104 text += " SHOW EMPTY MATCHES" 2105 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2106 text += " OMIT EMPTY MATCHES" 2107 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2108 text += " WITH UNMATCHED ROWS" 2109 rows = exp.var(text) 2110 else: 2111 rows = None 2112 2113 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2114 text = "AFTER MATCH SKIP" 2115 if self._match_text_seq("PAST", "LAST", "ROW"): 2116 text += " PAST LAST ROW" 2117 elif self._match_text_seq("TO", "NEXT", "ROW"): 2118 text += " TO NEXT ROW" 2119 elif self._match_text_seq("TO", "FIRST"): 2120 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2121 elif self._match_text_seq("TO", "LAST"): 2122 text += f" TO LAST {self._advance_any().text}" # type: ignore 2123 after = exp.var(text) 2124 else: 2125 after = None 2126 2127 if self._match_text_seq("PATTERN"): 2128 self._match_l_paren() 2129 2130 if not self._curr: 2131 self.raise_error("Expecting )", self._curr) 2132 2133 paren = 1 2134 start = self._curr 2135 2136 while self._curr and paren > 0: 2137 if self._curr.token_type == TokenType.L_PAREN: 2138 paren += 1 2139 if self._curr.token_type == TokenType.R_PAREN: 2140 paren -= 1 2141 2142 end = self._prev 2143 self._advance() 2144 2145 if paren > 0: 2146 self.raise_error("Expecting )", self._curr) 2147 2148 pattern = exp.var(self._find_sql(start, end)) 2149 else: 2150 pattern = None 2151 2152 define = ( 2153 self._parse_csv( 2154 lambda: self.expression( 2155 exp.Alias, 2156 alias=self._parse_id_var(any_token=True), 2157 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2158 ) 2159 ) 2160 if self._match_text_seq("DEFINE") 2161 else None 2162 ) 2163 2164 self._match_r_paren() 2165 2166 return self.expression( 2167 exp.MatchRecognize, 2168 partition_by=partition, 2169 order=order, 2170 measures=measures, 2171 rows=rows, 2172 after=after, 2173 pattern=pattern, 2174 define=define, 2175 alias=self._parse_table_alias(), 2176 ) 2177 2178 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2179 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2180 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2181 2182 if outer_apply or cross_apply: 2183 this = self._parse_select(table=True) 2184 view = None 2185 outer = not cross_apply 2186 elif self._match(TokenType.LATERAL): 2187 this = self._parse_select(table=True) 2188 view = self._match(TokenType.VIEW) 2189 outer = self._match(TokenType.OUTER) 2190 else: 2191 return None 2192 2193 if not this: 2194 this = self._parse_function() or self._parse_id_var(any_token=False) 2195 while self._match(TokenType.DOT): 2196 this = exp.Dot( 2197 this=this, 2198
expression=self._parse_function() or self._parse_id_var(any_token=False), 2199 ) 2200 2201 if view: 2202 table = self._parse_id_var(any_token=False) 2203 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2204 table_alias: t.Optional[exp.TableAlias] = self.expression( 2205 exp.TableAlias, this=table, columns=columns 2206 ) 2207 elif isinstance(this, exp.Subquery) and this.alias: 2208 # Ensures parity between the Subquery's and the Lateral's "alias" args 2209 table_alias = this.args["alias"].copy() 2210 else: 2211 table_alias = self._parse_table_alias() 2212 2213 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2214 2215 def _parse_join_parts( 2216 self, 2217 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2218 return ( 2219 self._match_set(self.JOIN_METHODS) and self._prev, 2220 self._match_set(self.JOIN_SIDES) and self._prev, 2221 self._match_set(self.JOIN_KINDS) and self._prev, 2222 ) 2223 2224 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2225 if self._match(TokenType.COMMA): 2226 return self.expression(exp.Join, this=self._parse_table()) 2227 2228 index = self._index 2229 method, side, kind = self._parse_join_parts() 2230 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2231 join = self._match(TokenType.JOIN) 2232 2233 if not skip_join_token and not join: 2234 self._retreat(index) 2235 kind = None 2236 method = None 2237 side = None 2238 2239 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2240 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2241 2242 if not skip_join_token and not join and not outer_apply and not cross_apply: 2243 return None 2244 2245 if outer_apply: 2246 side = Token(TokenType.LEFT, "LEFT") 2247 2248 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2249 2250 if method: 2251 kwargs["method"] = method.text 2252 if side: 2253 kwargs["side"] = side.text 2254 if kind: 2255 kwargs["kind"] = kind.text 2256 if hint: 2257 kwargs["hint"] = hint 2258 2259 if self._match(TokenType.ON): 2260 kwargs["on"] = self._parse_conjunction() 2261 elif self._match(TokenType.USING): 2262 kwargs["using"] = self._parse_wrapped_id_vars() 2263 2264 return self.expression(exp.Join, **kwargs) 2265 2266 def _parse_index( 2267 self, 2268 index: t.Optional[exp.Expression] = None, 2269 ) -> t.Optional[exp.Index]: 2270 if index: 2271 unique = None 2272 primary = None 2273 amp = None 2274 2275 self._match(TokenType.ON) 2276 self._match(TokenType.TABLE) # hive 2277 table = self._parse_table_parts(schema=True) 2278 else: 2279 unique = self._match(TokenType.UNIQUE) 2280 primary = self._match_text_seq("PRIMARY") 2281 amp = self._match_text_seq("AMP") 2282 2283 if not self._match(TokenType.INDEX): 2284 return None 2285 2286 index = self._parse_id_var() 2287 table = None 2288 2289 using = self._parse_field() if self._match(TokenType.USING) else None 2290 2291 if self._match(TokenType.L_PAREN, advance=False): 2292 columns = self._parse_wrapped_csv(self._parse_ordered) 2293 else: 2294 columns = None 2295 2296 return self.expression( 2297 exp.Index, 2298 this=index, 2299 table=table, 2300 using=using, 2301 columns=columns, 2302 unique=unique, 2303 primary=primary, 2304 amp=amp, 2305 partition_by=self._parse_partition_by(), 2306 ) 2307 2308 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2309 hints: t.List[exp.Expression] = [] 2310 if self._match_pair(TokenType.WITH, 
TokenType.L_PAREN): 2311 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2312 hints.append( 2313 self.expression( 2314 exp.WithTableHint, 2315 expressions=self._parse_csv( 2316 lambda: self._parse_function() or self._parse_var(any_token=True) 2317 ), 2318 ) 2319 ) 2320 self._match_r_paren() 2321 else: 2322 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2323 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2324 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2325 2326 self._match_texts({"INDEX", "KEY"}) 2327 if self._match(TokenType.FOR): 2328 hint.set("target", self._advance_any() and self._prev.text.upper()) 2329 2330 hint.set("expressions", self._parse_wrapped_id_vars()) 2331 hints.append(hint) 2332 2333 return hints or None 2334 2335 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2336 return ( 2337 (not schema and self._parse_function(optional_parens=False)) 2338 or self._parse_id_var(any_token=False) 2339 or self._parse_string_as_identifier() 2340 or self._parse_placeholder() 2341 ) 2342 2343 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2344 catalog = None 2345 db = None 2346 table = self._parse_table_part(schema=schema) 2347 2348 while self._match(TokenType.DOT): 2349 if catalog: 2350 # This allows nesting the table in arbitrarily many dot expressions if needed 2351 table = self.expression( 2352 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2353 ) 2354 else: 2355 catalog = db 2356 db = table 2357 table = self._parse_table_part(schema=schema) 2358 2359 if not table: 2360 self.raise_error(f"Expected table name but got {self._curr}") 2361 2362 return self.expression( 2363 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2364 ) 2365 2366 def _parse_table( 2367 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2368 ) -> t.Optional[exp.Expression]: 2369 lateral = self._parse_lateral() 2370 if lateral: 2371 return lateral 2372 2373 unnest = self._parse_unnest() 2374 if unnest: 2375 return unnest 2376 2377 values = self._parse_derived_table_values() 2378 if values: 2379 return values 2380 2381 subquery = self._parse_select(table=True) 2382 if subquery: 2383 if not subquery.args.get("pivots"): 2384 subquery.set("pivots", self._parse_pivots()) 2385 return subquery 2386 2387 this: exp.Expression = self._parse_table_parts(schema=schema) 2388 2389 if schema: 2390 return self._parse_schema(this=this) 2391 2392 if self.ALIAS_POST_TABLESAMPLE: 2393 table_sample = self._parse_table_sample() 2394 2395 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2396 if alias: 2397 this.set("alias", alias) 2398 2399 if not this.args.get("pivots"): 2400 this.set("pivots", self._parse_pivots()) 2401 2402 this.set("hints", self._parse_table_hints()) 2403 2404 if not self.ALIAS_POST_TABLESAMPLE: 2405 table_sample = self._parse_table_sample() 2406 2407 if table_sample: 2408 table_sample.set("this", this) 2409 this = table_sample 2410 2411 return this 2412 2413 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2414 if not self._match(TokenType.UNNEST): 2415 return None 2416 2417 expressions = self._parse_wrapped_csv(self._parse_type) 2418 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2419 2420 alias = self._parse_table_alias() if with_alias else None 2421 2422 if alias and self.UNNEST_COLUMN_ONLY: 2423 if 
alias.args.get("columns"): 2424 self.raise_error("Unexpected extra column alias in unnest.") 2425 2426 alias.set("columns", [alias.this]) 2427 alias.set("this", None) 2428 2429 offset = None 2430 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2431 self._match(TokenType.ALIAS) 2432 offset = self._parse_id_var() or exp.to_identifier("offset") 2433 2434 return self.expression( 2435 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2436 ) 2437 2438 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2439 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2440 if not is_derived and not self._match(TokenType.VALUES): 2441 return None 2442 2443 expressions = self._parse_csv(self._parse_value) 2444 alias = self._parse_table_alias() 2445 2446 if is_derived: 2447 self._match_r_paren() 2448 2449 return self.expression( 2450 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2451 ) 2452 2453 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2454 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2455 as_modifier and self._match_text_seq("USING", "SAMPLE") 2456 ): 2457 return None 2458 2459 bucket_numerator = None 2460 bucket_denominator = None 2461 bucket_field = None 2462 percent = None 2463 rows = None 2464 size = None 2465 seed = None 2466 2467 kind = ( 2468 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2469 ) 2470 method = self._parse_var(tokens=(TokenType.ROW,)) 2471 2472 self._match(TokenType.L_PAREN) 2473 2474 num = self._parse_number() 2475 2476 if self._match_text_seq("BUCKET"): 2477 bucket_numerator = self._parse_number() 2478 self._match_text_seq("OUT", "OF") 2479 bucket_denominator = self._parse_number() 2480 self._match(TokenType.ON) 2481 bucket_field = self._parse_field() 2482 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2483 percent = num 2484 elif self._match(TokenType.ROWS): 2485 rows = num 2486 else: 2487 size = num 2488 2489 self._match(TokenType.R_PAREN) 2490 2491 if self._match(TokenType.L_PAREN): 2492 method = self._parse_var() 2493 seed = self._match(TokenType.COMMA) and self._parse_number() 2494 self._match_r_paren() 2495 elif self._match_texts(("SEED", "REPEATABLE")): 2496 seed = self._parse_wrapped(self._parse_number) 2497 2498 return self.expression( 2499 exp.TableSample, 2500 method=method, 2501 bucket_numerator=bucket_numerator, 2502 bucket_denominator=bucket_denominator, 2503 bucket_field=bucket_field, 2504 percent=percent, 2505 rows=rows, 2506 size=size, 2507 seed=seed, 2508 kind=kind, 2509 ) 2510 2511 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2512 return list(iter(self._parse_pivot, None)) 2513 2514 # https://duckdb.org/docs/sql/statements/pivot 2515 def _parse_simplified_pivot(self) -> exp.Pivot: 2516 def _parse_on() -> t.Optional[exp.Expression]: 2517 this = self._parse_bitwise() 2518 return self._parse_in(this) if self._match(TokenType.IN) else this 2519 2520 this = self._parse_table() 2521 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2522 using = self._match(TokenType.USING) and self._parse_csv( 2523 lambda: self._parse_alias(self._parse_function()) 2524 ) 2525 group = self._parse_group() 2526 return self.expression( 2527 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2528 ) 2529 2530 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2531 index = self._index 2532 2533 if
self._match(TokenType.PIVOT): 2534 unpivot = False 2535 elif self._match(TokenType.UNPIVOT): 2536 unpivot = True 2537 else: 2538 return None 2539 2540 expressions = [] 2541 field = None 2542 2543 if not self._match(TokenType.L_PAREN): 2544 self._retreat(index) 2545 return None 2546 2547 if unpivot: 2548 expressions = self._parse_csv(self._parse_column) 2549 else: 2550 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2551 2552 if not expressions: 2553 self.raise_error("Failed to parse PIVOT's aggregation list") 2554 2555 if not self._match(TokenType.FOR): 2556 self.raise_error("Expecting FOR") 2557 2558 value = self._parse_column() 2559 2560 if not self._match(TokenType.IN): 2561 self.raise_error("Expecting IN") 2562 2563 field = self._parse_in(value, alias=True) 2564 2565 self._match_r_paren() 2566 2567 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2568 2569 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2570 pivot.set("alias", self._parse_table_alias()) 2571 2572 if not unpivot: 2573 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2574 2575 columns: t.List[exp.Expression] = [] 2576 for fld in pivot.args["field"].expressions: 2577 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2578 for name in names: 2579 if self.PREFIXED_PIVOT_COLUMNS: 2580 name = f"{name}_{field_name}" if name else field_name 2581 else: 2582 name = f"{field_name}_{name}" if name else field_name 2583 2584 columns.append(exp.to_identifier(name)) 2585 2586 pivot.set("columns", columns) 2587 2588 return pivot 2589 2590 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2591 return [agg.alias for agg in aggregations] 2592 2593 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2594 if not skip_where_token and not self._match(TokenType.WHERE): 2595 return None 2596 2597 return self.expression( 2598 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2599 ) 2600 2601 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2602 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2603 return None 2604 2605 elements = defaultdict(list) 2606 2607 while True: 2608 expressions = self._parse_csv(self._parse_conjunction) 2609 if expressions: 2610 elements["expressions"].extend(expressions) 2611 2612 grouping_sets = self._parse_grouping_sets() 2613 if grouping_sets: 2614 elements["grouping_sets"].extend(grouping_sets) 2615 2616 rollup = None 2617 cube = None 2618 totals = None 2619 2620 with_ = self._match(TokenType.WITH) 2621 if self._match(TokenType.ROLLUP): 2622 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2623 elements["rollup"].extend(ensure_list(rollup)) 2624 2625 if self._match(TokenType.CUBE): 2626 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2627 elements["cube"].extend(ensure_list(cube)) 2628 2629 if self._match_text_seq("TOTALS"): 2630 totals = True 2631 elements["totals"] = True # type: ignore 2632 2633 if not (grouping_sets or rollup or cube or totals): 2634 break 2635 2636 return self.expression(exp.Group, **elements) # type: ignore 2637 2638 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2639 if not self._match(TokenType.GROUPING_SETS): 2640 return None 2641 2642 return self._parse_wrapped_csv(self._parse_grouping_set) 2643 2644 def _parse_grouping_set(self) -> 
t.Optional[exp.Expression]: 2645 if self._match(TokenType.L_PAREN): 2646 grouping_set = self._parse_csv(self._parse_column) 2647 self._match_r_paren() 2648 return self.expression(exp.Tuple, expressions=grouping_set) 2649 2650 return self._parse_column() 2651 2652 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2653 if not skip_having_token and not self._match(TokenType.HAVING): 2654 return None 2655 return self.expression(exp.Having, this=self._parse_conjunction()) 2656 2657 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2658 if not self._match(TokenType.QUALIFY): 2659 return None 2660 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2661 2662 def _parse_order( 2663 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2664 ) -> t.Optional[exp.Expression]: 2665 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2666 return this 2667 2668 return self.expression( 2669 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2670 ) 2671 2672 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2673 if not self._match(token): 2674 return None 2675 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2676 2677 def _parse_ordered(self) -> exp.Ordered: 2678 this = self._parse_conjunction() 2679 self._match(TokenType.ASC) 2680 2681 is_desc = self._match(TokenType.DESC) 2682 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2683 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2684 desc = is_desc or False 2685 asc = not desc 2686 nulls_first = is_nulls_first or False 2687 explicitly_null_ordered = is_nulls_first or is_nulls_last 2688 2689 if ( 2690 not explicitly_null_ordered 2691 and ( 2692 (asc and self.NULL_ORDERING == "nulls_are_small") 2693 or (desc and self.NULL_ORDERING != "nulls_are_small") 2694 ) 2695 and self.NULL_ORDERING != "nulls_are_last" 2696 ): 2697 nulls_first = True 2698 2699 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2700 2701 def _parse_limit( 2702 self, this: t.Optional[exp.Expression] = None, top: bool = False 2703 ) -> t.Optional[exp.Expression]: 2704 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2705 limit_paren = self._match(TokenType.L_PAREN) 2706 expression = self._parse_number() if top else self._parse_term() 2707 2708 if self._match(TokenType.COMMA): 2709 offset = expression 2710 expression = self._parse_term() 2711 else: 2712 offset = None 2713 2714 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2715 2716 if limit_paren: 2717 self._match_r_paren() 2718 2719 return limit_exp 2720 2721 if self._match(TokenType.FETCH): 2722 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2723 direction = self._prev.text if direction else "FIRST" 2724 2725 count = self._parse_number() 2726 percent = self._match(TokenType.PERCENT) 2727 2728 self._match_set((TokenType.ROW, TokenType.ROWS)) 2729 2730 only = self._match_text_seq("ONLY") 2731 with_ties = self._match_text_seq("WITH", "TIES") 2732 2733 if only and with_ties: 2734 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2735 2736 return self.expression( 2737 exp.Fetch, 2738 direction=direction, 2739 count=count, 2740 percent=percent, 2741 with_ties=with_ties, 2742 ) 2743 2744 return this 2745 2746 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2747 if not 
self._match(TokenType.OFFSET): 2748 return this 2749 2750 count = self._parse_number() 2751 self._match_set((TokenType.ROW, TokenType.ROWS)) 2752 return self.expression(exp.Offset, this=this, expression=count) 2753 2754 def _parse_locks(self) -> t.List[exp.Lock]: 2755 locks = [] 2756 while True: 2757 if self._match_text_seq("FOR", "UPDATE"): 2758 update = True 2759 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2760 "LOCK", "IN", "SHARE", "MODE" 2761 ): 2762 update = False 2763 else: 2764 break 2765 2766 expressions = None 2767 if self._match_text_seq("OF"): 2768 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2769 2770 wait: t.Optional[bool | exp.Expression] = None 2771 if self._match_text_seq("NOWAIT"): 2772 wait = True 2773 elif self._match_text_seq("WAIT"): 2774 wait = self._parse_primary() 2775 elif self._match_text_seq("SKIP", "LOCKED"): 2776 wait = False 2777 2778 locks.append( 2779 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2780 ) 2781 2782 return locks 2783 2784 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2785 if not self._match_set(self.SET_OPERATIONS): 2786 return this 2787 2788 token_type = self._prev.token_type 2789 2790 if token_type == TokenType.UNION: 2791 expression = exp.Union 2792 elif token_type == TokenType.EXCEPT: 2793 expression = exp.Except 2794 else: 2795 expression = exp.Intersect 2796 2797 return self.expression( 2798 expression, 2799 this=this, 2800 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2801 expression=self._parse_set_operations(self._parse_select(nested=True)), 2802 ) 2803 2804 def _parse_expression(self) -> t.Optional[exp.Expression]: 2805 return self._parse_alias(self._parse_conjunction()) 2806 2807 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2808 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2809 2810 def _parse_equality(self) -> t.Optional[exp.Expression]: 2811 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2812 2813 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2814 return self._parse_tokens(self._parse_range, self.COMPARISON) 2815 2816 def _parse_range(self) -> t.Optional[exp.Expression]: 2817 this = self._parse_bitwise() 2818 negate = self._match(TokenType.NOT) 2819 2820 if self._match_set(self.RANGE_PARSERS): 2821 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2822 if not expression: 2823 return this 2824 2825 this = expression 2826 elif self._match(TokenType.ISNULL): 2827 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2828 2829 # Postgres supports ISNULL and NOTNULL for conditions. 
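        # (e.g. `x NOTNULL` is parsed as NOT (x IS NULL), mirroring the ISNULL branch above)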
2830 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2831 if self._match(TokenType.NOTNULL): 2832 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2833 this = self.expression(exp.Not, this=this) 2834 2835 if negate: 2836 this = self.expression(exp.Not, this=this) 2837 2838 if self._match(TokenType.IS): 2839 this = self._parse_is(this) 2840 2841 return this 2842 2843 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2844 index = self._index - 1 2845 negate = self._match(TokenType.NOT) 2846 2847 if self._match_text_seq("DISTINCT", "FROM"): 2848 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2849 return self.expression(klass, this=this, expression=self._parse_expression()) 2850 2851 expression = self._parse_null() or self._parse_boolean() 2852 if not expression: 2853 self._retreat(index) 2854 return None 2855 2856 this = self.expression(exp.Is, this=this, expression=expression) 2857 return self.expression(exp.Not, this=this) if negate else this 2858 2859 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2860 unnest = self._parse_unnest(with_alias=False) 2861 if unnest: 2862 this = self.expression(exp.In, this=this, unnest=unnest) 2863 elif self._match(TokenType.L_PAREN): 2864 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2865 2866 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2867 this = self.expression(exp.In, this=this, query=expressions[0]) 2868 else: 2869 this = self.expression(exp.In, this=this, expressions=expressions) 2870 2871 self._match_r_paren(this) 2872 else: 2873 this = self.expression(exp.In, this=this, field=self._parse_field()) 2874 2875 return this 2876 2877 def _parse_between(self, this: exp.Expression) -> exp.Between: 2878 low = self._parse_bitwise() 2879 self._match(TokenType.AND) 2880 high = self._parse_bitwise() 2881 return self.expression(exp.Between, this=this, low=low, high=high) 2882 2883 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2884 if not self._match(TokenType.ESCAPE): 2885 return this 2886 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2887 2888 def _parse_interval(self) -> t.Optional[exp.Interval]: 2889 if not self._match(TokenType.INTERVAL): 2890 return None 2891 2892 if self._match(TokenType.STRING, advance=False): 2893 this = self._parse_primary() 2894 else: 2895 this = self._parse_term() 2896 2897 unit = self._parse_function() or self._parse_var() 2898 2899 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2900 # each INTERVAL expression into this canonical form so it's easy to transpile 2901 if this and this.is_number: 2902 this = exp.Literal.string(this.name) 2903 elif this and this.is_string: 2904 parts = this.name.split() 2905 2906 if len(parts) == 2: 2907 if unit: 2908 # this is not actually a unit, it's something else 2909 unit = None 2910 self._retreat(self._index - 1) 2911 else: 2912 this = exp.Literal.string(parts[0]) 2913 unit = self.expression(exp.Var, this=parts[1]) 2914 2915 return self.expression(exp.Interval, this=this, unit=unit) 2916 2917 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2918 this = self._parse_term() 2919 2920 while True: 2921 if self._match_set(self.BITWISE): 2922 this = self.expression( 2923 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2924 ) 2925 elif self._match_pair(TokenType.LT, TokenType.LT): 2926 
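                # "<<" reaches the parser as two consecutive LT tokens, so the pair is
                # folded into a single BitwiseLeftShift node (">>" is handled the same way below)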
this = self.expression( 2927 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2928 ) 2929 elif self._match_pair(TokenType.GT, TokenType.GT): 2930 this = self.expression( 2931 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2932 ) 2933 else: 2934 break 2935 2936 return this 2937 2938 def _parse_term(self) -> t.Optional[exp.Expression]: 2939 return self._parse_tokens(self._parse_factor, self.TERM) 2940 2941 def _parse_factor(self) -> t.Optional[exp.Expression]: 2942 return self._parse_tokens(self._parse_unary, self.FACTOR) 2943 2944 def _parse_unary(self) -> t.Optional[exp.Expression]: 2945 if self._match_set(self.UNARY_PARSERS): 2946 return self.UNARY_PARSERS[self._prev.token_type](self) 2947 return self._parse_at_time_zone(self._parse_type()) 2948 2949 def _parse_type(self) -> t.Optional[exp.Expression]: 2950 interval = self._parse_interval() 2951 if interval: 2952 return interval 2953 2954 index = self._index 2955 data_type = self._parse_types(check_func=True) 2956 this = self._parse_column() 2957 2958 if data_type: 2959 if isinstance(this, exp.Literal): 2960 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2961 if parser: 2962 return parser(self, this, data_type) 2963 return self.expression(exp.Cast, this=this, to=data_type) 2964 if not data_type.expressions: 2965 self._retreat(index) 2966 return self._parse_column() 2967 return self._parse_column_ops(data_type) 2968 2969 return this 2970 2971 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2972 this = self._parse_type() 2973 if not this: 2974 return None 2975 2976 return self.expression( 2977 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2978 ) 2979 2980 def _parse_types( 2981 self, check_func: bool = False, schema: bool = False 2982 ) -> t.Optional[exp.Expression]: 2983 index = self._index 2984 2985 prefix = self._match_text_seq("SYSUDTLIB", ".") 2986 2987 if not self._match_set(self.TYPE_TOKENS): 2988 return None 2989 2990 type_token = self._prev.token_type 2991 2992 if type_token == TokenType.PSEUDO_TYPE: 2993 return self.expression(exp.PseudoType, this=self._prev.text) 2994 2995 nested = type_token in self.NESTED_TYPE_TOKENS 2996 is_struct = type_token == TokenType.STRUCT 2997 expressions = None 2998 maybe_func = False 2999 3000 if self._match(TokenType.L_PAREN): 3001 if is_struct: 3002 expressions = self._parse_csv(self._parse_struct_types) 3003 elif nested: 3004 expressions = self._parse_csv( 3005 lambda: self._parse_types(check_func=check_func, schema=schema) 3006 ) 3007 elif type_token in self.ENUM_TYPE_TOKENS: 3008 expressions = self._parse_csv(self._parse_primary) 3009 else: 3010 expressions = self._parse_csv(self._parse_type_size) 3011 3012 if not expressions or not self._match(TokenType.R_PAREN): 3013 self._retreat(index) 3014 return None 3015 3016 maybe_func = True 3017 3018 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3019 this = exp.DataType( 3020 this=exp.DataType.Type.ARRAY, 3021 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3022 nested=True, 3023 ) 3024 3025 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3026 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3027 3028 return this 3029 3030 if self._match(TokenType.L_BRACKET): 3031 self._retreat(index) 3032 return None 3033 3034 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3035 if nested and self._match(TokenType.LT): 3036 if is_struct: 3037 expressions = 
self._parse_csv(self._parse_struct_types) 3038 else: 3039 expressions = self._parse_csv( 3040 lambda: self._parse_types(check_func=check_func, schema=schema) 3041 ) 3042 3043 if not self._match(TokenType.GT): 3044 self.raise_error("Expecting >") 3045 3046 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3047 values = self._parse_csv(self._parse_conjunction) 3048 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3049 3050 value: t.Optional[exp.Expression] = None 3051 if type_token in self.TIMESTAMPS: 3052 if self._match_text_seq("WITH", "TIME", "ZONE"): 3053 maybe_func = False 3054 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3055 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3056 maybe_func = False 3057 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3058 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3059 maybe_func = False 3060 elif type_token == TokenType.INTERVAL: 3061 unit = self._parse_var() 3062 3063 if not unit: 3064 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3065 else: 3066 value = self.expression(exp.Interval, unit=unit) 3067 3068 if maybe_func and check_func: 3069 index2 = self._index 3070 peek = self._parse_string() 3071 3072 if not peek: 3073 self._retreat(index) 3074 return None 3075 3076 self._retreat(index2) 3077 3078 if value: 3079 return value 3080 3081 return exp.DataType( 3082 this=exp.DataType.Type[type_token.value.upper()], 3083 expressions=expressions, 3084 nested=nested, 3085 values=values, 3086 prefix=prefix, 3087 ) 3088 3089 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3090 this = self._parse_type() or self._parse_id_var() 3091 self._match(TokenType.COLON) 3092 return self._parse_column_def(this) 3093 3094 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3095 if not self._match_text_seq("AT", "TIME", "ZONE"): 3096 return this 3097 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3098 3099 def _parse_column(self) -> t.Optional[exp.Expression]: 3100 this = self._parse_field() 3101 if isinstance(this, exp.Identifier): 3102 this = self.expression(exp.Column, this=this) 3103 elif not this: 3104 return self._parse_bracket(this) 3105 return self._parse_column_ops(this) 3106 3107 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3108 this = self._parse_bracket(this) 3109 3110 while self._match_set(self.COLUMN_OPERATORS): 3111 op_token = self._prev.token_type 3112 op = self.COLUMN_OPERATORS.get(op_token) 3113 3114 if op_token == TokenType.DCOLON: 3115 field = self._parse_types() 3116 if not field: 3117 self.raise_error("Expected type") 3118 elif op and self._curr: 3119 self._advance() 3120 value = self._prev.text 3121 field = ( 3122 exp.Literal.number(value) 3123 if self._prev.token_type == TokenType.NUMBER 3124 else exp.Literal.string(value) 3125 ) 3126 else: 3127 field = self._parse_field(anonymous_func=True, any_token=True) 3128 3129 if isinstance(field, exp.Func): 3130 # bigquery allows function calls like x.y.count(...) 3131 # SAFE.SUBSTR(...) 
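            # when the trailing field resolves to a function call, the dotted column
            # path accumulated so far is rewritten into a Dot chain below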
3132 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3133 this = self._replace_columns_with_dots(this) 3134 3135 if op: 3136 this = op(self, this, field) 3137 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3138 this = self.expression( 3139 exp.Column, 3140 this=field, 3141 table=this.this, 3142 db=this.args.get("table"), 3143 catalog=this.args.get("db"), 3144 ) 3145 else: 3146 this = self.expression(exp.Dot, this=this, expression=field) 3147 this = self._parse_bracket(this) 3148 return this 3149 3150 def _parse_primary(self) -> t.Optional[exp.Expression]: 3151 if self._match_set(self.PRIMARY_PARSERS): 3152 token_type = self._prev.token_type 3153 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3154 3155 if token_type == TokenType.STRING: 3156 expressions = [primary] 3157 while self._match(TokenType.STRING): 3158 expressions.append(exp.Literal.string(self._prev.text)) 3159 3160 if len(expressions) > 1: 3161 return self.expression(exp.Concat, expressions=expressions) 3162 3163 return primary 3164 3165 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3166 return exp.Literal.number(f"0.{self._prev.text}") 3167 3168 if self._match(TokenType.L_PAREN): 3169 comments = self._prev_comments 3170 query = self._parse_select() 3171 3172 if query: 3173 expressions = [query] 3174 else: 3175 expressions = self._parse_csv(self._parse_expression) 3176 3177 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3178 3179 if isinstance(this, exp.Subqueryable): 3180 this = self._parse_set_operations( 3181 self._parse_subquery(this=this, parse_alias=False) 3182 ) 3183 elif len(expressions) > 1: 3184 this = self.expression(exp.Tuple, expressions=expressions) 3185 else: 3186 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3187 3188 if this: 3189 this.add_comments(comments) 3190 3191 self._match_r_paren(expression=this) 3192 return this 3193 3194 return None 3195 3196 def _parse_field( 3197 self, 3198 any_token: bool = False, 3199 tokens: t.Optional[t.Collection[TokenType]] = None, 3200 anonymous_func: bool = False, 3201 ) -> t.Optional[exp.Expression]: 3202 return ( 3203 self._parse_primary() 3204 or self._parse_function(anonymous=anonymous_func) 3205 or self._parse_id_var(any_token=any_token, tokens=tokens) 3206 ) 3207 3208 def _parse_function( 3209 self, 3210 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3211 anonymous: bool = False, 3212 optional_parens: bool = True, 3213 ) -> t.Optional[exp.Expression]: 3214 if not self._curr: 3215 return None 3216 3217 token_type = self._curr.token_type 3218 3219 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3220 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3221 3222 if not self._next or self._next.token_type != TokenType.L_PAREN: 3223 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3224 self._advance() 3225 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3226 3227 return None 3228 3229 if token_type not in self.FUNC_TOKENS: 3230 return None 3231 3232 this = self._curr.text 3233 upper = this.upper() 3234 self._advance(2) 3235 3236 parser = self.FUNCTION_PARSERS.get(upper) 3237 3238 if parser and not anonymous: 3239 this = parser(self) 3240 else: 3241 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3242 3243 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3244 this = self.expression(subquery_predicate, this=self._parse_select()) 
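                # e.g. ANY (SELECT ...) / EXISTS (SELECT ...): wrap the subquery in its
                # predicate node and consume the closing paren right away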
3245 self._match_r_paren() 3246 return this 3247 3248 if functions is None: 3249 functions = self.FUNCTIONS 3250 3251 function = functions.get(upper) 3252 3253 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3254 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3255 3256 if function and not anonymous: 3257 this = self.validate_expression(function(args), args) 3258 else: 3259 this = self.expression(exp.Anonymous, this=this, expressions=args) 3260 3261 self._match_r_paren(this) 3262 return self._parse_window(this) 3263 3264 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3265 return self._parse_column_def(self._parse_id_var()) 3266 3267 def _parse_user_defined_function( 3268 self, kind: t.Optional[TokenType] = None 3269 ) -> t.Optional[exp.Expression]: 3270 this = self._parse_id_var() 3271 3272 while self._match(TokenType.DOT): 3273 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3274 3275 if not self._match(TokenType.L_PAREN): 3276 return this 3277 3278 expressions = self._parse_csv(self._parse_function_parameter) 3279 self._match_r_paren() 3280 return self.expression( 3281 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3282 ) 3283 3284 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3285 literal = self._parse_primary() 3286 if literal: 3287 return self.expression(exp.Introducer, this=token.text, expression=literal) 3288 3289 return self.expression(exp.Identifier, this=token.text) 3290 3291 def _parse_session_parameter(self) -> exp.SessionParameter: 3292 kind = None 3293 this = self._parse_id_var() or self._parse_primary() 3294 3295 if this and self._match(TokenType.DOT): 3296 kind = this.name 3297 this = self._parse_var() or self._parse_primary() 3298 3299 return self.expression(exp.SessionParameter, this=this, kind=kind) 3300 3301 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3302 index = self._index 3303 3304 if self._match(TokenType.L_PAREN): 3305 expressions = self._parse_csv(self._parse_id_var) 3306 3307 if not self._match(TokenType.R_PAREN): 3308 self._retreat(index) 3309 else: 3310 expressions = [self._parse_id_var()] 3311 3312 if self._match_set(self.LAMBDAS): 3313 return self.LAMBDAS[self._prev.token_type](self, expressions) 3314 3315 self._retreat(index) 3316 3317 this: t.Optional[exp.Expression] 3318 3319 if self._match(TokenType.DISTINCT): 3320 this = self.expression( 3321 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3322 ) 3323 else: 3324 this = self._parse_select_or_expression(alias=alias) 3325 3326 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3327 3328 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3329 index = self._index 3330 3331 if not self.errors: 3332 try: 3333 if self._parse_select(nested=True): 3334 return this 3335 except ParseError: 3336 pass 3337 finally: 3338 self.errors.clear() 3339 self._retreat(index) 3340 3341 if not self._match(TokenType.L_PAREN): 3342 return this 3343 3344 args = self._parse_csv( 3345 lambda: self._parse_constraint() 3346 or self._parse_column_def(self._parse_field(any_token=True)) 3347 ) 3348 3349 self._match_r_paren() 3350 return self.expression(exp.Schema, this=this, expressions=args) 3351 3352 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3353 # column defs are not really columns, they're identifiers 3354 if isinstance(this, 
exp.Column): 3355 this = this.this 3356 3357 kind = self._parse_types(schema=True) 3358 3359 if self._match_text_seq("FOR", "ORDINALITY"): 3360 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3361 3362 constraints = [] 3363 while True: 3364 constraint = self._parse_column_constraint() 3365 if not constraint: 3366 break 3367 constraints.append(constraint) 3368 3369 if not kind and not constraints: 3370 return this 3371 3372 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3373 3374 def _parse_auto_increment( 3375 self, 3376 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3377 start = None 3378 increment = None 3379 3380 if self._match(TokenType.L_PAREN, advance=False): 3381 args = self._parse_wrapped_csv(self._parse_bitwise) 3382 start = seq_get(args, 0) 3383 increment = seq_get(args, 1) 3384 elif self._match_text_seq("START"): 3385 start = self._parse_bitwise() 3386 self._match_text_seq("INCREMENT") 3387 increment = self._parse_bitwise() 3388 3389 if start and increment: 3390 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3391 3392 return exp.AutoIncrementColumnConstraint() 3393 3394 def _parse_compress(self) -> exp.CompressColumnConstraint: 3395 if self._match(TokenType.L_PAREN, advance=False): 3396 return self.expression( 3397 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3398 ) 3399 3400 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3401 3402 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3403 if self._match_text_seq("BY", "DEFAULT"): 3404 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3405 this = self.expression( 3406 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3407 ) 3408 else: 3409 self._match_text_seq("ALWAYS") 3410 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3411 3412 self._match(TokenType.ALIAS) 3413 identity = self._match_text_seq("IDENTITY") 3414 3415 if self._match(TokenType.L_PAREN): 3416 if self._match_text_seq("START", "WITH"): 3417 this.set("start", self._parse_bitwise()) 3418 if self._match_text_seq("INCREMENT", "BY"): 3419 this.set("increment", self._parse_bitwise()) 3420 if self._match_text_seq("MINVALUE"): 3421 this.set("minvalue", self._parse_bitwise()) 3422 if self._match_text_seq("MAXVALUE"): 3423 this.set("maxvalue", self._parse_bitwise()) 3424 3425 if self._match_text_seq("CYCLE"): 3426 this.set("cycle", True) 3427 elif self._match_text_seq("NO", "CYCLE"): 3428 this.set("cycle", False) 3429 3430 if not identity: 3431 this.set("expression", self._parse_bitwise()) 3432 3433 self._match_r_paren() 3434 3435 return this 3436 3437 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3438 self._match_text_seq("LENGTH") 3439 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3440 3441 def _parse_not_constraint( 3442 self, 3443 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3444 if self._match_text_seq("NULL"): 3445 return self.expression(exp.NotNullColumnConstraint) 3446 if self._match_text_seq("CASESPECIFIC"): 3447 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3448 return None 3449 3450 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3451 if self._match(TokenType.CONSTRAINT): 3452 this = self._parse_id_var() 3453 else: 3454 this = None 3455 3456 if 
self._match_texts(self.CONSTRAINT_PARSERS): 3457 return self.expression( 3458 exp.ColumnConstraint, 3459 this=this, 3460 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3461 ) 3462 3463 return this 3464 3465 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3466 if not self._match(TokenType.CONSTRAINT): 3467 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3468 3469 this = self._parse_id_var() 3470 expressions = [] 3471 3472 while True: 3473 constraint = self._parse_unnamed_constraint() or self._parse_function() 3474 if not constraint: 3475 break 3476 expressions.append(constraint) 3477 3478 return self.expression(exp.Constraint, this=this, expressions=expressions) 3479 3480 def _parse_unnamed_constraint( 3481 self, constraints: t.Optional[t.Collection[str]] = None 3482 ) -> t.Optional[exp.Expression]: 3483 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3484 return None 3485 3486 constraint = self._prev.text.upper() 3487 if constraint not in self.CONSTRAINT_PARSERS: 3488 self.raise_error(f"No parser found for schema constraint {constraint}.") 3489 3490 return self.CONSTRAINT_PARSERS[constraint](self) 3491 3492 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3493 self._match_text_seq("KEY") 3494 return self.expression( 3495 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3496 ) 3497 3498 def _parse_key_constraint_options(self) -> t.List[str]: 3499 options = [] 3500 while True: 3501 if not self._curr: 3502 break 3503 3504 if self._match(TokenType.ON): 3505 action = None 3506 on = self._advance_any() and self._prev.text 3507 3508 if self._match_text_seq("NO", "ACTION"): 3509 action = "NO ACTION" 3510 elif self._match_text_seq("CASCADE"): 3511 action = "CASCADE" 3512 elif self._match_pair(TokenType.SET, TokenType.NULL): 3513 action = "SET NULL" 3514 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3515 action = "SET DEFAULT" 3516 else: 3517 self.raise_error("Invalid key constraint") 3518 3519 options.append(f"ON {on} {action}") 3520 elif self._match_text_seq("NOT", "ENFORCED"): 3521 options.append("NOT ENFORCED") 3522 elif self._match_text_seq("DEFERRABLE"): 3523 options.append("DEFERRABLE") 3524 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3525 options.append("INITIALLY DEFERRED") 3526 elif self._match_text_seq("NORELY"): 3527 options.append("NORELY") 3528 elif self._match_text_seq("MATCH", "FULL"): 3529 options.append("MATCH FULL") 3530 else: 3531 break 3532 3533 return options 3534 3535 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3536 if match and not self._match(TokenType.REFERENCES): 3537 return None 3538 3539 expressions = None 3540 this = self._parse_id_var() 3541 3542 if self._match(TokenType.L_PAREN, advance=False): 3543 expressions = self._parse_wrapped_id_vars() 3544 3545 options = self._parse_key_constraint_options() 3546 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3547 3548 def _parse_foreign_key(self) -> exp.ForeignKey: 3549 expressions = self._parse_wrapped_id_vars() 3550 reference = self._parse_references() 3551 options = {} 3552 3553 while self._match(TokenType.ON): 3554 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3555 self.raise_error("Expected DELETE or UPDATE") 3556 3557 kind = self._prev.text.lower() 3558 3559 if self._match_text_seq("NO", "ACTION"): 3560 action = "NO ACTION" 3561 elif self._match(TokenType.SET): 3562 
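                # ON <DELETE | UPDATE> followed by SET NULL or SET DEFAULT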
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3563 action = "SET " + self._prev.text.upper() 3564 else: 3565 self._advance() 3566 action = self._prev.text.upper() 3567 3568 options[kind] = action 3569 3570 return self.expression( 3571 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3572 ) 3573 3574 def _parse_primary_key( 3575 self, wrapped_optional: bool = False, in_props: bool = False 3576 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3577 desc = ( 3578 self._match_set((TokenType.ASC, TokenType.DESC)) 3579 and self._prev.token_type == TokenType.DESC 3580 ) 3581 3582 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3583 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3584 3585 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3586 options = self._parse_key_constraint_options() 3587 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3588 3589 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3590 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3591 return this 3592 3593 bracket_kind = self._prev.token_type 3594 3595 if self._match(TokenType.COLON): 3596 expressions: t.List[t.Optional[exp.Expression]] = [ 3597 self.expression(exp.Slice, expression=self._parse_conjunction()) 3598 ] 3599 else: 3600 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3601 3602 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3603 if bracket_kind == TokenType.L_BRACE: 3604 this = self.expression(exp.Struct, expressions=expressions) 3605 elif not this or this.name.upper() == "ARRAY": 3606 this = self.expression(exp.Array, expressions=expressions) 3607 else: 3608 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3609 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3610 3611 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3612 self.raise_error("Expected ]") 3613 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3614 self.raise_error("Expected }") 3615 3616 self._add_comments(this) 3617 return self._parse_bracket(this) 3618 3619 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3620 if self._match(TokenType.COLON): 3621 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3622 return this 3623 3624 def _parse_case(self) -> t.Optional[exp.Expression]: 3625 ifs = [] 3626 default = None 3627 3628 expression = self._parse_conjunction() 3629 3630 while self._match(TokenType.WHEN): 3631 this = self._parse_conjunction() 3632 self._match(TokenType.THEN) 3633 then = self._parse_conjunction() 3634 ifs.append(self.expression(exp.If, this=this, true=then)) 3635 3636 if self._match(TokenType.ELSE): 3637 default = self._parse_conjunction() 3638 3639 if not self._match(TokenType.END): 3640 self.raise_error("Expected END after CASE", self._prev) 3641 3642 return self._parse_window( 3643 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3644 ) 3645 3646 def _parse_if(self) -> t.Optional[exp.Expression]: 3647 if self._match(TokenType.L_PAREN): 3648 args = self._parse_csv(self._parse_conjunction) 3649 this = self.validate_expression(exp.If.from_arg_list(args), args) 3650 self._match_r_paren() 3651 else: 3652 index = self._index - 1 3653 condition = self._parse_conjunction() 3654 
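            # unparenthesized variant: IF <condition> THEN <true> [ELSE <false>] END;
            # rewind and bail out if no condition follows the IF keyword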
3655 if not condition: 3656 self._retreat(index) 3657 return None 3658 3659 self._match(TokenType.THEN) 3660 true = self._parse_conjunction() 3661 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3662 self._match(TokenType.END) 3663 this = self.expression(exp.If, this=condition, true=true, false=false) 3664 3665 return self._parse_window(this) 3666 3667 def _parse_extract(self) -> exp.Extract: 3668 this = self._parse_function() or self._parse_var() or self._parse_type() 3669 3670 if self._match(TokenType.FROM): 3671 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3672 3673 if not self._match(TokenType.COMMA): 3674 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3675 3676 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3677 3678 def _parse_any_value(self) -> exp.AnyValue: 3679 this = self._parse_lambda() 3680 is_max = None 3681 having = None 3682 3683 if self._match(TokenType.HAVING): 3684 self._match_texts(("MAX", "MIN")) 3685 is_max = self._prev.text == "MAX" 3686 having = self._parse_column() 3687 3688 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3689 3690 def _parse_cast(self, strict: bool) -> exp.Expression: 3691 this = self._parse_conjunction() 3692 3693 if not self._match(TokenType.ALIAS): 3694 if self._match(TokenType.COMMA): 3695 return self.expression( 3696 exp.CastToStrType, this=this, expression=self._parse_string() 3697 ) 3698 else: 3699 self.raise_error("Expected AS after CAST") 3700 3701 fmt = None 3702 to = self._parse_types() 3703 3704 if not to: 3705 self.raise_error("Expected TYPE after CAST") 3706 elif to.this == exp.DataType.Type.CHAR: 3707 if self._match(TokenType.CHARACTER_SET): 3708 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3709 elif self._match(TokenType.FORMAT): 3710 fmt = self._parse_at_time_zone(self._parse_string()) 3711 3712 if to.this in exp.DataType.TEMPORAL_TYPES: 3713 return self.expression( 3714 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3715 this=this, 3716 format=exp.Literal.string( 3717 format_time( 3718 fmt.this if fmt else "", 3719 self.FORMAT_MAPPING or self.TIME_MAPPING, 3720 self.FORMAT_TRIE or self.TIME_TRIE, 3721 ) 3722 ), 3723 ) 3724 3725 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3726 3727 def _parse_concat(self) -> t.Optional[exp.Expression]: 3728 args = self._parse_csv(self._parse_conjunction) 3729 if self.CONCAT_NULL_OUTPUTS_STRING: 3730 args = [ 3731 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3732 for arg in args 3733 if arg 3734 ] 3735 3736 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3737 # we find such a call we replace it with its argument. 
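        # e.g. CONCAT(x) is reduced to x (or to the COALESCE-wrapped form built above)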
3738 if len(args) == 1: 3739 return args[0] 3740 3741 return self.expression( 3742 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3743 ) 3744 3745 def _parse_string_agg(self) -> exp.Expression: 3746 if self._match(TokenType.DISTINCT): 3747 args: t.List[t.Optional[exp.Expression]] = [ 3748 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3749 ] 3750 if self._match(TokenType.COMMA): 3751 args.extend(self._parse_csv(self._parse_conjunction)) 3752 else: 3753 args = self._parse_csv(self._parse_conjunction) 3754 3755 index = self._index 3756 if not self._match(TokenType.R_PAREN): 3757 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3758 return self.expression( 3759 exp.GroupConcat, 3760 this=seq_get(args, 0), 3761 separator=self._parse_order(this=seq_get(args, 1)), 3762 ) 3763 3764 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3765 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3766 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3767 if not self._match_text_seq("WITHIN", "GROUP"): 3768 self._retreat(index) 3769 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3770 3771 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3772 order = self._parse_order(this=seq_get(args, 0)) 3773 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3774 3775 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3776 this = self._parse_bitwise() 3777 3778 if self._match(TokenType.USING): 3779 to: t.Optional[exp.Expression] = self.expression( 3780 exp.CharacterSet, this=self._parse_var() 3781 ) 3782 elif self._match(TokenType.COMMA): 3783 to = self._parse_types() 3784 else: 3785 to = None 3786 3787 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3788 3789 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3790 """ 3791 There are generally two variants of the DECODE function: 3792 3793 - DECODE(bin, charset) 3794 - DECODE(expression, search, result [, search, result] ... [, default]) 3795 3796 The second variant will always be parsed into a CASE expression. Note that NULL 3797 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3798 instead of relying on pattern matching. 
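        For example (illustrative): DECODE(x, 1, 'one', 'other') is parsed as
        CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and DECODE(x, NULL, 'n')
        as CASE WHEN x IS NULL THEN 'n' END.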
3799 """ 3800 args = self._parse_csv(self._parse_conjunction) 3801 3802 if len(args) < 3: 3803 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3804 3805 expression, *expressions = args 3806 if not expression: 3807 return None 3808 3809 ifs = [] 3810 for search, result in zip(expressions[::2], expressions[1::2]): 3811 if not search or not result: 3812 return None 3813 3814 if isinstance(search, exp.Literal): 3815 ifs.append( 3816 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3817 ) 3818 elif isinstance(search, exp.Null): 3819 ifs.append( 3820 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3821 ) 3822 else: 3823 cond = exp.or_( 3824 exp.EQ(this=expression.copy(), expression=search), 3825 exp.and_( 3826 exp.Is(this=expression.copy(), expression=exp.Null()), 3827 exp.Is(this=search.copy(), expression=exp.Null()), 3828 copy=False, 3829 ), 3830 copy=False, 3831 ) 3832 ifs.append(exp.If(this=cond, true=result)) 3833 3834 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3835 3836 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3837 self._match_text_seq("KEY") 3838 key = self._parse_field() 3839 self._match(TokenType.COLON) 3840 self._match_text_seq("VALUE") 3841 value = self._parse_field() 3842 3843 if not key and not value: 3844 return None 3845 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3846 3847 def _parse_json_object(self) -> exp.JSONObject: 3848 star = self._parse_star() 3849 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3850 3851 null_handling = None 3852 if self._match_text_seq("NULL", "ON", "NULL"): 3853 null_handling = "NULL ON NULL" 3854 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3855 null_handling = "ABSENT ON NULL" 3856 3857 unique_keys = None 3858 if self._match_text_seq("WITH", "UNIQUE"): 3859 unique_keys = True 3860 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3861 unique_keys = False 3862 3863 self._match_text_seq("KEYS") 3864 3865 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3866 format_json = self._match_text_seq("FORMAT", "JSON") 3867 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3868 3869 return self.expression( 3870 exp.JSONObject, 3871 expressions=expressions, 3872 null_handling=null_handling, 3873 unique_keys=unique_keys, 3874 return_type=return_type, 3875 format_json=format_json, 3876 encoding=encoding, 3877 ) 3878 3879 def _parse_logarithm(self) -> exp.Func: 3880 # Default argument order is base, expression 3881 args = self._parse_csv(self._parse_range) 3882 3883 if len(args) > 1: 3884 if not self.LOG_BASE_FIRST: 3885 args.reverse() 3886 return exp.Log.from_arg_list(args) 3887 3888 return self.expression( 3889 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3890 ) 3891 3892 def _parse_match_against(self) -> exp.MatchAgainst: 3893 expressions = self._parse_csv(self._parse_column) 3894 3895 self._match_text_seq(")", "AGAINST", "(") 3896 3897 this = self._parse_string() 3898 3899 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3900 modifier = "IN NATURAL LANGUAGE MODE" 3901 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3902 modifier = f"{modifier} WITH QUERY EXPANSION" 3903 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3904 modifier = "IN BOOLEAN MODE" 3905 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3906 modifier = "WITH QUERY EXPANSION" 3907 
else: 3908 modifier = None 3909 3910 return self.expression( 3911 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3912 ) 3913 3914 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3915 def _parse_open_json(self) -> exp.OpenJSON: 3916 this = self._parse_bitwise() 3917 path = self._match(TokenType.COMMA) and self._parse_string() 3918 3919 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3920 this = self._parse_field(any_token=True) 3921 kind = self._parse_types() 3922 path = self._parse_string() 3923 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3924 3925 return self.expression( 3926 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3927 ) 3928 3929 expressions = None 3930 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3931 self._match_l_paren() 3932 expressions = self._parse_csv(_parse_open_json_column_def) 3933 3934 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3935 3936 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3937 args = self._parse_csv(self._parse_bitwise) 3938 3939 if self._match(TokenType.IN): 3940 return self.expression( 3941 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3942 ) 3943 3944 if haystack_first: 3945 haystack = seq_get(args, 0) 3946 needle = seq_get(args, 1) 3947 else: 3948 needle = seq_get(args, 0) 3949 haystack = seq_get(args, 1) 3950 3951 return self.expression( 3952 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3953 ) 3954 3955 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3956 args = self._parse_csv(self._parse_table) 3957 return exp.JoinHint(this=func_name.upper(), expressions=args) 3958 3959 def _parse_substring(self) -> exp.Substring: 3960 # Postgres supports the form: substring(string [from int] [for int]) 3961 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3962 3963 args = self._parse_csv(self._parse_bitwise) 3964 3965 if self._match(TokenType.FROM): 3966 args.append(self._parse_bitwise()) 3967 if self._match(TokenType.FOR): 3968 args.append(self._parse_bitwise()) 3969 3970 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3971 3972 def _parse_trim(self) -> exp.Trim: 3973 # https://www.w3resource.com/sql/character-functions/trim.php 3974 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3975 3976 position = None 3977 collation = None 3978 3979 if self._match_texts(self.TRIM_TYPES): 3980 position = self._prev.text.upper() 3981 3982 expression = self._parse_bitwise() 3983 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3984 this = self._parse_bitwise() 3985 else: 3986 this = expression 3987 expression = None 3988 3989 if self._match(TokenType.COLLATE): 3990 collation = self._parse_bitwise() 3991 3992 return self.expression( 3993 exp.Trim, this=this, position=position, expression=expression, collation=collation 3994 ) 3995 3996 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3997 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3998 3999 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4000 return self._parse_window(self._parse_id_var(), alias=True) 4001 4002 def _parse_respect_or_ignore_nulls( 4003 self, this: t.Optional[exp.Expression] 4004 ) -> t.Optional[exp.Expression]: 4005 if self._match_text_seq("IGNORE", "NULLS"): 4006 return 
self.expression(exp.IgnoreNulls, this=this) 4007 if self._match_text_seq("RESPECT", "NULLS"): 4008 return self.expression(exp.RespectNulls, this=this) 4009 return this 4010 4011 def _parse_window( 4012 self, this: t.Optional[exp.Expression], alias: bool = False 4013 ) -> t.Optional[exp.Expression]: 4014 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4015 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4016 self._match_r_paren() 4017 4018 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4019 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4020 if self._match_text_seq("WITHIN", "GROUP"): 4021 order = self._parse_wrapped(self._parse_order) 4022 this = self.expression(exp.WithinGroup, this=this, expression=order) 4023 4024 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4025 # Some dialects choose to implement and some do not. 4026 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4027 4028 # There is some code above in _parse_lambda that handles 4029 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4030 4031 # The below changes handle 4032 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4033 4034 # Oracle allows both formats 4035 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4036 # and Snowflake chose to do the same for familiarity 4037 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4038 this = self._parse_respect_or_ignore_nulls(this) 4039 4040 # bigquery select from window x AS (partition by ...) 4041 if alias: 4042 over = None 4043 self._match(TokenType.ALIAS) 4044 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4045 return this 4046 else: 4047 over = self._prev.text.upper() 4048 4049 if not self._match(TokenType.L_PAREN): 4050 return self.expression( 4051 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4052 ) 4053 4054 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4055 4056 first = self._match(TokenType.FIRST) 4057 if self._match_text_seq("LAST"): 4058 first = False 4059 4060 partition = self._parse_partition_by() 4061 order = self._parse_order() 4062 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4063 4064 if kind: 4065 self._match(TokenType.BETWEEN) 4066 start = self._parse_window_spec() 4067 self._match(TokenType.AND) 4068 end = self._parse_window_spec() 4069 4070 spec = self.expression( 4071 exp.WindowSpec, 4072 kind=kind, 4073 start=start["value"], 4074 start_side=start["side"], 4075 end=end["value"], 4076 end_side=end["side"], 4077 ) 4078 else: 4079 spec = None 4080 4081 self._match_r_paren() 4082 4083 return self.expression( 4084 exp.Window, 4085 this=this, 4086 partition_by=partition, 4087 order=order, 4088 spec=spec, 4089 alias=window_alias, 4090 over=over, 4091 first=first, 4092 ) 4093 4094 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4095 self._match(TokenType.BETWEEN) 4096 4097 return { 4098 "value": ( 4099 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4100 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4101 or self._parse_bitwise() 4102 ), 4103 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4104 } 4105 4106 def _parse_alias( 4107 self, this: t.Optional[exp.Expression], explicit: bool = False 4108 ) -> t.Optional[exp.Expression]: 4109 
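        # an alias is either introduced by AS or attached bare; with explicit=True a bare
        # alias is rejected, and a parenthesized list such as t AS (a, b) becomes exp.Aliases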
any_token = self._match(TokenType.ALIAS) 4110 4111 if explicit and not any_token: 4112 return this 4113 4114 if self._match(TokenType.L_PAREN): 4115 aliases = self.expression( 4116 exp.Aliases, 4117 this=this, 4118 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4119 ) 4120 self._match_r_paren(aliases) 4121 return aliases 4122 4123 alias = self._parse_id_var(any_token) 4124 4125 if alias: 4126 return self.expression(exp.Alias, this=this, alias=alias) 4127 4128 return this 4129 4130 def _parse_id_var( 4131 self, 4132 any_token: bool = True, 4133 tokens: t.Optional[t.Collection[TokenType]] = None, 4134 ) -> t.Optional[exp.Expression]: 4135 identifier = self._parse_identifier() 4136 4137 if identifier: 4138 return identifier 4139 4140 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4141 quoted = self._prev.token_type == TokenType.STRING 4142 return exp.Identifier(this=self._prev.text, quoted=quoted) 4143 4144 return None 4145 4146 def _parse_string(self) -> t.Optional[exp.Expression]: 4147 if self._match(TokenType.STRING): 4148 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4149 return self._parse_placeholder() 4150 4151 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4152 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4153 4154 def _parse_number(self) -> t.Optional[exp.Expression]: 4155 if self._match(TokenType.NUMBER): 4156 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4157 return self._parse_placeholder() 4158 4159 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4160 if self._match(TokenType.IDENTIFIER): 4161 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4162 return self._parse_placeholder() 4163 4164 def _parse_var( 4165 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4166 ) -> t.Optional[exp.Expression]: 4167 if ( 4168 (any_token and self._advance_any()) 4169 or self._match(TokenType.VAR) 4170 or (self._match_set(tokens) if tokens else False) 4171 ): 4172 return self.expression(exp.Var, this=self._prev.text) 4173 return self._parse_placeholder() 4174 4175 def _advance_any(self) -> t.Optional[Token]: 4176 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4177 self._advance() 4178 return self._prev 4179 return None 4180 4181 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4182 return self._parse_var() or self._parse_string() 4183 4184 def _parse_null(self) -> t.Optional[exp.Expression]: 4185 if self._match(TokenType.NULL): 4186 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4187 return None 4188 4189 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4190 if self._match(TokenType.TRUE): 4191 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4192 if self._match(TokenType.FALSE): 4193 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4194 return None 4195 4196 def _parse_star(self) -> t.Optional[exp.Expression]: 4197 if self._match(TokenType.STAR): 4198 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4199 return None 4200 4201 def _parse_parameter(self) -> exp.Parameter: 4202 wrapped = self._match(TokenType.L_BRACE) 4203 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4204 self._match(TokenType.R_BRACE) 4205 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4206 4207 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
4208 if self._match_set(self.PLACEHOLDER_PARSERS): 4209 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4210 if placeholder: 4211 return placeholder 4212 self._advance(-1) 4213 return None 4214 4215 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4216 if not self._match(TokenType.EXCEPT): 4217 return None 4218 if self._match(TokenType.L_PAREN, advance=False): 4219 return self._parse_wrapped_csv(self._parse_column) 4220 return self._parse_csv(self._parse_column) 4221 4222 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4223 if not self._match(TokenType.REPLACE): 4224 return None 4225 if self._match(TokenType.L_PAREN, advance=False): 4226 return self._parse_wrapped_csv(self._parse_expression) 4227 return self._parse_csv(self._parse_expression) 4228 4229 def _parse_csv( 4230 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4231 ) -> t.List[t.Optional[exp.Expression]]: 4232 parse_result = parse_method() 4233 items = [parse_result] if parse_result is not None else [] 4234 4235 while self._match(sep): 4236 self._add_comments(parse_result) 4237 parse_result = parse_method() 4238 if parse_result is not None: 4239 items.append(parse_result) 4240 4241 return items 4242 4243 def _parse_tokens( 4244 self, parse_method: t.Callable, expressions: t.Dict 4245 ) -> t.Optional[exp.Expression]: 4246 this = parse_method() 4247 4248 while self._match_set(expressions): 4249 this = self.expression( 4250 expressions[self._prev.token_type], 4251 this=this, 4252 comments=self._prev_comments, 4253 expression=parse_method(), 4254 ) 4255 4256 return this 4257 4258 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4259 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4260 4261 def _parse_wrapped_csv( 4262 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4263 ) -> t.List[t.Optional[exp.Expression]]: 4264 return self._parse_wrapped( 4265 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4266 ) 4267 4268 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4269 wrapped = self._match(TokenType.L_PAREN) 4270 if not wrapped and not optional: 4271 self.raise_error("Expecting (") 4272 parse_result = parse_method() 4273 if wrapped: 4274 self._match_r_paren() 4275 return parse_result 4276 4277 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4278 return self._parse_select() or self._parse_set_operations( 4279 self._parse_expression() if alias else self._parse_conjunction() 4280 ) 4281 4282 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4283 return self._parse_query_modifiers( 4284 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4285 ) 4286 4287 def _parse_transaction(self) -> exp.Transaction: 4288 this = None 4289 if self._match_texts(self.TRANSACTION_KIND): 4290 this = self._prev.text 4291 4292 self._match_texts({"TRANSACTION", "WORK"}) 4293 4294 modes = [] 4295 while True: 4296 mode = [] 4297 while self._match(TokenType.VAR): 4298 mode.append(self._prev.text) 4299 4300 if mode: 4301 modes.append(" ".join(mode)) 4302 if not self._match(TokenType.COMMA): 4303 break 4304 4305 return self.expression(exp.Transaction, this=this, modes=modes) 4306 4307 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4308 chain = None 4309 savepoint = None 4310 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4311 4312 self._match_texts({"TRANSACTION", "WORK"}) 4313 4314 if self._match_text_seq("TO"): 4315 self._match_text_seq("SAVEPOINT") 4316 savepoint = self._parse_id_var() 4317 4318 if self._match(TokenType.AND): 4319 chain = not self._match_text_seq("NO") 4320 self._match_text_seq("CHAIN") 4321 4322 if is_rollback: 4323 return self.expression(exp.Rollback, savepoint=savepoint) 4324 4325 return self.expression(exp.Commit, chain=chain) 4326 4327 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4328 if not self._match_text_seq("ADD"): 4329 return None 4330 4331 self._match(TokenType.COLUMN) 4332 exists_column = self._parse_exists(not_=True) 4333 expression = self._parse_column_def(self._parse_field(any_token=True)) 4334 4335 if expression: 4336 expression.set("exists", exists_column) 4337 4338 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4339 if self._match_texts(("FIRST", "AFTER")): 4340 position = self._prev.text 4341 column_position = self.expression( 4342 exp.ColumnPosition, this=self._parse_column(), position=position 4343 ) 4344 expression.set("position", column_position) 4345 4346 return expression 4347 4348 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4349 drop = self._match(TokenType.DROP) and self._parse_drop() 4350 if drop and not isinstance(drop, exp.Command): 4351 drop.set("kind", drop.args.get("kind", "COLUMN")) 4352 return drop 4353 4354 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4355 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4356 return self.expression( 4357 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4358 ) 4359 4360 def _parse_add_constraint(self) -> exp.AddConstraint: 4361 this = None 4362 kind = self._prev.token_type 4363 4364 if kind == TokenType.CONSTRAINT: 4365 this = self._parse_id_var() 4366 4367 if self._match_text_seq("CHECK"): 4368 expression = self._parse_wrapped(self._parse_conjunction) 4369 enforced = self._match_text_seq("ENFORCED") 4370 4371 return self.expression( 4372 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4373 ) 4374 4375 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4376 expression = self._parse_foreign_key() 4377 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4378 expression = self._parse_primary_key() 4379 else: 4380 expression = None 4381 4382 return self.expression(exp.AddConstraint, this=this, expression=expression) 4383 4384 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4385 index = self._index - 1 4386 4387 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4388 return self._parse_csv(self._parse_add_constraint) 4389 4390 self._retreat(index) 4391 return self._parse_csv(self._parse_add_column) 4392 4393 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4394 self._match(TokenType.COLUMN) 4395 column = self._parse_field(any_token=True) 4396 4397 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4398 return self.expression(exp.AlterColumn, this=column, drop=True) 4399 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4400 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4401 4402 self._match_text_seq("SET", "DATA") 4403 return self.expression( 4404 exp.AlterColumn, 4405 this=column, 4406 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4407 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4408 using=self._match(TokenType.USING) and self._parse_conjunction(), 4409 ) 4410 4411 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4412 index = self._index - 1 4413 4414 partition_exists = self._parse_exists() 4415 if self._match(TokenType.PARTITION, advance=False): 4416 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4417 4418 self._retreat(index) 4419 return self._parse_csv(self._parse_drop_column) 4420 4421 def _parse_alter_table_rename(self) -> exp.RenameTable: 4422 self._match_text_seq("TO") 4423 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4424 4425 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4426 start = self._prev 4427 4428 if not self._match(TokenType.TABLE): 4429 return self._parse_as_command(start) 4430 4431 exists = self._parse_exists() 4432 this = self._parse_table(schema=True) 4433 4434 if self._next: 4435 self._advance() 4436 4437 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4438 if parser: 4439 actions = ensure_list(parser(self)) 4440 4441 if not self._curr: 4442 return self.expression( 4443 exp.AlterTable, 4444 this=this, 4445 exists=exists, 4446 actions=actions, 4447 ) 4448 return self._parse_as_command(start) 4449 4450 def _parse_merge(self) -> exp.Merge: 4451 self._match(TokenType.INTO) 4452 target = self._parse_table() 4453 4454 self._match(TokenType.USING) 4455 using = self._parse_table() 4456 4457 self._match(TokenType.ON) 4458 on = self._parse_conjunction() 4459 4460 whens = [] 4461 while self._match(TokenType.WHEN): 4462 matched = not self._match(TokenType.NOT) 4463 self._match_text_seq("MATCHED") 4464 source = ( 4465 False 4466 if self._match_text_seq("BY", "TARGET") 4467 else self._match_text_seq("BY", "SOURCE") 4468 ) 4469 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4470 4471 self._match(TokenType.THEN) 4472 4473 if self._match(TokenType.INSERT): 4474 _this = self._parse_star() 4475 if _this: 4476 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4477 else: 4478 then = self.expression( 4479 exp.Insert, 4480 this=self._parse_value(), 4481 expression=self._match(TokenType.VALUES) and self._parse_value(), 4482 ) 4483 elif self._match(TokenType.UPDATE): 4484 expressions = self._parse_star() 4485 if expressions: 4486 then = self.expression(exp.Update, expressions=expressions) 4487 else: 4488 then = self.expression( 4489 exp.Update, 4490 expressions=self._match(TokenType.SET) 4491 and self._parse_csv(self._parse_equality), 4492 ) 4493 elif self._match(TokenType.DELETE): 4494 then = self.expression(exp.Var, this=self._prev.text) 4495 else: 4496 then = None 4497 4498 whens.append( 4499 self.expression( 4500 exp.When, 4501 matched=matched, 4502 source=source, 4503 condition=condition, 4504 then=then, 4505 ) 4506 ) 4507 4508 return self.expression( 4509 exp.Merge, 4510 this=target, 4511 using=using, 4512 on=on, 4513 expressions=whens, 4514 ) 4515 4516 def _parse_show(self) -> t.Optional[exp.Expression]: 4517 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4518 if parser: 4519 return parser(self) 4520 self._advance() 4521 return self.expression(exp.Show, this=self._prev.text.upper()) 4522 4523 def _parse_set_item_assignment( 4524 self, kind: t.Optional[str] = None 4525 ) -> t.Optional[exp.Expression]: 4526 index = self._index 4527 4528 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4529 return self._parse_set_transaction(global_=kind == "GLOBAL") 4530 4531 left = self._parse_primary() or self._parse_id_var() 4532 4533 if not self._match_texts(("=", "TO")): 4534 self._retreat(index) 4535 return None 4536 4537 right = self._parse_statement() or self._parse_id_var() 4538 this = self.expression(exp.EQ, this=left, expression=right) 4539 4540 return self.expression(exp.SetItem, this=this, kind=kind) 4541 4542 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4543 self._match_text_seq("TRANSACTION") 4544 characteristics = self._parse_csv( 4545 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4546 ) 4547 return self.expression( 4548 exp.SetItem, 4549 expressions=characteristics, 4550 kind="TRANSACTION", 4551 **{"global": global_}, # type: ignore 4552 ) 4553 4554 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4555 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4556 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4557 4558 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4559 index = self._index 4560 set_ = self.expression( 4561 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4562 ) 4563 4564 if self._curr: 4565 self._retreat(index) 4566 return self._parse_as_command(self._prev) 4567 4568 return set_ 4569 4570 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4571 for option in options: 4572 if self._match_text_seq(*option.split(" ")): 4573 return exp.var(option) 4574 return None 4575 4576 def _parse_as_command(self, start: Token) -> exp.Command: 4577 while self._curr: 4578 self._advance() 4579 text = self._find_sql(start, self._prev) 4580 size = len(start.text) 4581 return exp.Command(this=text[:size], expression=text[size:]) 4582 4583 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4584 settings = [] 4585 4586 self._match_l_paren() 4587 kind = self._parse_id_var() 4588 4589 if self._match(TokenType.L_PAREN): 4590 while True: 4591 key = self._parse_id_var() 4592 value = self._parse_primary() 4593 4594 if not key and value is None: 4595 break 4596 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4597 self._match(TokenType.R_PAREN) 4598 4599 self._match_r_paren() 4600 4601 return self.expression( 4602 exp.DictProperty, 4603 this=this, 4604 kind=kind.this if kind else None, 4605 settings=settings, 4606 ) 4607 4608 def _parse_dict_range(self, this: str) -> exp.DictRange: 4609 self._match_l_paren() 4610 has_min = self._match_text_seq("MIN") 4611 if has_min: 4612 min = self._parse_var() or self._parse_primary() 4613 self._match_text_seq("MAX") 4614 max = self._parse_var() or self._parse_primary() 4615 else: 4616 max = self._parse_var() or self._parse_primary() 4617 min = exp.Literal.number(0) 4618 self._match_r_paren() 4619 return self.expression(exp.DictRange, this=this, min=min, max=max) 4620 4621 def _find_parser( 4622 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4623 ) -> t.Optional[t.Callable]: 4624 if not self._curr: 4625 return None 4626 4627 index = self._index 4628 this = [] 4629 while True: 4630 # The current token might be multiple words 4631 curr = self._curr.text.upper() 4632 key = curr.split(" ") 4633 this.append(curr) 4634 4635 self._advance() 4636 result, trie = in_trie(trie, key) 4637 if result == TrieResult.FAILED: 4638 break 4639 4640 if result == TrieResult.EXISTS: 4641 
subparser = parsers[" ".join(this)] 4642 return subparser 4643 4644 self._retreat(index) 4645 return None 4646 4647 def _match(self, token_type, advance=True, expression=None): 4648 if not self._curr: 4649 return None 4650 4651 if self._curr.token_type == token_type: 4652 if advance: 4653 self._advance() 4654 self._add_comments(expression) 4655 return True 4656 4657 return None 4658 4659 def _match_set(self, types, advance=True): 4660 if not self._curr: 4661 return None 4662 4663 if self._curr.token_type in types: 4664 if advance: 4665 self._advance() 4666 return True 4667 4668 return None 4669 4670 def _match_pair(self, token_type_a, token_type_b, advance=True): 4671 if not self._curr or not self._next: 4672 return None 4673 4674 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4675 if advance: 4676 self._advance(2) 4677 return True 4678 4679 return None 4680 4681 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4682 if not self._match(TokenType.L_PAREN, expression=expression): 4683 self.raise_error("Expecting (") 4684 4685 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4686 if not self._match(TokenType.R_PAREN, expression=expression): 4687 self.raise_error("Expecting )") 4688 4689 def _match_texts(self, texts, advance=True): 4690 if self._curr and self._curr.text.upper() in texts: 4691 if advance: 4692 self._advance() 4693 return True 4694 return False 4695 4696 def _match_text_seq(self, *texts, advance=True): 4697 index = self._index 4698 for text in texts: 4699 if self._curr and self._curr.text.upper() == text: 4700 self._advance() 4701 else: 4702 self._retreat(index) 4703 return False 4704 4705 if not advance: 4706 self._retreat(index) 4707 4708 return True 4709 4710 @t.overload 4711 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4712 ... 4713 4714 @t.overload 4715 def _replace_columns_with_dots( 4716 self, this: t.Optional[exp.Expression] 4717 ) -> t.Optional[exp.Expression]: 4718 ... 4719 4720 def _replace_columns_with_dots(self, this): 4721 if isinstance(this, exp.Dot): 4722 exp.replace_children(this, self._replace_columns_with_dots) 4723 elif isinstance(this, exp.Column): 4724 exp.replace_children(this, self._replace_columns_with_dots) 4725 table = this.args.get("table") 4726 this = ( 4727 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4728 ) 4729 4730 return this 4731 4732 def _replace_lambda( 4733 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4734 ) -> t.Optional[exp.Expression]: 4735 if not node: 4736 return node 4737 4738 for column in node.find_all(exp.Column): 4739 if column.parts[0].name in lambda_variables: 4740 dot_or_id = column.to_dot() if column.table else column.this 4741 parent = column.parent 4742 4743 while isinstance(parent, exp.Dot): 4744 if not isinstance(parent.parent, exp.Dot): 4745 parent.replace(dot_or_id) 4746 break 4747 parent = parent.parent 4748 else: 4749 if column is node: 4750 node = dot_or_id 4751 else: 4752 column.replace(dot_or_id) 4753 return node
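As a quick end-to-end check of the parsing paths above (a minimal sketch driving the Tokenizer and this Parser through sqlglot's public parse_one helper), the MERGE and window-function branches can be exercised directly:

import sqlglot
from sqlglot import exp

# _parse_merge builds an exp.Merge with one exp.When per WHEN clause.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)

# _parse_window builds an exp.Window carrying partition, order and spec args.
select = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
assert select.find(exp.Window) is not None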
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
833 def __init__( 834 self, 835 error_level: t.Optional[ErrorLevel] = None, 836 error_message_context: int = 100, 837 max_errors: int = 3, 838 ): 839 self.error_level = error_level or ErrorLevel.IMMEDIATE 840 self.error_message_context = error_message_context 841 self.max_errors = max_errors 842 self.reset()
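For illustration, constructing a parser by hand looks like this (a minimal sketch; in practice a Dialect usually instantiates its own Parser subclass):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Accumulate up to 5 errors and raise them together instead of failing fast.
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
tokens = Tokenizer().tokenize("SELECT 1 AS one")
trees = parser.parse(tokens, sql="SELECT 1 AS one")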
854 def parse( 855 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 856 ) -> t.List[t.Optional[exp.Expression]]: 857 """ 858 Parses a list of tokens and returns a list of syntax trees, one tree 859 per parsed SQL statement. 860 861 Args: 862 raw_tokens: The list of tokens. 863 sql: The original SQL string, used to produce helpful debug messages. 864 865 Returns: 866 The list of the produced syntax trees. 867 """ 868 return self._parse( 869 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 870 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
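For example (a small sketch using the base Tokenizer), a multi-statement string yields one tree per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # statements are split on semicolons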
872 def parse_into( 873 self, 874 expression_types: exp.IntoType, 875 raw_tokens: t.List[Token], 876 sql: t.Optional[str] = None, 877 ) -> t.List[t.Optional[exp.Expression]]: 878 """ 879 Parses a list of tokens into a given Expression type. If a collection of Expression 880 types is given instead, this method will try to parse the token list into each one 881 of them, stopping at the first for which the parsing succeeds. 882 883 Args: 884 expression_types: The expression type(s) to try and parse the token list into. 885 raw_tokens: The list of tokens. 886 sql: The original SQL string, used to produce helpful debug messages. 887 888 Returns: 889 The target Expression. 890 """ 891 errors = [] 892 for expression_type in ensure_list(expression_types): 893 parser = self.EXPRESSION_PARSERS.get(expression_type) 894 if not parser: 895 raise TypeError(f"No parser registered for {expression_type}") 896 897 try: 898 return self._parse(parser, raw_tokens, sql) 899 except ParseError as e: 900 e.errors[0]["into_expression"] = expression_type 901 errors.append(e) 902 903 raise ParseError( 904 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 905 errors=merge_errors(errors), 906 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
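For instance (a sketch that relies on exp.Condition being registered in EXPRESSION_PARSERS, as it is in the base parser), a bare predicate can be parsed without wrapping it in a SELECT:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
(condition,) = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql=sql)
assert isinstance(condition, exp.And)  # the conjunction node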
943 def check_errors(self) -> None: 944 """Logs or raises any found errors, depending on the chosen error level setting.""" 945 if self.error_level == ErrorLevel.WARN: 946 for error in self.errors: 947 logger.error(str(error)) 948 elif self.error_level == ErrorLevel.RAISE and self.errors: 949 raise ParseError( 950 concat_messages(self.errors, self.max_errors), 951 errors=merge_errors(self.errors), 952 )
Logs or raises any found errors, depending on the chosen error level setting.
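A sketch of the two non-default behaviors (assuming "SELECT * FROM" trips the expected-table-name check; exact recovery on malformed input can vary by version):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad = "SELECT * FROM"  # missing table name

# WARN: errors are logged via the module logger, nothing is raised.
Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(bad), sql=bad)

# RAISE: check_errors bundles everything recorded during the run.
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad), sql=bad)
except ParseError as e:
    print(len(e.errors))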
954 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 955 """ 956 Appends an error to the list of recorded errors or raises it, depending on the chosen 957 error level setting. 958 """ 959 token = token or self._curr or self._prev or Token.string("") 960 start = token.start 961 end = token.end + 1 962 start_context = self.sql[max(start - self.error_message_context, 0) : start] 963 highlight = self.sql[start:end] 964 end_context = self.sql[end : end + self.error_message_context] 965 966 error = ParseError.new( 967 f"{message}. Line {token.line}, Col: {token.col}.\n" 968 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 969 description=message, 970 line=token.line, 971 col=token.col, 972 start_context=start_context, 973 highlight=highlight, 974 end_context=end_context, 975 ) 976 977 if self.error_level == ErrorLevel.IMMEDIATE: 978 raise error 979 980 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
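The structured fields attached above surface on the raised ParseError; for example, with the default IMMEDIATE level (a minimal sketch):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

try:
    Parser().parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["description"], err["highlight"])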
982 def expression( 983 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 984 ) -> E: 985 """ 986 Creates a new, validated Expression. 987 988 Args: 989 exp_class: The expression class to instantiate. 990 comments: An optional list of comments to attach to the expression. 991 kwargs: The arguments to set for the expression along with their respective values. 992 993 Returns: 994 The target expression. 995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
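A hypothetical dialect hook shows the intended usage; the GREET name and the _parse_greet method are made up for illustration:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_greet(self) -> exp.Anonymous:
        # self.expression attaches any pending comments and runs validation,
        # unlike constructing exp.Anonymous(...) directly.
        return self.expression(
            exp.Anonymous, this="GREET", expressions=[exp.Literal.string("hi")]
        )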
1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
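A small sketch of the failure mode: instantiating an Expression with a mandatory argument missing and validating it records, or raises, an error depending on error_level:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    # exp.Cast requires both `this` and `to`; `to` is intentionally unset.
    Parser().validate_expression(exp.Cast(this=exp.column("x")))
except ParseError as e:
    print(e.errors[0]["description"])  # e.g. a "Required keyword" message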