sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
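
    # Usage sketch (added for illustration, not part of the original module):
    # a Parser is normally driven through the public sqlglot API, but it can
    # also be used directly with tokens from the default Tokenizer:
    #
    #   from sqlglot.tokens import Tokenizer
    #   from sqlglot.errors import ErrorLevel
    #
    #   parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #   expressions = parser.parse(Tokenizer().tokenize("SELECT a FROM b"))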

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
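
    # Sketch (an assumption about typical usage, not from the original source):
    # dialects customize parsing by subclassing Parser and extending these
    # class-level tables, e.g. registering a custom function parser:
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #       }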

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
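
    # Illustration (an added sketch, not part of the original module): these
    # tables drive the binary-operator precedence levels, with FACTOR binding
    # tighter than TERM, so "1 + 2 * 3" parses as Add(this=1, expression=Mul(...)):
    #
    #   import sqlglot
    #   select = sqlglot.parse_one("SELECT 1 + 2 * 3")
    #   assert isinstance(select.expressions[0], exp.Add)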

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
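
    # Illustration (added sketch): statement parsing dispatches on the leading
    # token via STATEMENT_PARSERS; a bare FROM, for instance, expands into a
    # SELECT * query per the TokenType.FROM entry above:
    #
    #   import sqlglot
    #   sqlglot.parse_one("FROM tbl").sql()  # 'SELECT * FROM tbl'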

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
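
    # Illustration (added sketch): RANGE_PARSERS handles predicate operators,
    # and binary_range_parser routes LIKE through _parse_escape, so an ESCAPE
    # clause wraps the exp.Like node in an exp.Escape:
    #
    #   import sqlglot
    #   node = sqlglot.parse_one("x LIKE 'a%' ESCAPE '!'")
    #   assert isinstance(node, exp.Escape) and isinstance(node.this, exp.Like)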

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
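
    # Sketch (added, assuming MySQL dialect support): PROPERTY_PARSERS is keyed
    # by the property's leading keyword(s); ENGINE, for example, goes through
    # _parse_property_assignment and becomes an exp.EngineProperty:
    #
    #   import sqlglot
    #   create = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
    #   create.args["properties"]  # exp.Properties containing an exp.EngineProperty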

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }
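
    # Illustration (added sketch; the "actions" arg name is an assumption about
    # exp.AlterTable): ALTER_PARSERS dispatches on the keyword that follows the
    # table name:
    #
    #   import sqlglot
    #   alter = sqlglot.parse_one("ALTER TABLE t RENAME TO u")
    #   alter.args["actions"]  # e.g. [exp.RenameTable(...)]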
699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 "joins": lambda self: list(iter(self._parse_join, None)), 741 "laterals": lambda self: list(iter(self._parse_lateral, None)), 742 "match": lambda self: self._parse_match_recognize(), 743 "where": lambda self: self._parse_where(), 744 "group": lambda self: self._parse_group(), 745 "having": lambda self: self._parse_having(), 746 "qualify": lambda self: self._parse_qualify(), 747 "windows": lambda self: self._parse_window_clause(), 748 "order": lambda self: self._parse_order(), 749 "limit": lambda self: self._parse_limit(), 750 "offset": lambda self: self._parse_offset(), 751 "locks": lambda self: self._parse_locks(), 752 "sample": lambda self: self._parse_table_sample(as_modifier=True), 753 } 754 755 SET_PARSERS = { 756 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 757 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 758 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 759 "TRANSACTION": lambda self: self._parse_set_transaction(), 760 } 761 762 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 763 764 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 765 766 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 767 768 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 769 770 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 771 772 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 773 TRANSACTION_CHARACTERISTICS = { 774 "ISOLATION LEVEL REPEATABLE READ", 775 "ISOLATION LEVEL READ COMMITTED", 776 "ISOLATION LEVEL READ UNCOMMITTED", 777 "ISOLATION LEVEL SERIALIZABLE", 778 "READ WRITE", 779 "READ ONLY", 780 } 781 782 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 783 

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
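
    # Usage sketch (added): parse() produces one tree per statement, with the
    # token stream split on semicolons in _parse below:
    #
    #   from sqlglot.tokens import Tokenizer
    #   trees = Parser().parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    #   len(trees)  # 2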
889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 
994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression 1020 1021 def _find_sql(self, start: Token, end: Token) -> str: 1022 return self.sql[start.start : end.end + 1] 1023 1024 def _advance(self, times: int = 1) -> None: 1025 self._index += times 1026 self._curr = seq_get(self._tokens, self._index) 1027 self._next = seq_get(self._tokens, self._index + 1) 1028 1029 if self._index > 0: 1030 self._prev = self._tokens[self._index - 1] 1031 self._prev_comments = self._prev.comments 1032 else: 1033 self._prev = None 1034 self._prev_comments = None 1035 1036 def _retreat(self, index: int) -> None: 1037 if index != self._index: 1038 self._advance(index - self._index) 1039 1040 def _parse_command(self) -> exp.Command: 1041 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1042 1043 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1044 start = self._prev 1045 exists = self._parse_exists() if allow_exists else None 1046 1047 self._match(TokenType.ON) 1048 1049 kind = self._match_set(self.CREATABLES) and self._prev 1050 if not kind: 1051 return self._parse_as_command(start) 1052 1053 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=kind.token_type) 1055 elif kind.token_type == TokenType.TABLE: 1056 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1057 elif kind.token_type == TokenType.COLUMN: 1058 this = self._parse_column() 1059 else: 1060 this = self._parse_id_var() 1061 1062 self._match(TokenType.IS) 1063 1064 return self.expression( 1065 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1066 ) 1067 1068 def _parse_to_table( 1069 self, 1070 ) -> exp.ToTableProperty: 1071 table = self._parse_table_parts(schema=True) 1072 return self.expression(exp.ToTableProperty, this=table) 1073 1074 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1075 def _parse_ttl(self) -> exp.Expression: 1076 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1077 this = self._parse_bitwise() 1078 1079 if self._match_text_seq("DELETE"): 1080 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1081 if self._match_text_seq("RECOMPRESS"): 1082 return self.expression( 1083 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1084 ) 1085 if self._match_text_seq("TO", "DISK"): 1086 return self.expression( 1087 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1088 ) 1089 if self._match_text_seq("TO", "VOLUME"): 1090 return self.expression( 1091 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
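
    # Illustration (added sketch): DROP statements carry their modifiers as
    # plain args on the exp.Drop node:
    #
    #   import sqlglot
    #   drop = sqlglot.parse_one("DROP TABLE IF EXISTS t")
    #   drop.args["exists"]  # True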

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
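
    # Illustration (added sketch): OR REPLACE and the creatable kind both end
    # up as args on the exp.Create node built above:
    #
    #   import sqlglot
    #   create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #   create.args["replace"], create.args["kind"]  # (True, 'VIEW')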

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
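
    # Sketch (added, assuming Hive dialect support): Hive's STORED AS clause is
    # handled by _parse_stored; with INPUTFORMAT/OUTPUTFORMAT it yields an
    # exp.InputOutputFormat inside the FileFormatProperty, otherwise a bare
    # format name:
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (a INT) STORED AS PARQUET", read="hive")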

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
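
    # Illustration (added, assuming Hive dialect support): bucketing clauses are
    # parsed by _parse_clustered_by into an exp.ClusteredByProperty:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (a INT) CLUSTERED BY (a) INTO 4 BUCKETS", read="hive"
    #   )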

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
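
    # Illustration (added, assuming Hive dialect support): Hive-style
    # PARTITIONED BY becomes an exp.PartitionedByProperty whose `this` holds the
    # partition schema:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (a INT) PARTITIONED BY (b STRING)", read="hive"
    #   )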

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
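
    # Illustration (added sketch): both Postgres ON CONFLICT and MySQL
    # ON DUPLICATE KEY funnel into exp.OnConflict:
    #
    #   import sqlglot
    #   insert = sqlglot.parse_one("INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING")
    #   insert.args["conflict"]  # exp.OnConflict(nothing=True)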
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

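    # Illustrative sketch (not from the sqlglot source): the DML parsers above
    # share the WHERE/RETURNING/LIMIT helpers, so a Postgres UPDATE with a
    # RETURNING clause comes back as a single exp.Update node (names made up):
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> update = parse_one("UPDATE t SET x = 1 WHERE y = 2 RETURNING x", read="postgres")
    #     >>> isinstance(update, exp.Update), isinstance(update.args["returning"], exp.Returning)
    #     (True, True)
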
self._match_text_seq("OPTIONS"): 1847 self._match_l_paren() 1848 k = self._parse_string() 1849 self._match(TokenType.EQ) 1850 v = self._parse_string() 1851 options = [k, v] 1852 self._match_r_paren() 1853 1854 self._match(TokenType.ALIAS) 1855 return self.expression( 1856 exp.Cache, 1857 this=table, 1858 lazy=lazy, 1859 options=options, 1860 expression=self._parse_select(nested=True), 1861 ) 1862 1863 def _parse_partition(self) -> t.Optional[exp.Partition]: 1864 if not self._match(TokenType.PARTITION): 1865 return None 1866 1867 return self.expression( 1868 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1869 ) 1870 1871 def _parse_value(self) -> exp.Tuple: 1872 if self._match(TokenType.L_PAREN): 1873 expressions = self._parse_csv(self._parse_conjunction) 1874 self._match_r_paren() 1875 return self.expression(exp.Tuple, expressions=expressions) 1876 1877 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1878 # Source: https://prestodb.io/docs/current/sql/values.html 1879 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1880 1881 def _parse_select( 1882 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1883 ) -> t.Optional[exp.Expression]: 1884 cte = self._parse_with() 1885 if cte: 1886 this = self._parse_statement() 1887 1888 if not this: 1889 self.raise_error("Failed to parse any statement following CTE") 1890 return cte 1891 1892 if "with" in this.arg_types: 1893 this.set("with", cte) 1894 else: 1895 self.raise_error(f"{this.key} does not support CTE") 1896 this = cte 1897 elif self._match(TokenType.SELECT): 1898 comments = self._prev_comments 1899 1900 hint = self._parse_hint() 1901 all_ = self._match(TokenType.ALL) 1902 distinct = self._match(TokenType.DISTINCT) 1903 1904 kind = ( 1905 self._match(TokenType.ALIAS) 1906 and self._match_texts(("STRUCT", "VALUE")) 1907 and self._prev.text 1908 ) 1909 1910 if distinct: 1911 distinct = self.expression( 1912 exp.Distinct, 1913 on=self._parse_value() if self._match(TokenType.ON) else None, 1914 ) 1915 1916 if all_ and distinct: 1917 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1918 1919 limit = self._parse_limit(top=True) 1920 expressions = self._parse_csv(self._parse_expression) 1921 1922 this = self.expression( 1923 exp.Select, 1924 kind=kind, 1925 hint=hint, 1926 distinct=distinct, 1927 expressions=expressions, 1928 limit=limit, 1929 ) 1930 this.comments = comments 1931 1932 into = self._parse_into() 1933 if into: 1934 this.set("into", into) 1935 1936 from_ = self._parse_from() 1937 if from_: 1938 this.set("from", from_) 1939 1940 this = self._parse_query_modifiers(this) 1941 elif (table or nested) and self._match(TokenType.L_PAREN): 1942 if self._match(TokenType.PIVOT): 1943 this = self._parse_simplified_pivot() 1944 elif self._match(TokenType.FROM): 1945 this = exp.select("*").from_( 1946 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1947 ) 1948 else: 1949 this = self._parse_table() if table else self._parse_select(nested=True) 1950 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1951 1952 self._match_r_paren() 1953 1954 # early return so that subquery unions aren't parsed again 1955 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1956 # Union ALL should be a property of the top select node, not the subquery 1957 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1958 elif self._match(TokenType.VALUES): 1959 this = self.expression( 1960 exp.Values, 1961 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

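    # Illustrative sketch (not from the sqlglot source): _parse_with collects
    # comma-separated CTEs and _parse_select attaches them to the following
    # statement via this.set("with", cte):
    #
    #     >>> from sqlglot import parse_one
    #     >>> select = parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #     >>> [cte.alias for cte in select.args["with"].expressions]
    #     ['x']
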
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

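    # Illustrative sketch (not from the sqlglot source): the method/side/kind
    # tokens parsed above are stored on exp.Join as plain strings:
    #
    #     >>> from sqlglot import parse_one
    #     >>> join = parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id").args["joins"][0]
    #     >>> join.args["side"], join.args["kind"]
    #     ('LEFT', 'OUTER')
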
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

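    # Illustrative sketch (not from the sqlglot source): the DOT loop above
    # assigns the last three name parts to catalog, db and table (names made up):
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> table = parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
    #     >>> table.catalog, table.db, table.name
    #     ('prod', 'analytics', 'events')
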
alias.args.get("columns"): 2423 self.raise_error("Unexpected extra column alias in unnest.") 2424 2425 alias.set("columns", [alias.this]) 2426 alias.set("this", None) 2427 2428 offset = None 2429 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2430 self._match(TokenType.ALIAS) 2431 offset = self._parse_id_var() or exp.to_identifier("offset") 2432 2433 return self.expression( 2434 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2435 ) 2436 2437 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2438 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2439 if not is_derived and not self._match(TokenType.VALUES): 2440 return None 2441 2442 expressions = self._parse_csv(self._parse_value) 2443 alias = self._parse_table_alias() 2444 2445 if is_derived: 2446 self._match_r_paren() 2447 2448 return self.expression( 2449 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2450 ) 2451 2452 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2453 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2454 as_modifier and self._match_text_seq("USING", "SAMPLE") 2455 ): 2456 return None 2457 2458 bucket_numerator = None 2459 bucket_denominator = None 2460 bucket_field = None 2461 percent = None 2462 rows = None 2463 size = None 2464 seed = None 2465 2466 kind = ( 2467 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2468 ) 2469 method = self._parse_var(tokens=(TokenType.ROW,)) 2470 2471 self._match(TokenType.L_PAREN) 2472 2473 num = self._parse_number() 2474 2475 if self._match_text_seq("BUCKET"): 2476 bucket_numerator = self._parse_number() 2477 self._match_text_seq("OUT", "OF") 2478 bucket_denominator = bucket_denominator = self._parse_number() 2479 self._match(TokenType.ON) 2480 bucket_field = self._parse_field() 2481 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2482 percent = num 2483 elif self._match(TokenType.ROWS): 2484 rows = num 2485 else: 2486 size = num 2487 2488 self._match(TokenType.R_PAREN) 2489 2490 if self._match(TokenType.L_PAREN): 2491 method = self._parse_var() 2492 seed = self._match(TokenType.COMMA) and self._parse_number() 2493 self._match_r_paren() 2494 elif self._match_texts(("SEED", "REPEATABLE")): 2495 seed = self._parse_wrapped(self._parse_number) 2496 2497 return self.expression( 2498 exp.TableSample, 2499 method=method, 2500 bucket_numerator=bucket_numerator, 2501 bucket_denominator=bucket_denominator, 2502 bucket_field=bucket_field, 2503 percent=percent, 2504 rows=rows, 2505 size=size, 2506 seed=seed, 2507 kind=kind, 2508 ) 2509 2510 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2511 return list(iter(self._parse_pivot, None)) 2512 2513 # https://duckdb.org/docs/sql/statements/pivot 2514 def _parse_simplified_pivot(self) -> exp.Pivot: 2515 def _parse_on() -> t.Optional[exp.Expression]: 2516 this = self._parse_bitwise() 2517 return self._parse_in(this) if self._match(TokenType.IN) else this 2518 2519 this = self._parse_table() 2520 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2521 using = self._match(TokenType.USING) and self._parse_csv( 2522 lambda: self._parse_alias(self._parse_function()) 2523 ) 2524 group = self._parse_group() 2525 return self.expression( 2526 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2527 ) 2528 2529 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2530 index = self._index 2531 2532 if 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

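    # Illustrative sketch (not from the sqlglot source): the loop above folds
    # plain keys, GROUPING SETS, ROLLUP and CUBE into a single exp.Group:
    #
    #     >>> from sqlglot import parse_one
    #     >>> group = parse_one(
    #     ...     "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a), (b))"
    #     ... ).args["group"]
    #     >>> len(group.args["grouping_sets"])
    #     3
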
    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

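    # Illustrative sketch (not from the sqlglot source): in the COMMA branch
    # above, MySQL's "LIMIT 1, 2" treats the first term as the offset, which
    # _parse_query_modifiers later pops out into a separate exp.Offset node:
    #
    #     >>> from sqlglot import parse_one
    #     >>> query = parse_one("SELECT x FROM t LIMIT 1, 2", read="mysql")
    #     >>> query.args["limit"].expression.sql(), query.args["offset"].expression.sql()
    #     ('2', '1')
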
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

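    # Illustrative sketch (not from the sqlglot source): the ISNULL/NOTNULL
    # branches above canonicalize the Postgres shorthands into IS NULL forms:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT a NOTNULL", read="postgres").sql()
    #     'SELECT NOT a IS NULL'
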
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

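    # Illustrative sketch (not from the sqlglot source): per the comment above,
    # a two-part string such as '5 day' is split into a '5' literal plus a
    # day unit so every INTERVAL reaches the same canonical shape:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> interval = parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
    #     >>> interval.this.sql(), interval.args["unit"].sql()
    #     ("'5'", 'day')
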
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

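    # Illustrative sketch (not from the sqlglot source): the AT TIME ZONE
    # postfix handled above wraps the preceding expression in exp.AtTimeZone:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> node = parse_one("SELECT ts AT TIME ZONE 'UTC'").find(exp.AtTimeZone)
    #     >>> node.args["zone"].sql()
    #     "'UTC'"
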
    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

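    # Illustrative sketch (not from the sqlglot source): the STRING branch
    # above folds adjacent string literals into one exp.Concat:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> isinstance(parse_one("SELECT 'foo' 'bar'").expressions[0], exp.Concat)
    #     True
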
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

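    # Illustrative sketch (not from the sqlglot source): the identity-column
    # options parsed above land as args on the constraint node (Postgres
    # syntax; names made up):
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> ddl = parse_one(
    #     ...     "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 5))",
    #     ...     read="postgres",
    #     ... )
    #     >>> constraint = ddl.find(exp.GeneratedAsIdentityColumnConstraint)
    #     >>> constraint.args["start"].sql(), constraint.args["increment"].sql()
    #     ('10', '5')
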
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

self._prev.text.lower() 3562 3563 if self._match_text_seq("NO", "ACTION"): 3564 action = "NO ACTION" 3565 elif self._match(TokenType.SET): 3566 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3567 action = "SET " + self._prev.text.upper() 3568 else: 3569 self._advance() 3570 action = self._prev.text.upper() 3571 3572 options[kind] = action 3573 3574 return self.expression( 3575 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3576 ) 3577 3578 def _parse_primary_key( 3579 self, wrapped_optional: bool = False, in_props: bool = False 3580 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3581 desc = ( 3582 self._match_set((TokenType.ASC, TokenType.DESC)) 3583 and self._prev.token_type == TokenType.DESC 3584 ) 3585 3586 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3587 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3588 3589 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3590 options = self._parse_key_constraint_options() 3591 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3592 3593 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3594 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3595 return this 3596 3597 bracket_kind = self._prev.token_type 3598 3599 if self._match(TokenType.COLON): 3600 expressions: t.List[t.Optional[exp.Expression]] = [ 3601 self.expression(exp.Slice, expression=self._parse_conjunction()) 3602 ] 3603 else: 3604 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3605 3606 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3607 if bracket_kind == TokenType.L_BRACE: 3608 this = self.expression(exp.Struct, expressions=expressions) 3609 elif not this or this.name.upper() == "ARRAY": 3610 this = self.expression(exp.Array, expressions=expressions) 3611 else: 3612 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3613 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3614 3615 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3616 self.raise_error("Expected ]") 3617 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3618 self.raise_error("Expected }") 3619 3620 self._add_comments(this) 3621 return self._parse_bracket(this) 3622 3623 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3624 if self._match(TokenType.COLON): 3625 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3626 return this 3627 3628 def _parse_case(self) -> t.Optional[exp.Expression]: 3629 ifs = [] 3630 default = None 3631 3632 expression = self._parse_conjunction() 3633 3634 while self._match(TokenType.WHEN): 3635 this = self._parse_conjunction() 3636 self._match(TokenType.THEN) 3637 then = self._parse_conjunction() 3638 ifs.append(self.expression(exp.If, this=this, true=then)) 3639 3640 if self._match(TokenType.ELSE): 3641 default = self._parse_conjunction() 3642 3643 if not self._match(TokenType.END): 3644 self.raise_error("Expected END after CASE", self._prev) 3645 3646 return self._parse_window( 3647 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3648 ) 3649 3650 def _parse_if(self) -> t.Optional[exp.Expression]: 3651 if self._match(TokenType.L_PAREN): 3652 args = self._parse_csv(self._parse_conjunction) 3653 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 3654 self._match_r_paren() 3655 else: 3656 index = self._index - 1 3657 condition = self._parse_conjunction() 3658 3659 if not condition: 3660 self._retreat(index) 3661 return None 3662 3663 self._match(TokenType.THEN) 3664 true = self._parse_conjunction() 3665 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3666 self._match(TokenType.END) 3667 this = self.expression(exp.If, this=condition, true=true, false=false) 3668 3669 return self._parse_window(this) 3670 3671 def _parse_extract(self) -> exp.Extract: 3672 this = self._parse_function() or self._parse_var() or self._parse_type() 3673 3674 if self._match(TokenType.FROM): 3675 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3676 3677 if not self._match(TokenType.COMMA): 3678 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3679 3680 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3681 3682 def _parse_any_value(self) -> exp.AnyValue: 3683 this = self._parse_lambda() 3684 is_max = None 3685 having = None 3686 3687 if self._match(TokenType.HAVING): 3688 self._match_texts(("MAX", "MIN")) 3689 is_max = self._prev.text == "MAX" 3690 having = self._parse_column() 3691 3692 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3693 3694 def _parse_cast(self, strict: bool) -> exp.Expression: 3695 this = self._parse_conjunction() 3696 3697 if not self._match(TokenType.ALIAS): 3698 if self._match(TokenType.COMMA): 3699 return self.expression( 3700 exp.CastToStrType, this=this, expression=self._parse_string() 3701 ) 3702 else: 3703 self.raise_error("Expected AS after CAST") 3704 3705 fmt = None 3706 to = self._parse_types() 3707 3708 if not to: 3709 self.raise_error("Expected TYPE after CAST") 3710 elif to.this == exp.DataType.Type.CHAR: 3711 if self._match(TokenType.CHARACTER_SET): 3712 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3713 elif self._match(TokenType.FORMAT): 3714 fmt = self._parse_at_time_zone(self._parse_string()) 3715 3716 if to.this in exp.DataType.TEMPORAL_TYPES: 3717 return self.expression( 3718 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3719 this=this, 3720 format=exp.Literal.string( 3721 format_time( 3722 fmt.this if fmt else "", 3723 self.FORMAT_MAPPING or self.TIME_MAPPING, 3724 self.FORMAT_TRIE or self.TIME_TRIE, 3725 ) 3726 ), 3727 ) 3728 3729 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3730 3731 def _parse_concat(self) -> t.Optional[exp.Expression]: 3732 args = self._parse_csv(self._parse_conjunction) 3733 if self.CONCAT_NULL_OUTPUTS_STRING: 3734 args = [ 3735 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3736 for arg in args 3737 if arg 3738 ] 3739 3740 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3741 # we find such a call we replace it with its argument. 
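# For example, CONCAT(x) is reduced to just the expression x in the parsed tree.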
3742 if len(args) == 1: 3743 return args[0] 3744 3745 return self.expression( 3746 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3747 ) 3748 3749 def _parse_string_agg(self) -> exp.Expression: 3750 if self._match(TokenType.DISTINCT): 3751 args: t.List[t.Optional[exp.Expression]] = [ 3752 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3753 ] 3754 if self._match(TokenType.COMMA): 3755 args.extend(self._parse_csv(self._parse_conjunction)) 3756 else: 3757 args = self._parse_csv(self._parse_conjunction) 3758 3759 index = self._index 3760 if not self._match(TokenType.R_PAREN): 3761 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3762 return self.expression( 3763 exp.GroupConcat, 3764 this=seq_get(args, 0), 3765 separator=self._parse_order(this=seq_get(args, 1)), 3766 ) 3767 3768 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3769 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3770 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3771 if not self._match_text_seq("WITHIN", "GROUP"): 3772 self._retreat(index) 3773 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3774 3775 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3776 order = self._parse_order(this=seq_get(args, 0)) 3777 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3778 3779 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3780 this = self._parse_bitwise() 3781 3782 if self._match(TokenType.USING): 3783 to: t.Optional[exp.Expression] = self.expression( 3784 exp.CharacterSet, this=self._parse_var() 3785 ) 3786 elif self._match(TokenType.COMMA): 3787 to = self._parse_types() 3788 else: 3789 to = None 3790 3791 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3792 3793 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3794 """ 3795 There are generally two variants of the DECODE function: 3796 3797 - DECODE(bin, charset) 3798 - DECODE(expression, search, result [, search, result] ... [, default]) 3799 3800 The second variant will always be parsed into a CASE expression. Note that NULL 3801 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3802 instead of relying on pattern matching. 
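For example, DECODE(x, 1, 'one', 'zero') is parsed as
CASE WHEN x = 1 THEN 'one' ELSE 'zero' END, and a NULL search value is
checked with WHEN x IS NULL rather than with equality.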
3803 """ 3804 args = self._parse_csv(self._parse_conjunction) 3805 3806 if len(args) < 3: 3807 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3808 3809 expression, *expressions = args 3810 if not expression: 3811 return None 3812 3813 ifs = [] 3814 for search, result in zip(expressions[::2], expressions[1::2]): 3815 if not search or not result: 3816 return None 3817 3818 if isinstance(search, exp.Literal): 3819 ifs.append( 3820 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3821 ) 3822 elif isinstance(search, exp.Null): 3823 ifs.append( 3824 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3825 ) 3826 else: 3827 cond = exp.or_( 3828 exp.EQ(this=expression.copy(), expression=search), 3829 exp.and_( 3830 exp.Is(this=expression.copy(), expression=exp.Null()), 3831 exp.Is(this=search.copy(), expression=exp.Null()), 3832 copy=False, 3833 ), 3834 copy=False, 3835 ) 3836 ifs.append(exp.If(this=cond, true=result)) 3837 3838 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3839 3840 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3841 self._match_text_seq("KEY") 3842 key = self._parse_field() 3843 self._match(TokenType.COLON) 3844 self._match_text_seq("VALUE") 3845 value = self._parse_field() 3846 3847 if not key and not value: 3848 return None 3849 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3850 3851 def _parse_json_object(self) -> exp.JSONObject: 3852 star = self._parse_star() 3853 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3854 3855 null_handling = None 3856 if self._match_text_seq("NULL", "ON", "NULL"): 3857 null_handling = "NULL ON NULL" 3858 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3859 null_handling = "ABSENT ON NULL" 3860 3861 unique_keys = None 3862 if self._match_text_seq("WITH", "UNIQUE"): 3863 unique_keys = True 3864 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3865 unique_keys = False 3866 3867 self._match_text_seq("KEYS") 3868 3869 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3870 format_json = self._match_text_seq("FORMAT", "JSON") 3871 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3872 3873 return self.expression( 3874 exp.JSONObject, 3875 expressions=expressions, 3876 null_handling=null_handling, 3877 unique_keys=unique_keys, 3878 return_type=return_type, 3879 format_json=format_json, 3880 encoding=encoding, 3881 ) 3882 3883 def _parse_logarithm(self) -> exp.Func: 3884 # Default argument order is base, expression 3885 args = self._parse_csv(self._parse_range) 3886 3887 if len(args) > 1: 3888 if not self.LOG_BASE_FIRST: 3889 args.reverse() 3890 return exp.Log.from_arg_list(args) 3891 3892 return self.expression( 3893 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3894 ) 3895 3896 def _parse_match_against(self) -> exp.MatchAgainst: 3897 expressions = self._parse_csv(self._parse_column) 3898 3899 self._match_text_seq(")", "AGAINST", "(") 3900 3901 this = self._parse_string() 3902 3903 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3904 modifier = "IN NATURAL LANGUAGE MODE" 3905 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3906 modifier = f"{modifier} WITH QUERY EXPANSION" 3907 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3908 modifier = "IN BOOLEAN MODE" 3909 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3910 modifier = "WITH QUERY EXPANSION" 3911 
else: 3912 modifier = None 3913 3914 return self.expression( 3915 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3916 ) 3917 3918 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3919 def _parse_open_json(self) -> exp.OpenJSON: 3920 this = self._parse_bitwise() 3921 path = self._match(TokenType.COMMA) and self._parse_string() 3922 3923 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3924 this = self._parse_field(any_token=True) 3925 kind = self._parse_types() 3926 path = self._parse_string() 3927 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3928 3929 return self.expression( 3930 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3931 ) 3932 3933 expressions = None 3934 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3935 self._match_l_paren() 3936 expressions = self._parse_csv(_parse_open_json_column_def) 3937 3938 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3939 3940 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3941 args = self._parse_csv(self._parse_bitwise) 3942 3943 if self._match(TokenType.IN): 3944 return self.expression( 3945 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3946 ) 3947 3948 if haystack_first: 3949 haystack = seq_get(args, 0) 3950 needle = seq_get(args, 1) 3951 else: 3952 needle = seq_get(args, 0) 3953 haystack = seq_get(args, 1) 3954 3955 return self.expression( 3956 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3957 ) 3958 3959 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3960 args = self._parse_csv(self._parse_table) 3961 return exp.JoinHint(this=func_name.upper(), expressions=args) 3962 3963 def _parse_substring(self) -> exp.Substring: 3964 # Postgres supports the form: substring(string [from int] [for int]) 3965 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3966 3967 args = self._parse_csv(self._parse_bitwise) 3968 3969 if self._match(TokenType.FROM): 3970 args.append(self._parse_bitwise()) 3971 if self._match(TokenType.FOR): 3972 args.append(self._parse_bitwise()) 3973 3974 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3975 3976 def _parse_trim(self) -> exp.Trim: 3977 # https://www.w3resource.com/sql/character-functions/trim.php 3978 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3979 3980 position = None 3981 collation = None 3982 3983 if self._match_texts(self.TRIM_TYPES): 3984 position = self._prev.text.upper() 3985 3986 expression = self._parse_bitwise() 3987 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3988 this = self._parse_bitwise() 3989 else: 3990 this = expression 3991 expression = None 3992 3993 if self._match(TokenType.COLLATE): 3994 collation = self._parse_bitwise() 3995 3996 return self.expression( 3997 exp.Trim, this=this, position=position, expression=expression, collation=collation 3998 ) 3999 4000 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4001 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4002 4003 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4004 return self._parse_window(self._parse_id_var(), alias=True) 4005 4006 def _parse_respect_or_ignore_nulls( 4007 self, this: t.Optional[exp.Expression] 4008 ) -> t.Optional[exp.Expression]: 4009 if self._match_text_seq("IGNORE", "NULLS"): 4010 return 
self.expression(exp.IgnoreNulls, this=this) 4011 if self._match_text_seq("RESPECT", "NULLS"): 4012 return self.expression(exp.RespectNulls, this=this) 4013 return this 4014 4015 def _parse_window( 4016 self, this: t.Optional[exp.Expression], alias: bool = False 4017 ) -> t.Optional[exp.Expression]: 4018 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4019 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4020 self._match_r_paren() 4021 4022 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4023 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4024 if self._match_text_seq("WITHIN", "GROUP"): 4025 order = self._parse_wrapped(self._parse_order) 4026 this = self.expression(exp.WithinGroup, this=this, expression=order) 4027 4028 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4029 # Some dialects choose to implement and some do not. 4030 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4031 4032 # There is some code above in _parse_lambda that handles 4033 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4034 4035 # The below changes handle 4036 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4037 4038 # Oracle allows both formats 4039 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4040 # and Snowflake chose to do the same for familiarity 4041 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4042 this = self._parse_respect_or_ignore_nulls(this) 4043 4044 # bigquery select from window x AS (partition by ...) 4045 if alias: 4046 over = None 4047 self._match(TokenType.ALIAS) 4048 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4049 return this 4050 else: 4051 over = self._prev.text.upper() 4052 4053 if not self._match(TokenType.L_PAREN): 4054 return self.expression( 4055 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4056 ) 4057 4058 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4059 4060 first = self._match(TokenType.FIRST) 4061 if self._match_text_seq("LAST"): 4062 first = False 4063 4064 partition = self._parse_partition_by() 4065 order = self._parse_order() 4066 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4067 4068 if kind: 4069 self._match(TokenType.BETWEEN) 4070 start = self._parse_window_spec() 4071 self._match(TokenType.AND) 4072 end = self._parse_window_spec() 4073 4074 spec = self.expression( 4075 exp.WindowSpec, 4076 kind=kind, 4077 start=start["value"], 4078 start_side=start["side"], 4079 end=end["value"], 4080 end_side=end["side"], 4081 ) 4082 else: 4083 spec = None 4084 4085 self._match_r_paren() 4086 4087 return self.expression( 4088 exp.Window, 4089 this=this, 4090 partition_by=partition, 4091 order=order, 4092 spec=spec, 4093 alias=window_alias, 4094 over=over, 4095 first=first, 4096 ) 4097 4098 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4099 self._match(TokenType.BETWEEN) 4100 4101 return { 4102 "value": ( 4103 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4104 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4105 or self._parse_bitwise() 4106 ), 4107 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4108 } 4109 4110 def _parse_alias( 4111 self, this: t.Optional[exp.Expression], explicit: bool = False 4112 ) -> t.Optional[exp.Expression]: 4113 
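# The alias can be introduced by AS or by juxtaposition (`x y`); explicit=True requires AS.
# A parenthesized name list after the expression parses into exp.Aliases.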
any_token = self._match(TokenType.ALIAS) 4114 4115 if explicit and not any_token: 4116 return this 4117 4118 if self._match(TokenType.L_PAREN): 4119 aliases = self.expression( 4120 exp.Aliases, 4121 this=this, 4122 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4123 ) 4124 self._match_r_paren(aliases) 4125 return aliases 4126 4127 alias = self._parse_id_var(any_token) 4128 4129 if alias: 4130 return self.expression(exp.Alias, this=this, alias=alias) 4131 4132 return this 4133 4134 def _parse_id_var( 4135 self, 4136 any_token: bool = True, 4137 tokens: t.Optional[t.Collection[TokenType]] = None, 4138 ) -> t.Optional[exp.Expression]: 4139 identifier = self._parse_identifier() 4140 4141 if identifier: 4142 return identifier 4143 4144 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4145 quoted = self._prev.token_type == TokenType.STRING 4146 return exp.Identifier(this=self._prev.text, quoted=quoted) 4147 4148 return None 4149 4150 def _parse_string(self) -> t.Optional[exp.Expression]: 4151 if self._match(TokenType.STRING): 4152 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4153 return self._parse_placeholder() 4154 4155 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4156 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4157 4158 def _parse_number(self) -> t.Optional[exp.Expression]: 4159 if self._match(TokenType.NUMBER): 4160 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4161 return self._parse_placeholder() 4162 4163 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4164 if self._match(TokenType.IDENTIFIER): 4165 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4166 return self._parse_placeholder() 4167 4168 def _parse_var( 4169 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4170 ) -> t.Optional[exp.Expression]: 4171 if ( 4172 (any_token and self._advance_any()) 4173 or self._match(TokenType.VAR) 4174 or (self._match_set(tokens) if tokens else False) 4175 ): 4176 return self.expression(exp.Var, this=self._prev.text) 4177 return self._parse_placeholder() 4178 4179 def _advance_any(self) -> t.Optional[Token]: 4180 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4181 self._advance() 4182 return self._prev 4183 return None 4184 4185 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4186 return self._parse_var() or self._parse_string() 4187 4188 def _parse_null(self) -> t.Optional[exp.Expression]: 4189 if self._match(TokenType.NULL): 4190 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4191 return None 4192 4193 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4194 if self._match(TokenType.TRUE): 4195 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4196 if self._match(TokenType.FALSE): 4197 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4198 return None 4199 4200 def _parse_star(self) -> t.Optional[exp.Expression]: 4201 if self._match(TokenType.STAR): 4202 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4203 return None 4204 4205 def _parse_parameter(self) -> exp.Parameter: 4206 wrapped = self._match(TokenType.L_BRACE) 4207 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4208 self._match(TokenType.R_BRACE) 4209 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4210 4211 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
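# Try the dialect's PLACEHOLDER_PARSERS; if the matched token turns out not to
# produce a placeholder, step back so it can be parsed as something else.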
4212 if self._match_set(self.PLACEHOLDER_PARSERS): 4213 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4214 if placeholder: 4215 return placeholder 4216 self._advance(-1) 4217 return None 4218 4219 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4220 if not self._match(TokenType.EXCEPT): 4221 return None 4222 if self._match(TokenType.L_PAREN, advance=False): 4223 return self._parse_wrapped_csv(self._parse_column) 4224 return self._parse_csv(self._parse_column) 4225 4226 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4227 if not self._match(TokenType.REPLACE): 4228 return None 4229 if self._match(TokenType.L_PAREN, advance=False): 4230 return self._parse_wrapped_csv(self._parse_expression) 4231 return self._parse_csv(self._parse_expression) 4232 4233 def _parse_csv( 4234 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4235 ) -> t.List[t.Optional[exp.Expression]]: 4236 parse_result = parse_method() 4237 items = [parse_result] if parse_result is not None else [] 4238 4239 while self._match(sep): 4240 self._add_comments(parse_result) 4241 parse_result = parse_method() 4242 if parse_result is not None: 4243 items.append(parse_result) 4244 4245 return items 4246 4247 def _parse_tokens( 4248 self, parse_method: t.Callable, expressions: t.Dict 4249 ) -> t.Optional[exp.Expression]: 4250 this = parse_method() 4251 4252 while self._match_set(expressions): 4253 this = self.expression( 4254 expressions[self._prev.token_type], 4255 this=this, 4256 comments=self._prev_comments, 4257 expression=parse_method(), 4258 ) 4259 4260 return this 4261 4262 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4263 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4264 4265 def _parse_wrapped_csv( 4266 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4267 ) -> t.List[t.Optional[exp.Expression]]: 4268 return self._parse_wrapped( 4269 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4270 ) 4271 4272 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4273 wrapped = self._match(TokenType.L_PAREN) 4274 if not wrapped and not optional: 4275 self.raise_error("Expecting (") 4276 parse_result = parse_method() 4277 if wrapped: 4278 self._match_r_paren() 4279 return parse_result 4280 4281 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4282 return self._parse_select() or self._parse_set_operations( 4283 self._parse_expression() if alias else self._parse_conjunction() 4284 ) 4285 4286 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4287 return self._parse_query_modifiers( 4288 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4289 ) 4290 4291 def _parse_transaction(self) -> exp.Transaction: 4292 this = None 4293 if self._match_texts(self.TRANSACTION_KIND): 4294 this = self._prev.text 4295 4296 self._match_texts({"TRANSACTION", "WORK"}) 4297 4298 modes = [] 4299 while True: 4300 mode = [] 4301 while self._match(TokenType.VAR): 4302 mode.append(self._prev.text) 4303 4304 if mode: 4305 modes.append(" ".join(mode)) 4306 if not self._match(TokenType.COMMA): 4307 break 4308 4309 return self.expression(exp.Transaction, this=this, modes=modes) 4310 4311 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4312 chain = None 4313 savepoint = None 4314 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4315 4316 self._match_texts({"TRANSACTION", "WORK"}) 4317 4318 if self._match_text_seq("TO"): 4319 self._match_text_seq("SAVEPOINT") 4320 savepoint = self._parse_id_var() 4321 4322 if self._match(TokenType.AND): 4323 chain = not self._match_text_seq("NO") 4324 self._match_text_seq("CHAIN") 4325 4326 if is_rollback: 4327 return self.expression(exp.Rollback, savepoint=savepoint) 4328 4329 return self.expression(exp.Commit, chain=chain) 4330 4331 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4332 if not self._match_text_seq("ADD"): 4333 return None 4334 4335 self._match(TokenType.COLUMN) 4336 exists_column = self._parse_exists(not_=True) 4337 expression = self._parse_column_def(self._parse_field(any_token=True)) 4338 4339 if expression: 4340 expression.set("exists", exists_column) 4341 4342 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4343 if self._match_texts(("FIRST", "AFTER")): 4344 position = self._prev.text 4345 column_position = self.expression( 4346 exp.ColumnPosition, this=self._parse_column(), position=position 4347 ) 4348 expression.set("position", column_position) 4349 4350 return expression 4351 4352 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4353 drop = self._match(TokenType.DROP) and self._parse_drop() 4354 if drop and not isinstance(drop, exp.Command): 4355 drop.set("kind", drop.args.get("kind", "COLUMN")) 4356 return drop 4357 4358 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4359 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4360 return self.expression( 4361 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4362 ) 4363 4364 def _parse_add_constraint(self) -> exp.AddConstraint: 4365 this = None 4366 kind = self._prev.token_type 4367 4368 if kind == TokenType.CONSTRAINT: 4369 this = self._parse_id_var() 4370 4371 if self._match_text_seq("CHECK"): 4372 expression = self._parse_wrapped(self._parse_conjunction) 4373 enforced = self._match_text_seq("ENFORCED") 4374 4375 return self.expression( 4376 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4377 ) 4378 4379 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4380 expression = self._parse_foreign_key() 4381 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4382 expression = self._parse_primary_key() 4383 else: 4384 expression = None 4385 4386 return self.expression(exp.AddConstraint, this=this, expression=expression) 4387 4388 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4389 index = self._index - 1 4390 4391 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4392 return self._parse_csv(self._parse_add_constraint) 4393 4394 self._retreat(index) 4395 return self._parse_csv(self._parse_add_column) 4396 4397 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4398 self._match(TokenType.COLUMN) 4399 column = self._parse_field(any_token=True) 4400 4401 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4402 return self.expression(exp.AlterColumn, this=column, drop=True) 4403 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4404 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4405 4406 self._match_text_seq("SET", "DATA") 4407 return self.expression( 4408 exp.AlterColumn, 4409 this=column, 4410 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4411 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4412 using=self._match(TokenType.USING) and self._parse_conjunction(), 4413 ) 4414 4415 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4416 index = self._index - 1 4417 4418 partition_exists = self._parse_exists() 4419 if self._match(TokenType.PARTITION, advance=False): 4420 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4421 4422 self._retreat(index) 4423 return self._parse_csv(self._parse_drop_column) 4424 4425 def _parse_alter_table_rename(self) -> exp.RenameTable: 4426 self._match_text_seq("TO") 4427 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4428 4429 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4430 start = self._prev 4431 4432 if not self._match(TokenType.TABLE): 4433 return self._parse_as_command(start) 4434 4435 exists = self._parse_exists() 4436 this = self._parse_table(schema=True) 4437 4438 if self._next: 4439 self._advance() 4440 4441 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4442 if parser: 4443 actions = ensure_list(parser(self)) 4444 4445 if not self._curr: 4446 return self.expression( 4447 exp.AlterTable, 4448 this=this, 4449 exists=exists, 4450 actions=actions, 4451 ) 4452 return self._parse_as_command(start) 4453 4454 def _parse_merge(self) -> exp.Merge: 4455 self._match(TokenType.INTO) 4456 target = self._parse_table() 4457 4458 self._match(TokenType.USING) 4459 using = self._parse_table() 4460 4461 self._match(TokenType.ON) 4462 on = self._parse_conjunction() 4463 4464 whens = [] 4465 while self._match(TokenType.WHEN): 4466 matched = not self._match(TokenType.NOT) 4467 self._match_text_seq("MATCHED") 4468 source = ( 4469 False 4470 if self._match_text_seq("BY", "TARGET") 4471 else self._match_text_seq("BY", "SOURCE") 4472 ) 4473 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4474 4475 self._match(TokenType.THEN) 4476 4477 if self._match(TokenType.INSERT): 4478 _this = self._parse_star() 4479 if _this: 4480 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4481 else: 4482 then = self.expression( 4483 exp.Insert, 4484 this=self._parse_value(), 4485 expression=self._match(TokenType.VALUES) and self._parse_value(), 4486 ) 4487 elif self._match(TokenType.UPDATE): 4488 expressions = self._parse_star() 4489 if expressions: 4490 then = self.expression(exp.Update, expressions=expressions) 4491 else: 4492 then = self.expression( 4493 exp.Update, 4494 expressions=self._match(TokenType.SET) 4495 and self._parse_csv(self._parse_equality), 4496 ) 4497 elif self._match(TokenType.DELETE): 4498 then = self.expression(exp.Var, this=self._prev.text) 4499 else: 4500 then = None 4501 4502 whens.append( 4503 self.expression( 4504 exp.When, 4505 matched=matched, 4506 source=source, 4507 condition=condition, 4508 then=then, 4509 ) 4510 ) 4511 4512 return self.expression( 4513 exp.Merge, 4514 this=target, 4515 using=using, 4516 on=on, 4517 expressions=whens, 4518 ) 4519 4520 def _parse_show(self) -> t.Optional[exp.Expression]: 4521 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4522 if parser: 4523 return parser(self) 4524 self._advance() 4525 return self.expression(exp.Show, this=self._prev.text.upper()) 4526 4527 def _parse_set_item_assignment( 4528 self, kind: t.Optional[str] = None 4529 ) -> t.Optional[exp.Expression]: 4530 index = self._index 4531 4532 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4533 return self._parse_set_transaction(global_=kind == "GLOBAL") 4534 4535 left = self._parse_primary() or self._parse_id_var() 4536 4537 if not self._match_texts(("=", "TO")): 4538 self._retreat(index) 4539 return None 4540 4541 right = self._parse_statement() or self._parse_id_var() 4542 this = self.expression(exp.EQ, this=left, expression=right) 4543 4544 return self.expression(exp.SetItem, this=this, kind=kind) 4545 4546 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4547 self._match_text_seq("TRANSACTION") 4548 characteristics = self._parse_csv( 4549 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4550 ) 4551 return self.expression( 4552 exp.SetItem, 4553 expressions=characteristics, 4554 kind="TRANSACTION", 4555 **{"global": global_}, # type: ignore 4556 ) 4557 4558 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4559 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4560 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4561 4562 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4563 index = self._index 4564 set_ = self.expression( 4565 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4566 ) 4567 4568 if self._curr: 4569 self._retreat(index) 4570 return self._parse_as_command(self._prev) 4571 4572 return set_ 4573 4574 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4575 for option in options: 4576 if self._match_text_seq(*option.split(" ")): 4577 return exp.var(option) 4578 return None 4579 4580 def _parse_as_command(self, start: Token) -> exp.Command: 4581 while self._curr: 4582 self._advance() 4583 text = self._find_sql(start, self._prev) 4584 size = len(start.text) 4585 return exp.Command(this=text[:size], expression=text[size:]) 4586 4587 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4588 settings = [] 4589 4590 self._match_l_paren() 4591 kind = self._parse_id_var() 4592 4593 if self._match(TokenType.L_PAREN): 4594 while True: 4595 key = self._parse_id_var() 4596 value = self._parse_primary() 4597 4598 if not key and value is None: 4599 break 4600 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4601 self._match(TokenType.R_PAREN) 4602 4603 self._match_r_paren() 4604 4605 return self.expression( 4606 exp.DictProperty, 4607 this=this, 4608 kind=kind.this if kind else None, 4609 settings=settings, 4610 ) 4611 4612 def _parse_dict_range(self, this: str) -> exp.DictRange: 4613 self._match_l_paren() 4614 has_min = self._match_text_seq("MIN") 4615 if has_min: 4616 min = self._parse_var() or self._parse_primary() 4617 self._match_text_seq("MAX") 4618 max = self._parse_var() or self._parse_primary() 4619 else: 4620 max = self._parse_var() or self._parse_primary() 4621 min = exp.Literal.number(0) 4622 self._match_r_paren() 4623 return self.expression(exp.DictRange, this=this, min=min, max=max) 4624 4625 def _find_parser( 4626 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4627 ) -> t.Optional[t.Callable]: 4628 if not self._curr: 4629 return None 4630 4631 index = self._index 4632 this = [] 4633 while True: 4634 # The current token might be multiple words 4635 curr = self._curr.text.upper() 4636 key = curr.split(" ") 4637 this.append(curr) 4638 4639 self._advance() 4640 result, trie = in_trie(trie, key) 4641 if result == TrieResult.FAILED: 4642 break 4643 4644 if result == TrieResult.EXISTS: 4645 
subparser = parsers[" ".join(this)] 4646 return subparser 4647 4648 self._retreat(index) 4649 return None 4650 4651 def _match(self, token_type, advance=True, expression=None): 4652 if not self._curr: 4653 return None 4654 4655 if self._curr.token_type == token_type: 4656 if advance: 4657 self._advance() 4658 self._add_comments(expression) 4659 return True 4660 4661 return None 4662 4663 def _match_set(self, types, advance=True): 4664 if not self._curr: 4665 return None 4666 4667 if self._curr.token_type in types: 4668 if advance: 4669 self._advance() 4670 return True 4671 4672 return None 4673 4674 def _match_pair(self, token_type_a, token_type_b, advance=True): 4675 if not self._curr or not self._next: 4676 return None 4677 4678 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4679 if advance: 4680 self._advance(2) 4681 return True 4682 4683 return None 4684 4685 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4686 if not self._match(TokenType.L_PAREN, expression=expression): 4687 self.raise_error("Expecting (") 4688 4689 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4690 if not self._match(TokenType.R_PAREN, expression=expression): 4691 self.raise_error("Expecting )") 4692 4693 def _match_texts(self, texts, advance=True): 4694 if self._curr and self._curr.text.upper() in texts: 4695 if advance: 4696 self._advance() 4697 return True 4698 return False 4699 4700 def _match_text_seq(self, *texts, advance=True): 4701 index = self._index 4702 for text in texts: 4703 if self._curr and self._curr.text.upper() == text: 4704 self._advance() 4705 else: 4706 self._retreat(index) 4707 return False 4708 4709 if not advance: 4710 self._retreat(index) 4711 4712 return True 4713 4714 @t.overload 4715 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4716 ... 4717 4718 @t.overload 4719 def _replace_columns_with_dots( 4720 self, this: t.Optional[exp.Expression] 4721 ) -> t.Optional[exp.Expression]: 4722 ... 4723 4724 def _replace_columns_with_dots(self, this): 4725 if isinstance(this, exp.Dot): 4726 exp.replace_children(this, self._replace_columns_with_dots) 4727 elif isinstance(this, exp.Column): 4728 exp.replace_children(this, self._replace_columns_with_dots) 4729 table = this.args.get("table") 4730 this = ( 4731 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4732 ) 4733 4734 return this 4735 4736 def _replace_lambda( 4737 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4738 ) -> t.Optional[exp.Expression]: 4739 if not node: 4740 return node 4741 4742 for column in node.find_all(exp.Column): 4743 if column.parts[0].name in lambda_variables: 4744 dot_or_id = column.to_dot() if column.table else column.this 4745 parent = column.parent 4746 4747 while isinstance(parent, exp.Dot): 4748 if not isinstance(parent.parent, exp.Dot): 4749 parent.replace(dot_or_id) 4750 break 4751 parent = parent.parent 4752 else: 4753 if column is node: 4754 node = dot_or_id 4755 else: 4756 column.replace(dot_or_id) 4757 return node
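
The helpers above are easiest to see end to end through sqlglot's public entry points (parse_one, transpile), which drive this Parser internally. A minimal sketch; the exact strings produced can vary between sqlglot versions and dialects:

import sqlglot
from sqlglot import exp

# DECODE with search/result pairs is rewritten into a CASE expression (_parse_decode).
print(sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'zero') FROM t").sql())
# e.g. SELECT CASE WHEN x = 1 THEN 'one' ELSE 'zero' END FROM t

# A single-argument CONCAT collapses to its argument (_parse_concat).
print(sqlglot.parse_one("SELECT CONCAT(x) FROM t").sql())
# e.g. SELECT x FROM t

# STRING_AGG ... WITHIN GROUP is parsed GROUP_CONCAT-style (_parse_string_agg),
# which makes transpiling to MySQL straightforward.
print(
    sqlglot.transpile(
        "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t",
        read="postgres",
        write="mysql",
    )[0]
)
# e.g. SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t

# OVER clauses become exp.Window nodes (_parse_window) that can be inspected directly.
tree = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
print(tree.find(exp.Window).args["partition_by"])

Since every helper builds ordinary exp nodes, the same tree can be inspected, transformed, and re-rendered in any supported dialect.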
SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 
TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, 
TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda 
self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: 
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 "joins": lambda self: list(iter(self._parse_join, None)), 742 "laterals": lambda self: list(iter(self._parse_lateral, None)), 743 "match": lambda self: self._parse_match_recognize(), 744 "where": lambda self: self._parse_where(), 745 "group": lambda self: self._parse_group(), 746 "having": lambda self: self._parse_having(), 747 "qualify": lambda self: self._parse_qualify(), 748 "windows": lambda self: self._parse_window_clause(), 749 "order": lambda self: self._parse_order(), 750 "limit": lambda self: self._parse_limit(), 751 "offset": lambda self: self._parse_offset(), 752 "locks": lambda self: self._parse_locks(), 753 "sample": lambda self: self._parse_table_sample(as_modifier=True), 754 } 755 756 SET_PARSERS = { 757 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 758 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 759 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 760 "TRANSACTION": lambda self: self._parse_set_transaction(), 761 } 762 763 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 764 765 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 766 767 MODIFIABLES = (exp.Subquery, exp.Subqueryable, 

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
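
    # Usage sketch (illustrative; assumes the Tokenizer imported above and its
    # default dialect):
    #
    #     sql = "SELECT a FROM t; SELECT b FROM u"
    #     tokens = Tokenizer().tokenize(sql)
    #     Parser().parse(tokens, sql=sql)
    #     # -> roughly [Select(...), Select(...)], one tree per semicolon-separated
    #     #    statement, as chunked by _parse below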

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
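
    # Usage sketch (illustrative): asking for a specific node type fails fast when
    # the SQL does not have that shape.
    #
    #     tokens = Tokenizer().tokenize("SELECT 1")
    #     Parser().parse_into(exp.Select, tokens, sql="SELECT 1")  # -> [Select(...)]
    #     Parser().parse_into(exp.Where, tokens, sql="SELECT 1")   # raises ParseError,
    #     # with "into_expression" recorded on each underlying error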
995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance) 999 1000 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1001 if expression and self._prev_comments: 1002 expression.add_comments(self._prev_comments) 1003 self._prev_comments = None 1004 1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression 1021 1022 def _find_sql(self, start: Token, end: Token) -> str: 1023 return self.sql[start.start : end.end + 1] 1024 1025 def _advance(self, times: int = 1) -> None: 1026 self._index += times 1027 self._curr = seq_get(self._tokens, self._index) 1028 self._next = seq_get(self._tokens, self._index + 1) 1029 1030 if self._index > 0: 1031 self._prev = self._tokens[self._index - 1] 1032 self._prev_comments = self._prev.comments 1033 else: 1034 self._prev = None 1035 self._prev_comments = None 1036 1037 def _retreat(self, index: int) -> None: 1038 if index != self._index: 1039 self._advance(index - self._index) 1040 1041 def _parse_command(self) -> exp.Command: 1042 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1043 1044 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1045 start = self._prev 1046 exists = self._parse_exists() if allow_exists else None 1047 1048 self._match(TokenType.ON) 1049 1050 kind = self._match_set(self.CREATABLES) and self._prev 1051 if not kind: 1052 return self._parse_as_command(start) 1053 1054 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1055 this = self._parse_user_defined_function(kind=kind.token_type) 1056 elif kind.token_type == TokenType.TABLE: 1057 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1058 elif kind.token_type == TokenType.COLUMN: 1059 this = self._parse_column() 1060 else: 1061 this = self._parse_id_var() 1062 1063 self._match(TokenType.IS) 1064 1065 return self.expression( 1066 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1067 ) 1068 1069 def _parse_to_table( 1070 self, 1071 ) -> exp.ToTableProperty: 1072 table = self._parse_table_parts(schema=True) 1073 return self.expression(exp.ToTableProperty, this=table) 1074 1075 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1076 def _parse_ttl(self) -> exp.Expression: 1077 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1078 this = self._parse_bitwise() 1079 1080 if self._match_text_seq("DELETE"): 1081 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1082 if self._match_text_seq("RECOMPRESS"): 1083 return self.expression( 1084 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1085 ) 1086 if self._match_text_seq("TO", "DISK"): 1087 return self.expression( 1088 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1089 ) 1090 if self._match_text_seq("TO", "VOLUME"): 1091 return self.expression( 1092 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
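
    # Illustrative ClickHouse clauses this method accepts (examples adapted from the
    # ClickHouse docs linked above; not exhaustive):
    #
    #     TTL d + INTERVAL 1 MONTH DELETE
    #     TTL d + INTERVAL 1 WEEK TO VOLUME 'aaa'
    #     TTL d + INTERVAL 2 WEEK TO DISK 'bbb'
    #     TTL d + INTERVAL 1 MONTH GROUP BY id SET val = max(val)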

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
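
    # Parsing sketch (illustrative): for
    #
    #     CREATE OR REPLACE TABLE db.t (a INT) AS SELECT 1
    #
    # `replace` is set via the OR/REPLACE pair, `kind` comes from the matched
    # creatable token ("TABLE"), `this` is the schema db.t(a INT), and `expression`
    # is the Select produced by _parse_ddl_select.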

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Currently only used for Teradata
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
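
    # Illustrative inputs handled by _parse_on_conflict (dialect-dependent, not
    # exhaustive):
    #
    #     INSERT INTO t VALUES (1) ON CONFLICT (id) DO NOTHING          -- nothing=True
    #     INSERT INTO t VALUES (1) ON CONFLICT ON CONSTRAINT pk DO UPDATE SET x = 1
    #     INSERT INTO t VALUES (1) ON DUPLICATE KEY UPDATE x = 1        -- duplicate=True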

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )
self._match_text_seq("OPTIONS"): 1848 self._match_l_paren() 1849 k = self._parse_string() 1850 self._match(TokenType.EQ) 1851 v = self._parse_string() 1852 options = [k, v] 1853 self._match_r_paren() 1854 1855 self._match(TokenType.ALIAS) 1856 return self.expression( 1857 exp.Cache, 1858 this=table, 1859 lazy=lazy, 1860 options=options, 1861 expression=self._parse_select(nested=True), 1862 ) 1863 1864 def _parse_partition(self) -> t.Optional[exp.Partition]: 1865 if not self._match(TokenType.PARTITION): 1866 return None 1867 1868 return self.expression( 1869 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1870 ) 1871 1872 def _parse_value(self) -> exp.Tuple: 1873 if self._match(TokenType.L_PAREN): 1874 expressions = self._parse_csv(self._parse_conjunction) 1875 self._match_r_paren() 1876 return self.expression(exp.Tuple, expressions=expressions) 1877 1878 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1879 # Source: https://prestodb.io/docs/current/sql/values.html 1880 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1881 1882 def _parse_select( 1883 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1884 ) -> t.Optional[exp.Expression]: 1885 cte = self._parse_with() 1886 if cte: 1887 this = self._parse_statement() 1888 1889 if not this: 1890 self.raise_error("Failed to parse any statement following CTE") 1891 return cte 1892 1893 if "with" in this.arg_types: 1894 this.set("with", cte) 1895 else: 1896 self.raise_error(f"{this.key} does not support CTE") 1897 this = cte 1898 elif self._match(TokenType.SELECT): 1899 comments = self._prev_comments 1900 1901 hint = self._parse_hint() 1902 all_ = self._match(TokenType.ALL) 1903 distinct = self._match(TokenType.DISTINCT) 1904 1905 kind = ( 1906 self._match(TokenType.ALIAS) 1907 and self._match_texts(("STRUCT", "VALUE")) 1908 and self._prev.text 1909 ) 1910 1911 if distinct: 1912 distinct = self.expression( 1913 exp.Distinct, 1914 on=self._parse_value() if self._match(TokenType.ON) else None, 1915 ) 1916 1917 if all_ and distinct: 1918 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1919 1920 limit = self._parse_limit(top=True) 1921 expressions = self._parse_csv(self._parse_expression) 1922 1923 this = self.expression( 1924 exp.Select, 1925 kind=kind, 1926 hint=hint, 1927 distinct=distinct, 1928 expressions=expressions, 1929 limit=limit, 1930 ) 1931 this.comments = comments 1932 1933 into = self._parse_into() 1934 if into: 1935 this.set("into", into) 1936 1937 from_ = self._parse_from() 1938 if from_: 1939 this.set("from", from_) 1940 1941 this = self._parse_query_modifiers(this) 1942 elif (table or nested) and self._match(TokenType.L_PAREN): 1943 if self._match(TokenType.PIVOT): 1944 this = self._parse_simplified_pivot() 1945 elif self._match(TokenType.FROM): 1946 this = exp.select("*").from_( 1947 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1948 ) 1949 else: 1950 this = self._parse_table() if table else self._parse_select(nested=True) 1951 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1952 1953 self._match_r_paren() 1954 1955 # early return so that subquery unions aren't parsed again 1956 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1957 # Union ALL should be a property of the top select node, not the subquery 1958 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1959 elif self._match(TokenType.VALUES): 1960 this = self.expression( 1961 exp.Values, 1962 

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
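
    # Sketch (illustrative): for
    #
    #     WITH c AS (SELECT 1) SELECT * FROM c
    #
    # _parse_with builds the exp.With node first; the statement that follows is then
    # parsed and the CTE list is attached under its "with" arg, which is why a
    # statement kind lacking a "with" arg raises "<key> does not support CTE".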

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)
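
    # Decomposition sketch (illustrative; JOIN_METHODS / JOIN_SIDES / JOIN_KINDS are
    # defined earlier in this module): in
    #
    #     SELECT * FROM a NATURAL FULL OUTER JOIN b
    #
    # _parse_join_parts yields method=NATURAL, side=FULL, kind=OUTER, while
    # `OUTER APPLY x` is normalized to a LEFT-side join via the
    # Token(TokenType.LEFT, "LEFT") fallback above.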

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
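
    # Name-resolution sketch (illustrative): _parse_table_parts shifts dotted names
    # into table/db/catalog as it goes, so
    #
    #     catalog.db.t  -> exp.Table(this=t, db=db, catalog=catalog)
    #     a.b.c.d       -> the parts beyond the third are nested as exp.Dot nodes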
alias.args.get("columns"): 2424 self.raise_error("Unexpected extra column alias in unnest.") 2425 2426 alias.set("columns", [alias.this]) 2427 alias.set("this", None) 2428 2429 offset = None 2430 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2431 self._match(TokenType.ALIAS) 2432 offset = self._parse_id_var() or exp.to_identifier("offset") 2433 2434 return self.expression( 2435 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2436 ) 2437 2438 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2439 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2440 if not is_derived and not self._match(TokenType.VALUES): 2441 return None 2442 2443 expressions = self._parse_csv(self._parse_value) 2444 alias = self._parse_table_alias() 2445 2446 if is_derived: 2447 self._match_r_paren() 2448 2449 return self.expression( 2450 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2451 ) 2452 2453 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2454 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2455 as_modifier and self._match_text_seq("USING", "SAMPLE") 2456 ): 2457 return None 2458 2459 bucket_numerator = None 2460 bucket_denominator = None 2461 bucket_field = None 2462 percent = None 2463 rows = None 2464 size = None 2465 seed = None 2466 2467 kind = ( 2468 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2469 ) 2470 method = self._parse_var(tokens=(TokenType.ROW,)) 2471 2472 self._match(TokenType.L_PAREN) 2473 2474 num = self._parse_number() 2475 2476 if self._match_text_seq("BUCKET"): 2477 bucket_numerator = self._parse_number() 2478 self._match_text_seq("OUT", "OF") 2479 bucket_denominator = bucket_denominator = self._parse_number() 2480 self._match(TokenType.ON) 2481 bucket_field = self._parse_field() 2482 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2483 percent = num 2484 elif self._match(TokenType.ROWS): 2485 rows = num 2486 else: 2487 size = num 2488 2489 self._match(TokenType.R_PAREN) 2490 2491 if self._match(TokenType.L_PAREN): 2492 method = self._parse_var() 2493 seed = self._match(TokenType.COMMA) and self._parse_number() 2494 self._match_r_paren() 2495 elif self._match_texts(("SEED", "REPEATABLE")): 2496 seed = self._parse_wrapped(self._parse_number) 2497 2498 return self.expression( 2499 exp.TableSample, 2500 method=method, 2501 bucket_numerator=bucket_numerator, 2502 bucket_denominator=bucket_denominator, 2503 bucket_field=bucket_field, 2504 percent=percent, 2505 rows=rows, 2506 size=size, 2507 seed=seed, 2508 kind=kind, 2509 ) 2510 2511 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2512 return list(iter(self._parse_pivot, None)) 2513 2514 # https://duckdb.org/docs/sql/statements/pivot 2515 def _parse_simplified_pivot(self) -> exp.Pivot: 2516 def _parse_on() -> t.Optional[exp.Expression]: 2517 this = self._parse_bitwise() 2518 return self._parse_in(this) if self._match(TokenType.IN) else this 2519 2520 this = self._parse_table() 2521 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2522 using = self._match(TokenType.USING) and self._parse_csv( 2523 lambda: self._parse_alias(self._parse_function()) 2524 ) 2525 group = self._parse_group() 2526 return self.expression( 2527 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2528 ) 2529 2530 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2531 index = self._index 2532 2533 if 
self._match(TokenType.PIVOT): 2534 unpivot = False 2535 elif self._match(TokenType.UNPIVOT): 2536 unpivot = True 2537 else: 2538 return None 2539 2540 expressions = [] 2541 field = None 2542 2543 if not self._match(TokenType.L_PAREN): 2544 self._retreat(index) 2545 return None 2546 2547 if unpivot: 2548 expressions = self._parse_csv(self._parse_column) 2549 else: 2550 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2551 2552 if not expressions: 2553 self.raise_error("Failed to parse PIVOT's aggregation list") 2554 2555 if not self._match(TokenType.FOR): 2556 self.raise_error("Expecting FOR") 2557 2558 value = self._parse_column() 2559 2560 if not self._match(TokenType.IN): 2561 self.raise_error("Expecting IN") 2562 2563 field = self._parse_in(value, alias=True) 2564 2565 self._match_r_paren() 2566 2567 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2568 2569 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2570 pivot.set("alias", self._parse_table_alias()) 2571 2572 if not unpivot: 2573 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2574 2575 columns: t.List[exp.Expression] = [] 2576 for fld in pivot.args["field"].expressions: 2577 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2578 for name in names: 2579 if self.PREFIXED_PIVOT_COLUMNS: 2580 name = f"{name}_{field_name}" if name else field_name 2581 else: 2582 name = f"{field_name}_{name}" if name else field_name 2583 2584 columns.append(exp.to_identifier(name)) 2585 2586 pivot.set("columns", columns) 2587 2588 return pivot 2589 2590 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2591 return [agg.alias for agg in aggregations] 2592 2593 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2594 if not skip_where_token and not self._match(TokenType.WHERE): 2595 return None 2596 2597 return self.expression( 2598 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2599 ) 2600 2601 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2602 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2603 return None 2604 2605 elements = defaultdict(list) 2606 2607 while True: 2608 expressions = self._parse_csv(self._parse_conjunction) 2609 if expressions: 2610 elements["expressions"].extend(expressions) 2611 2612 grouping_sets = self._parse_grouping_sets() 2613 if grouping_sets: 2614 elements["grouping_sets"].extend(grouping_sets) 2615 2616 rollup = None 2617 cube = None 2618 totals = None 2619 2620 with_ = self._match(TokenType.WITH) 2621 if self._match(TokenType.ROLLUP): 2622 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2623 elements["rollup"].extend(ensure_list(rollup)) 2624 2625 if self._match(TokenType.CUBE): 2626 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2627 elements["cube"].extend(ensure_list(cube)) 2628 2629 if self._match_text_seq("TOTALS"): 2630 totals = True 2631 elements["totals"] = True # type: ignore 2632 2633 if not (grouping_sets or rollup or cube or totals): 2634 break 2635 2636 return self.expression(exp.Group, **elements) # type: ignore 2637 2638 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2639 if not self._match(TokenType.GROUPING_SETS): 2640 return None 2641 2642 return self._parse_wrapped_csv(self._parse_grouping_set) 2643 2644 def _parse_grouping_set(self) -> 
t.Optional[exp.Expression]: 2645 if self._match(TokenType.L_PAREN): 2646 grouping_set = self._parse_csv(self._parse_column) 2647 self._match_r_paren() 2648 return self.expression(exp.Tuple, expressions=grouping_set) 2649 2650 return self._parse_column() 2651 2652 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2653 if not skip_having_token and not self._match(TokenType.HAVING): 2654 return None 2655 return self.expression(exp.Having, this=self._parse_conjunction()) 2656 2657 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2658 if not self._match(TokenType.QUALIFY): 2659 return None 2660 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2661 2662 def _parse_order( 2663 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2664 ) -> t.Optional[exp.Expression]: 2665 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2666 return this 2667 2668 return self.expression( 2669 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2670 ) 2671 2672 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2673 if not self._match(token): 2674 return None 2675 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2676 2677 def _parse_ordered(self) -> exp.Ordered: 2678 this = self._parse_conjunction() 2679 self._match(TokenType.ASC) 2680 2681 is_desc = self._match(TokenType.DESC) 2682 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2683 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2684 desc = is_desc or False 2685 asc = not desc 2686 nulls_first = is_nulls_first or False 2687 explicitly_null_ordered = is_nulls_first or is_nulls_last 2688 2689 if ( 2690 not explicitly_null_ordered 2691 and ( 2692 (asc and self.NULL_ORDERING == "nulls_are_small") 2693 or (desc and self.NULL_ORDERING != "nulls_are_small") 2694 ) 2695 and self.NULL_ORDERING != "nulls_are_last" 2696 ): 2697 nulls_first = True 2698 2699 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2700 2701 def _parse_limit( 2702 self, this: t.Optional[exp.Expression] = None, top: bool = False 2703 ) -> t.Optional[exp.Expression]: 2704 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2705 limit_paren = self._match(TokenType.L_PAREN) 2706 expression = self._parse_number() if top else self._parse_term() 2707 2708 if self._match(TokenType.COMMA): 2709 offset = expression 2710 expression = self._parse_term() 2711 else: 2712 offset = None 2713 2714 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2715 2716 if limit_paren: 2717 self._match_r_paren() 2718 2719 return limit_exp 2720 2721 if self._match(TokenType.FETCH): 2722 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2723 direction = self._prev.text if direction else "FIRST" 2724 2725 count = self._parse_number() 2726 percent = self._match(TokenType.PERCENT) 2727 2728 self._match_set((TokenType.ROW, TokenType.ROWS)) 2729 2730 only = self._match_text_seq("ONLY") 2731 with_ties = self._match_text_seq("WITH", "TIES") 2732 2733 if only and with_ties: 2734 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2735 2736 return self.expression( 2737 exp.Fetch, 2738 direction=direction, 2739 count=count, 2740 percent=percent, 2741 with_ties=with_ties, 2742 ) 2743 2744 return this 2745 2746 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2747 if not 
self._match(TokenType.OFFSET): 2748 return this 2749 2750 count = self._parse_number() 2751 self._match_set((TokenType.ROW, TokenType.ROWS)) 2752 return self.expression(exp.Offset, this=this, expression=count) 2753 2754 def _parse_locks(self) -> t.List[exp.Lock]: 2755 locks = [] 2756 while True: 2757 if self._match_text_seq("FOR", "UPDATE"): 2758 update = True 2759 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2760 "LOCK", "IN", "SHARE", "MODE" 2761 ): 2762 update = False 2763 else: 2764 break 2765 2766 expressions = None 2767 if self._match_text_seq("OF"): 2768 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2769 2770 wait: t.Optional[bool | exp.Expression] = None 2771 if self._match_text_seq("NOWAIT"): 2772 wait = True 2773 elif self._match_text_seq("WAIT"): 2774 wait = self._parse_primary() 2775 elif self._match_text_seq("SKIP", "LOCKED"): 2776 wait = False 2777 2778 locks.append( 2779 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2780 ) 2781 2782 return locks 2783 2784 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2785 if not self._match_set(self.SET_OPERATIONS): 2786 return this 2787 2788 token_type = self._prev.token_type 2789 2790 if token_type == TokenType.UNION: 2791 expression = exp.Union 2792 elif token_type == TokenType.EXCEPT: 2793 expression = exp.Except 2794 else: 2795 expression = exp.Intersect 2796 2797 return self.expression( 2798 expression, 2799 this=this, 2800 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2801 expression=self._parse_set_operations(self._parse_select(nested=True)), 2802 ) 2803 2804 def _parse_expression(self) -> t.Optional[exp.Expression]: 2805 return self._parse_alias(self._parse_conjunction()) 2806 2807 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2808 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2809 2810 def _parse_equality(self) -> t.Optional[exp.Expression]: 2811 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2812 2813 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2814 return self._parse_tokens(self._parse_range, self.COMPARISON) 2815 2816 def _parse_range(self) -> t.Optional[exp.Expression]: 2817 this = self._parse_bitwise() 2818 negate = self._match(TokenType.NOT) 2819 2820 if self._match_set(self.RANGE_PARSERS): 2821 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2822 if not expression: 2823 return this 2824 2825 this = expression 2826 elif self._match(TokenType.ISNULL): 2827 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2828 2829 # Postgres supports ISNULL and NOTNULL for conditions. 
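# For example, `x NOTNULL` is shorthand for `x IS NOT NULL`: the branch below builds the Is node and then wraps it in Not.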
2830 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2831 if self._match(TokenType.NOTNULL): 2832 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2833 this = self.expression(exp.Not, this=this) 2834 2835 if negate: 2836 this = self.expression(exp.Not, this=this) 2837 2838 if self._match(TokenType.IS): 2839 this = self._parse_is(this) 2840 2841 return this 2842 2843 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2844 index = self._index - 1 2845 negate = self._match(TokenType.NOT) 2846 2847 if self._match_text_seq("DISTINCT", "FROM"): 2848 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2849 return self.expression(klass, this=this, expression=self._parse_expression()) 2850 2851 expression = self._parse_null() or self._parse_boolean() 2852 if not expression: 2853 self._retreat(index) 2854 return None 2855 2856 this = self.expression(exp.Is, this=this, expression=expression) 2857 return self.expression(exp.Not, this=this) if negate else this 2858 2859 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2860 unnest = self._parse_unnest(with_alias=False) 2861 if unnest: 2862 this = self.expression(exp.In, this=this, unnest=unnest) 2863 elif self._match(TokenType.L_PAREN): 2864 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2865 2866 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2867 this = self.expression(exp.In, this=this, query=expressions[0]) 2868 else: 2869 this = self.expression(exp.In, this=this, expressions=expressions) 2870 2871 self._match_r_paren(this) 2872 else: 2873 this = self.expression(exp.In, this=this, field=self._parse_field()) 2874 2875 return this 2876 2877 def _parse_between(self, this: exp.Expression) -> exp.Between: 2878 low = self._parse_bitwise() 2879 self._match(TokenType.AND) 2880 high = self._parse_bitwise() 2881 return self.expression(exp.Between, this=this, low=low, high=high) 2882 2883 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2884 if not self._match(TokenType.ESCAPE): 2885 return this 2886 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2887 2888 def _parse_interval(self) -> t.Optional[exp.Interval]: 2889 if not self._match(TokenType.INTERVAL): 2890 return None 2891 2892 if self._match(TokenType.STRING, advance=False): 2893 this = self._parse_primary() 2894 else: 2895 this = self._parse_term() 2896 2897 unit = self._parse_function() or self._parse_var() 2898 2899 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2900 # each INTERVAL expression into this canonical form so it's easy to transpile 2901 if this and this.is_number: 2902 this = exp.Literal.string(this.name) 2903 elif this and this.is_string: 2904 parts = this.name.split() 2905 2906 if len(parts) == 2: 2907 if unit: 2908 # this is not actually a unit, it's something else 2909 unit = None 2910 self._retreat(self._index - 1) 2911 else: 2912 this = exp.Literal.string(parts[0]) 2913 unit = self.expression(exp.Var, this=parts[1]) 2914 2915 return self.expression(exp.Interval, this=this, unit=unit) 2916 2917 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2918 this = self._parse_term() 2919 2920 while True: 2921 if self._match_set(self.BITWISE): 2922 this = self.expression( 2923 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2924 ) 2925 elif self._match_pair(TokenType.LT, TokenType.LT): 2926 
this = self.expression( 2927 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2928 ) 2929 elif self._match_pair(TokenType.GT, TokenType.GT): 2930 this = self.expression( 2931 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2932 ) 2933 else: 2934 break 2935 2936 return this 2937 2938 def _parse_term(self) -> t.Optional[exp.Expression]: 2939 return self._parse_tokens(self._parse_factor, self.TERM) 2940 2941 def _parse_factor(self) -> t.Optional[exp.Expression]: 2942 return self._parse_tokens(self._parse_unary, self.FACTOR) 2943 2944 def _parse_unary(self) -> t.Optional[exp.Expression]: 2945 if self._match_set(self.UNARY_PARSERS): 2946 return self.UNARY_PARSERS[self._prev.token_type](self) 2947 return self._parse_at_time_zone(self._parse_type()) 2948 2949 def _parse_type(self) -> t.Optional[exp.Expression]: 2950 interval = self._parse_interval() 2951 if interval: 2952 return interval 2953 2954 index = self._index 2955 data_type = self._parse_types(check_func=True) 2956 this = self._parse_column() 2957 2958 if data_type: 2959 if isinstance(this, exp.Literal): 2960 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2961 if parser: 2962 return parser(self, this, data_type) 2963 return self.expression(exp.Cast, this=this, to=data_type) 2964 if not data_type.expressions: 2965 self._retreat(index) 2966 return self._parse_column() 2967 return self._parse_column_ops(data_type) 2968 2969 return this 2970 2971 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2972 this = self._parse_type() 2973 if not this: 2974 return None 2975 2976 return self.expression( 2977 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2978 ) 2979 2980 def _parse_types( 2981 self, check_func: bool = False, schema: bool = False 2982 ) -> t.Optional[exp.Expression]: 2983 index = self._index 2984 2985 prefix = self._match_text_seq("SYSUDTLIB", ".") 2986 2987 if not self._match_set(self.TYPE_TOKENS): 2988 return None 2989 2990 type_token = self._prev.token_type 2991 2992 if type_token == TokenType.PSEUDO_TYPE: 2993 return self.expression(exp.PseudoType, this=self._prev.text) 2994 2995 nested = type_token in self.NESTED_TYPE_TOKENS 2996 is_struct = type_token == TokenType.STRUCT 2997 expressions = None 2998 maybe_func = False 2999 3000 if self._match(TokenType.L_PAREN): 3001 if is_struct: 3002 expressions = self._parse_csv(self._parse_struct_types) 3003 elif nested: 3004 expressions = self._parse_csv( 3005 lambda: self._parse_types(check_func=check_func, schema=schema) 3006 ) 3007 elif type_token in self.ENUM_TYPE_TOKENS: 3008 expressions = self._parse_csv(self._parse_primary) 3009 else: 3010 expressions = self._parse_csv(self._parse_type_size) 3011 3012 if not expressions or not self._match(TokenType.R_PAREN): 3013 self._retreat(index) 3014 return None 3015 3016 maybe_func = True 3017 3018 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3019 this = exp.DataType( 3020 this=exp.DataType.Type.ARRAY, 3021 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3022 nested=True, 3023 ) 3024 3025 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3026 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3027 3028 return this 3029 3030 if self._match(TokenType.L_BRACKET): 3031 self._retreat(index) 3032 return None 3033 3034 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3035 if nested and self._match(TokenType.LT): 3036 if is_struct: 3037 expressions = 
self._parse_csv(self._parse_struct_types) 3038 else: 3039 expressions = self._parse_csv( 3040 lambda: self._parse_types(check_func=check_func, schema=schema) 3041 ) 3042 3043 if not self._match(TokenType.GT): 3044 self.raise_error("Expecting >") 3045 3046 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3047 values = self._parse_csv(self._parse_conjunction) 3048 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3049 3050 value: t.Optional[exp.Expression] = None 3051 if type_token in self.TIMESTAMPS: 3052 if self._match_text_seq("WITH", "TIME", "ZONE"): 3053 maybe_func = False 3054 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3055 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3056 maybe_func = False 3057 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3058 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3059 maybe_func = False 3060 elif type_token == TokenType.INTERVAL: 3061 unit = self._parse_var() 3062 3063 if not unit: 3064 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3065 else: 3066 value = self.expression(exp.Interval, unit=unit) 3067 3068 if maybe_func and check_func: 3069 index2 = self._index 3070 peek = self._parse_string() 3071 3072 if not peek: 3073 self._retreat(index) 3074 return None 3075 3076 self._retreat(index2) 3077 3078 if value: 3079 return value 3080 3081 return exp.DataType( 3082 this=exp.DataType.Type[type_token.value.upper()], 3083 expressions=expressions, 3084 nested=nested, 3085 values=values, 3086 prefix=prefix, 3087 ) 3088 3089 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3090 this = self._parse_type() or self._parse_id_var() 3091 self._match(TokenType.COLON) 3092 return self._parse_column_def(this) 3093 3094 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3095 if not self._match_text_seq("AT", "TIME", "ZONE"): 3096 return this 3097 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3098 3099 def _parse_column(self) -> t.Optional[exp.Expression]: 3100 this = self._parse_field() 3101 if isinstance(this, exp.Identifier): 3102 this = self.expression(exp.Column, this=this) 3103 elif not this: 3104 return self._parse_bracket(this) 3105 return self._parse_column_ops(this) 3106 3107 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3108 this = self._parse_bracket(this) 3109 3110 while self._match_set(self.COLUMN_OPERATORS): 3111 op_token = self._prev.token_type 3112 op = self.COLUMN_OPERATORS.get(op_token) 3113 3114 if op_token == TokenType.DCOLON: 3115 field = self._parse_types() 3116 if not field: 3117 self.raise_error("Expected type") 3118 elif op and self._curr: 3119 self._advance() 3120 value = self._prev.text 3121 field = ( 3122 exp.Literal.number(value) 3123 if self._prev.token_type == TokenType.NUMBER 3124 else exp.Literal.string(value) 3125 ) 3126 else: 3127 field = self._parse_field(anonymous_func=True, any_token=True) 3128 3129 if isinstance(field, exp.Func): 3130 # bigquery allows function calls like x.y.count(...) 3131 # SAFE.SUBSTR(...) 
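# When the parsed field is a function, the column prefix collected so far is folded into a dot chain via _replace_columns_with_dots below.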
3132 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3133 this = self._replace_columns_with_dots(this) 3134 3135 if op: 3136 this = op(self, this, field) 3137 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3138 this = self.expression( 3139 exp.Column, 3140 this=field, 3141 table=this.this, 3142 db=this.args.get("table"), 3143 catalog=this.args.get("db"), 3144 ) 3145 else: 3146 this = self.expression(exp.Dot, this=this, expression=field) 3147 this = self._parse_bracket(this) 3148 return this 3149 3150 def _parse_primary(self) -> t.Optional[exp.Expression]: 3151 if self._match_set(self.PRIMARY_PARSERS): 3152 token_type = self._prev.token_type 3153 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3154 3155 if token_type == TokenType.STRING: 3156 expressions = [primary] 3157 while self._match(TokenType.STRING): 3158 expressions.append(exp.Literal.string(self._prev.text)) 3159 3160 if len(expressions) > 1: 3161 return self.expression(exp.Concat, expressions=expressions) 3162 3163 return primary 3164 3165 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3166 return exp.Literal.number(f"0.{self._prev.text}") 3167 3168 if self._match(TokenType.L_PAREN): 3169 comments = self._prev_comments 3170 query = self._parse_select() 3171 3172 if query: 3173 expressions = [query] 3174 else: 3175 expressions = self._parse_csv(self._parse_expression) 3176 3177 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3178 3179 if isinstance(this, exp.Subqueryable): 3180 this = self._parse_set_operations( 3181 self._parse_subquery(this=this, parse_alias=False) 3182 ) 3183 elif len(expressions) > 1: 3184 this = self.expression(exp.Tuple, expressions=expressions) 3185 else: 3186 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3187 3188 if this: 3189 this.add_comments(comments) 3190 3191 self._match_r_paren(expression=this) 3192 return this 3193 3194 return None 3195 3196 def _parse_field( 3197 self, 3198 any_token: bool = False, 3199 tokens: t.Optional[t.Collection[TokenType]] = None, 3200 anonymous_func: bool = False, 3201 ) -> t.Optional[exp.Expression]: 3202 return ( 3203 self._parse_primary() 3204 or self._parse_function(anonymous=anonymous_func) 3205 or self._parse_id_var(any_token=any_token, tokens=tokens) 3206 ) 3207 3208 def _parse_function( 3209 self, 3210 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3211 anonymous: bool = False, 3212 optional_parens: bool = True, 3213 ) -> t.Optional[exp.Expression]: 3214 if not self._curr: 3215 return None 3216 3217 token_type = self._curr.token_type 3218 3219 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3220 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3221 3222 if not self._next or self._next.token_type != TokenType.L_PAREN: 3223 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3224 self._advance() 3225 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3226 3227 return None 3228 3229 if token_type not in self.FUNC_TOKENS: 3230 return None 3231 3232 this = self._curr.text 3233 upper = this.upper() 3234 self._advance(2) 3235 3236 parser = self.FUNCTION_PARSERS.get(upper) 3237 3238 if parser and not anonymous: 3239 this = parser(self) 3240 else: 3241 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3242 3243 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3244 this = self.expression(subquery_predicate, this=self._parse_select()) 
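# e.g. `EXISTS (SELECT ...)` or `ANY (SELECT ...)` wraps the parsed SELECT in the corresponding SUBQUERY_PREDICATES expression (exp.Exists / exp.Any)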
3245 self._match_r_paren() 3246 return this 3247 3248 if functions is None: 3249 functions = self.FUNCTIONS 3250 3251 function = functions.get(upper) 3252 3253 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3254 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3255 3256 if function and not anonymous: 3257 this = self.validate_expression(function(args), args) 3258 else: 3259 this = self.expression(exp.Anonymous, this=this, expressions=args) 3260 3261 self._match_r_paren(this) 3262 return self._parse_window(this) 3263 3264 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3265 return self._parse_column_def(self._parse_id_var()) 3266 3267 def _parse_user_defined_function( 3268 self, kind: t.Optional[TokenType] = None 3269 ) -> t.Optional[exp.Expression]: 3270 this = self._parse_id_var() 3271 3272 while self._match(TokenType.DOT): 3273 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3274 3275 if not self._match(TokenType.L_PAREN): 3276 return this 3277 3278 expressions = self._parse_csv(self._parse_function_parameter) 3279 self._match_r_paren() 3280 return self.expression( 3281 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3282 ) 3283 3284 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3285 literal = self._parse_primary() 3286 if literal: 3287 return self.expression(exp.Introducer, this=token.text, expression=literal) 3288 3289 return self.expression(exp.Identifier, this=token.text) 3290 3291 def _parse_session_parameter(self) -> exp.SessionParameter: 3292 kind = None 3293 this = self._parse_id_var() or self._parse_primary() 3294 3295 if this and self._match(TokenType.DOT): 3296 kind = this.name 3297 this = self._parse_var() or self._parse_primary() 3298 3299 return self.expression(exp.SessionParameter, this=this, kind=kind) 3300 3301 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3302 index = self._index 3303 3304 if self._match(TokenType.L_PAREN): 3305 expressions = self._parse_csv(self._parse_id_var) 3306 3307 if not self._match(TokenType.R_PAREN): 3308 self._retreat(index) 3309 else: 3310 expressions = [self._parse_id_var()] 3311 3312 if self._match_set(self.LAMBDAS): 3313 return self.LAMBDAS[self._prev.token_type](self, expressions) 3314 3315 self._retreat(index) 3316 3317 this: t.Optional[exp.Expression] 3318 3319 if self._match(TokenType.DISTINCT): 3320 this = self.expression( 3321 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3322 ) 3323 else: 3324 this = self._parse_select_or_expression(alias=alias) 3325 3326 if isinstance(this, exp.EQ): 3327 left = this.this 3328 if isinstance(left, exp.Column): 3329 left.replace(exp.var(left.text("this"))) 3330 3331 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3332 3333 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3334 index = self._index 3335 3336 if not self.errors: 3337 try: 3338 if self._parse_select(nested=True): 3339 return this 3340 except ParseError: 3341 pass 3342 finally: 3343 self.errors.clear() 3344 self._retreat(index) 3345 3346 if not self._match(TokenType.L_PAREN): 3347 return this 3348 3349 args = self._parse_csv( 3350 lambda: self._parse_constraint() 3351 or self._parse_column_def(self._parse_field(any_token=True)) 3352 ) 3353 3354 self._match_r_paren() 3355 return self.expression(exp.Schema, this=this, expressions=args) 3356 3357 def _parse_column_def(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3358 # column defs are not really columns, they're identifiers 3359 if isinstance(this, exp.Column): 3360 this = this.this 3361 3362 kind = self._parse_types(schema=True) 3363 3364 if self._match_text_seq("FOR", "ORDINALITY"): 3365 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3366 3367 constraints = [] 3368 while True: 3369 constraint = self._parse_column_constraint() 3370 if not constraint: 3371 break 3372 constraints.append(constraint) 3373 3374 if not kind and not constraints: 3375 return this 3376 3377 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3378 3379 def _parse_auto_increment( 3380 self, 3381 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3382 start = None 3383 increment = None 3384 3385 if self._match(TokenType.L_PAREN, advance=False): 3386 args = self._parse_wrapped_csv(self._parse_bitwise) 3387 start = seq_get(args, 0) 3388 increment = seq_get(args, 1) 3389 elif self._match_text_seq("START"): 3390 start = self._parse_bitwise() 3391 self._match_text_seq("INCREMENT") 3392 increment = self._parse_bitwise() 3393 3394 if start and increment: 3395 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3396 3397 return exp.AutoIncrementColumnConstraint() 3398 3399 def _parse_compress(self) -> exp.CompressColumnConstraint: 3400 if self._match(TokenType.L_PAREN, advance=False): 3401 return self.expression( 3402 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3403 ) 3404 3405 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3406 3407 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3408 if self._match_text_seq("BY", "DEFAULT"): 3409 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3410 this = self.expression( 3411 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3412 ) 3413 else: 3414 self._match_text_seq("ALWAYS") 3415 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3416 3417 self._match(TokenType.ALIAS) 3418 identity = self._match_text_seq("IDENTITY") 3419 3420 if self._match(TokenType.L_PAREN): 3421 if self._match_text_seq("START", "WITH"): 3422 this.set("start", self._parse_bitwise()) 3423 if self._match_text_seq("INCREMENT", "BY"): 3424 this.set("increment", self._parse_bitwise()) 3425 if self._match_text_seq("MINVALUE"): 3426 this.set("minvalue", self._parse_bitwise()) 3427 if self._match_text_seq("MAXVALUE"): 3428 this.set("maxvalue", self._parse_bitwise()) 3429 3430 if self._match_text_seq("CYCLE"): 3431 this.set("cycle", True) 3432 elif self._match_text_seq("NO", "CYCLE"): 3433 this.set("cycle", False) 3434 3435 if not identity: 3436 this.set("expression", self._parse_bitwise()) 3437 3438 self._match_r_paren() 3439 3440 return this 3441 3442 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3443 self._match_text_seq("LENGTH") 3444 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3445 3446 def _parse_not_constraint( 3447 self, 3448 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3449 if self._match_text_seq("NULL"): 3450 return self.expression(exp.NotNullColumnConstraint) 3451 if self._match_text_seq("CASESPECIFIC"): 3452 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3453 return None 3454 3455 def _parse_column_constraint(self) -> 
t.Optional[exp.Expression]: 3456 if self._match(TokenType.CONSTRAINT): 3457 this = self._parse_id_var() 3458 else: 3459 this = None 3460 3461 if self._match_texts(self.CONSTRAINT_PARSERS): 3462 return self.expression( 3463 exp.ColumnConstraint, 3464 this=this, 3465 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3466 ) 3467 3468 return this 3469 3470 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3471 if not self._match(TokenType.CONSTRAINT): 3472 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3473 3474 this = self._parse_id_var() 3475 expressions = [] 3476 3477 while True: 3478 constraint = self._parse_unnamed_constraint() or self._parse_function() 3479 if not constraint: 3480 break 3481 expressions.append(constraint) 3482 3483 return self.expression(exp.Constraint, this=this, expressions=expressions) 3484 3485 def _parse_unnamed_constraint( 3486 self, constraints: t.Optional[t.Collection[str]] = None 3487 ) -> t.Optional[exp.Expression]: 3488 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3489 return None 3490 3491 constraint = self._prev.text.upper() 3492 if constraint not in self.CONSTRAINT_PARSERS: 3493 self.raise_error(f"No parser found for schema constraint {constraint}.") 3494 3495 return self.CONSTRAINT_PARSERS[constraint](self) 3496 3497 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3498 self._match_text_seq("KEY") 3499 return self.expression( 3500 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3501 ) 3502 3503 def _parse_key_constraint_options(self) -> t.List[str]: 3504 options = [] 3505 while True: 3506 if not self._curr: 3507 break 3508 3509 if self._match(TokenType.ON): 3510 action = None 3511 on = self._advance_any() and self._prev.text 3512 3513 if self._match_text_seq("NO", "ACTION"): 3514 action = "NO ACTION" 3515 elif self._match_text_seq("CASCADE"): 3516 action = "CASCADE" 3517 elif self._match_pair(TokenType.SET, TokenType.NULL): 3518 action = "SET NULL" 3519 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3520 action = "SET DEFAULT" 3521 else: 3522 self.raise_error("Invalid key constraint") 3523 3524 options.append(f"ON {on} {action}") 3525 elif self._match_text_seq("NOT", "ENFORCED"): 3526 options.append("NOT ENFORCED") 3527 elif self._match_text_seq("DEFERRABLE"): 3528 options.append("DEFERRABLE") 3529 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3530 options.append("INITIALLY DEFERRED") 3531 elif self._match_text_seq("NORELY"): 3532 options.append("NORELY") 3533 elif self._match_text_seq("MATCH", "FULL"): 3534 options.append("MATCH FULL") 3535 else: 3536 break 3537 3538 return options 3539 3540 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3541 if match and not self._match(TokenType.REFERENCES): 3542 return None 3543 3544 expressions = None 3545 this = self._parse_id_var() 3546 3547 if self._match(TokenType.L_PAREN, advance=False): 3548 expressions = self._parse_wrapped_id_vars() 3549 3550 options = self._parse_key_constraint_options() 3551 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3552 3553 def _parse_foreign_key(self) -> exp.ForeignKey: 3554 expressions = self._parse_wrapped_id_vars() 3555 reference = self._parse_references() 3556 options = {} 3557 3558 while self._match(TokenType.ON): 3559 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3560 self.raise_error("Expected DELETE or UPDATE") 3561 3562 kind = 
self._prev.text.lower() 3563 3564 if self._match_text_seq("NO", "ACTION"): 3565 action = "NO ACTION" 3566 elif self._match(TokenType.SET): 3567 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3568 action = "SET " + self._prev.text.upper() 3569 else: 3570 self._advance() 3571 action = self._prev.text.upper() 3572 3573 options[kind] = action 3574 3575 return self.expression( 3576 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3577 ) 3578 3579 def _parse_primary_key( 3580 self, wrapped_optional: bool = False, in_props: bool = False 3581 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3582 desc = ( 3583 self._match_set((TokenType.ASC, TokenType.DESC)) 3584 and self._prev.token_type == TokenType.DESC 3585 ) 3586 3587 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3588 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3589 3590 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3591 options = self._parse_key_constraint_options() 3592 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3593 3594 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3595 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3596 return this 3597 3598 bracket_kind = self._prev.token_type 3599 3600 if self._match(TokenType.COLON): 3601 expressions: t.List[t.Optional[exp.Expression]] = [ 3602 self.expression(exp.Slice, expression=self._parse_conjunction()) 3603 ] 3604 else: 3605 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3606 3607 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3608 if bracket_kind == TokenType.L_BRACE: 3609 this = self.expression(exp.Struct, expressions=expressions) 3610 elif not this or this.name.upper() == "ARRAY": 3611 this = self.expression(exp.Array, expressions=expressions) 3612 else: 3613 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3614 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3615 3616 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3617 self.raise_error("Expected ]") 3618 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3619 self.raise_error("Expected }") 3620 3621 self._add_comments(this) 3622 return self._parse_bracket(this) 3623 3624 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3625 if self._match(TokenType.COLON): 3626 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3627 return this 3628 3629 def _parse_case(self) -> t.Optional[exp.Expression]: 3630 ifs = [] 3631 default = None 3632 3633 expression = self._parse_conjunction() 3634 3635 while self._match(TokenType.WHEN): 3636 this = self._parse_conjunction() 3637 self._match(TokenType.THEN) 3638 then = self._parse_conjunction() 3639 ifs.append(self.expression(exp.If, this=this, true=then)) 3640 3641 if self._match(TokenType.ELSE): 3642 default = self._parse_conjunction() 3643 3644 if not self._match(TokenType.END): 3645 self.raise_error("Expected END after CASE", self._prev) 3646 3647 return self._parse_window( 3648 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3649 ) 3650 3651 def _parse_if(self) -> t.Optional[exp.Expression]: 3652 if self._match(TokenType.L_PAREN): 3653 args = self._parse_csv(self._parse_conjunction) 3654 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 3655 self._match_r_paren() 3656 else: 3657 index = self._index - 1 3658 condition = self._parse_conjunction() 3659 3660 if not condition: 3661 self._retreat(index) 3662 return None 3663 3664 self._match(TokenType.THEN) 3665 true = self._parse_conjunction() 3666 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3667 self._match(TokenType.END) 3668 this = self.expression(exp.If, this=condition, true=true, false=false) 3669 3670 return self._parse_window(this) 3671 3672 def _parse_extract(self) -> exp.Extract: 3673 this = self._parse_function() or self._parse_var() or self._parse_type() 3674 3675 if self._match(TokenType.FROM): 3676 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3677 3678 if not self._match(TokenType.COMMA): 3679 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3680 3681 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3682 3683 def _parse_any_value(self) -> exp.AnyValue: 3684 this = self._parse_lambda() 3685 is_max = None 3686 having = None 3687 3688 if self._match(TokenType.HAVING): 3689 self._match_texts(("MAX", "MIN")) 3690 is_max = self._prev.text == "MAX" 3691 having = self._parse_column() 3692 3693 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3694 3695 def _parse_cast(self, strict: bool) -> exp.Expression: 3696 this = self._parse_conjunction() 3697 3698 if not self._match(TokenType.ALIAS): 3699 if self._match(TokenType.COMMA): 3700 return self.expression( 3701 exp.CastToStrType, this=this, expression=self._parse_string() 3702 ) 3703 else: 3704 self.raise_error("Expected AS after CAST") 3705 3706 fmt = None 3707 to = self._parse_types() 3708 3709 if not to: 3710 self.raise_error("Expected TYPE after CAST") 3711 elif to.this == exp.DataType.Type.CHAR: 3712 if self._match(TokenType.CHARACTER_SET): 3713 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3714 elif self._match(TokenType.FORMAT): 3715 fmt = self._parse_at_time_zone(self._parse_string()) 3716 3717 if to.this in exp.DataType.TEMPORAL_TYPES: 3718 return self.expression( 3719 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3720 this=this, 3721 format=exp.Literal.string( 3722 format_time( 3723 fmt.this if fmt else "", 3724 self.FORMAT_MAPPING or self.TIME_MAPPING, 3725 self.FORMAT_TRIE or self.TIME_TRIE, 3726 ) 3727 ), 3728 ) 3729 3730 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3731 3732 def _parse_concat(self) -> t.Optional[exp.Expression]: 3733 args = self._parse_csv(self._parse_conjunction) 3734 if self.CONCAT_NULL_OUTPUTS_STRING: 3735 args = [ 3736 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3737 for arg in args 3738 if arg 3739 ] 3740 3741 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3742 # we find such a call we replace it with its argument. 
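# e.g. CONCAT('a') is parsed as just the string literal 'a'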
3743 if len(args) == 1: 3744 return args[0] 3745 3746 return self.expression( 3747 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3748 ) 3749 3750 def _parse_string_agg(self) -> exp.Expression: 3751 if self._match(TokenType.DISTINCT): 3752 args: t.List[t.Optional[exp.Expression]] = [ 3753 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3754 ] 3755 if self._match(TokenType.COMMA): 3756 args.extend(self._parse_csv(self._parse_conjunction)) 3757 else: 3758 args = self._parse_csv(self._parse_conjunction) 3759 3760 index = self._index 3761 if not self._match(TokenType.R_PAREN): 3762 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3763 return self.expression( 3764 exp.GroupConcat, 3765 this=seq_get(args, 0), 3766 separator=self._parse_order(this=seq_get(args, 1)), 3767 ) 3768 3769 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3770 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3771 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3772 if not self._match_text_seq("WITHIN", "GROUP"): 3773 self._retreat(index) 3774 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3775 3776 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3777 order = self._parse_order(this=seq_get(args, 0)) 3778 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3779 3780 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3781 this = self._parse_bitwise() 3782 3783 if self._match(TokenType.USING): 3784 to: t.Optional[exp.Expression] = self.expression( 3785 exp.CharacterSet, this=self._parse_var() 3786 ) 3787 elif self._match(TokenType.COMMA): 3788 to = self._parse_types() 3789 else: 3790 to = None 3791 3792 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3793 3794 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3795 """ 3796 There are generally two variants of the DECODE function: 3797 3798 - DECODE(bin, charset) 3799 - DECODE(expression, search, result [, search, result] ... [, default]) 3800 3801 The second variant will always be parsed into a CASE expression. Note that NULL 3802 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3803 instead of relying on pattern matching. 
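For example, DECODE(x, 1, 'one', NULL, 'missing', 'other') is parsed as
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END.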
3804 """ 3805 args = self._parse_csv(self._parse_conjunction) 3806 3807 if len(args) < 3: 3808 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3809 3810 expression, *expressions = args 3811 if not expression: 3812 return None 3813 3814 ifs = [] 3815 for search, result in zip(expressions[::2], expressions[1::2]): 3816 if not search or not result: 3817 return None 3818 3819 if isinstance(search, exp.Literal): 3820 ifs.append( 3821 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3822 ) 3823 elif isinstance(search, exp.Null): 3824 ifs.append( 3825 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3826 ) 3827 else: 3828 cond = exp.or_( 3829 exp.EQ(this=expression.copy(), expression=search), 3830 exp.and_( 3831 exp.Is(this=expression.copy(), expression=exp.Null()), 3832 exp.Is(this=search.copy(), expression=exp.Null()), 3833 copy=False, 3834 ), 3835 copy=False, 3836 ) 3837 ifs.append(exp.If(this=cond, true=result)) 3838 3839 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3840 3841 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3842 self._match_text_seq("KEY") 3843 key = self._parse_field() 3844 self._match(TokenType.COLON) 3845 self._match_text_seq("VALUE") 3846 value = self._parse_field() 3847 3848 if not key and not value: 3849 return None 3850 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3851 3852 def _parse_json_object(self) -> exp.JSONObject: 3853 star = self._parse_star() 3854 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3855 3856 null_handling = None 3857 if self._match_text_seq("NULL", "ON", "NULL"): 3858 null_handling = "NULL ON NULL" 3859 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3860 null_handling = "ABSENT ON NULL" 3861 3862 unique_keys = None 3863 if self._match_text_seq("WITH", "UNIQUE"): 3864 unique_keys = True 3865 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3866 unique_keys = False 3867 3868 self._match_text_seq("KEYS") 3869 3870 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3871 format_json = self._match_text_seq("FORMAT", "JSON") 3872 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3873 3874 return self.expression( 3875 exp.JSONObject, 3876 expressions=expressions, 3877 null_handling=null_handling, 3878 unique_keys=unique_keys, 3879 return_type=return_type, 3880 format_json=format_json, 3881 encoding=encoding, 3882 ) 3883 3884 def _parse_logarithm(self) -> exp.Func: 3885 # Default argument order is base, expression 3886 args = self._parse_csv(self._parse_range) 3887 3888 if len(args) > 1: 3889 if not self.LOG_BASE_FIRST: 3890 args.reverse() 3891 return exp.Log.from_arg_list(args) 3892 3893 return self.expression( 3894 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3895 ) 3896 3897 def _parse_match_against(self) -> exp.MatchAgainst: 3898 expressions = self._parse_csv(self._parse_column) 3899 3900 self._match_text_seq(")", "AGAINST", "(") 3901 3902 this = self._parse_string() 3903 3904 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3905 modifier = "IN NATURAL LANGUAGE MODE" 3906 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3907 modifier = f"{modifier} WITH QUERY EXPANSION" 3908 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3909 modifier = "IN BOOLEAN MODE" 3910 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3911 modifier = "WITH QUERY EXPANSION" 3912 
else: 3913 modifier = None 3914 3915 return self.expression( 3916 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3917 ) 3918 3919 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3920 def _parse_open_json(self) -> exp.OpenJSON: 3921 this = self._parse_bitwise() 3922 path = self._match(TokenType.COMMA) and self._parse_string() 3923 3924 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3925 this = self._parse_field(any_token=True) 3926 kind = self._parse_types() 3927 path = self._parse_string() 3928 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3929 3930 return self.expression( 3931 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3932 ) 3933 3934 expressions = None 3935 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3936 self._match_l_paren() 3937 expressions = self._parse_csv(_parse_open_json_column_def) 3938 3939 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3940 3941 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3942 args = self._parse_csv(self._parse_bitwise) 3943 3944 if self._match(TokenType.IN): 3945 return self.expression( 3946 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3947 ) 3948 3949 if haystack_first: 3950 haystack = seq_get(args, 0) 3951 needle = seq_get(args, 1) 3952 else: 3953 needle = seq_get(args, 0) 3954 haystack = seq_get(args, 1) 3955 3956 return self.expression( 3957 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3958 ) 3959 3960 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3961 args = self._parse_csv(self._parse_table) 3962 return exp.JoinHint(this=func_name.upper(), expressions=args) 3963 3964 def _parse_substring(self) -> exp.Substring: 3965 # Postgres supports the form: substring(string [from int] [for int]) 3966 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3967 3968 args = self._parse_csv(self._parse_bitwise) 3969 3970 if self._match(TokenType.FROM): 3971 args.append(self._parse_bitwise()) 3972 if self._match(TokenType.FOR): 3973 args.append(self._parse_bitwise()) 3974 3975 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3976 3977 def _parse_trim(self) -> exp.Trim: 3978 # https://www.w3resource.com/sql/character-functions/trim.php 3979 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3980 3981 position = None 3982 collation = None 3983 3984 if self._match_texts(self.TRIM_TYPES): 3985 position = self._prev.text.upper() 3986 3987 expression = self._parse_bitwise() 3988 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3989 this = self._parse_bitwise() 3990 else: 3991 this = expression 3992 expression = None 3993 3994 if self._match(TokenType.COLLATE): 3995 collation = self._parse_bitwise() 3996 3997 return self.expression( 3998 exp.Trim, this=this, position=position, expression=expression, collation=collation 3999 ) 4000 4001 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4002 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4003 4004 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4005 return self._parse_window(self._parse_id_var(), alias=True) 4006 4007 def _parse_respect_or_ignore_nulls( 4008 self, this: t.Optional[exp.Expression] 4009 ) -> t.Optional[exp.Expression]: 4010 if self._match_text_seq("IGNORE", "NULLS"): 4011 return 
self.expression(exp.IgnoreNulls, this=this) 4012 if self._match_text_seq("RESPECT", "NULLS"): 4013 return self.expression(exp.RespectNulls, this=this) 4014 return this 4015 4016 def _parse_window( 4017 self, this: t.Optional[exp.Expression], alias: bool = False 4018 ) -> t.Optional[exp.Expression]: 4019 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4020 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4021 self._match_r_paren() 4022 4023 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4024 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4025 if self._match_text_seq("WITHIN", "GROUP"): 4026 order = self._parse_wrapped(self._parse_order) 4027 this = self.expression(exp.WithinGroup, this=this, expression=order) 4028 4029 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4030 # Some dialects choose to implement and some do not. 4031 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4032 4033 # There is some code above in _parse_lambda that handles 4034 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4035 4036 # The below changes handle 4037 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4038 4039 # Oracle allows both formats 4040 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4041 # and Snowflake chose to do the same for familiarity 4042 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4043 this = self._parse_respect_or_ignore_nulls(this) 4044 4045 # bigquery select from window x AS (partition by ...) 4046 if alias: 4047 over = None 4048 self._match(TokenType.ALIAS) 4049 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4050 return this 4051 else: 4052 over = self._prev.text.upper() 4053 4054 if not self._match(TokenType.L_PAREN): 4055 return self.expression( 4056 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4057 ) 4058 4059 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4060 4061 first = self._match(TokenType.FIRST) 4062 if self._match_text_seq("LAST"): 4063 first = False 4064 4065 partition = self._parse_partition_by() 4066 order = self._parse_order() 4067 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4068 4069 if kind: 4070 self._match(TokenType.BETWEEN) 4071 start = self._parse_window_spec() 4072 self._match(TokenType.AND) 4073 end = self._parse_window_spec() 4074 4075 spec = self.expression( 4076 exp.WindowSpec, 4077 kind=kind, 4078 start=start["value"], 4079 start_side=start["side"], 4080 end=end["value"], 4081 end_side=end["side"], 4082 ) 4083 else: 4084 spec = None 4085 4086 self._match_r_paren() 4087 4088 return self.expression( 4089 exp.Window, 4090 this=this, 4091 partition_by=partition, 4092 order=order, 4093 spec=spec, 4094 alias=window_alias, 4095 over=over, 4096 first=first, 4097 ) 4098 4099 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4100 self._match(TokenType.BETWEEN) 4101 4102 return { 4103 "value": ( 4104 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4105 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4106 or self._parse_bitwise() 4107 ), 4108 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4109 } 4110 4111 def _parse_alias( 4112 self, this: t.Optional[exp.Expression], explicit: bool = False 4113 ) -> t.Optional[exp.Expression]: 4114 
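# Parses an optional alias: with explicit=True the AS keyword is required, otherwise a bare identifier also counts, so both `1 AS x` and `1 x` yield exp.Alias.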
any_token = self._match(TokenType.ALIAS) 4115 4116 if explicit and not any_token: 4117 return this 4118 4119 if self._match(TokenType.L_PAREN): 4120 aliases = self.expression( 4121 exp.Aliases, 4122 this=this, 4123 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4124 ) 4125 self._match_r_paren(aliases) 4126 return aliases 4127 4128 alias = self._parse_id_var(any_token) 4129 4130 if alias: 4131 return self.expression(exp.Alias, this=this, alias=alias) 4132 4133 return this 4134 4135 def _parse_id_var( 4136 self, 4137 any_token: bool = True, 4138 tokens: t.Optional[t.Collection[TokenType]] = None, 4139 ) -> t.Optional[exp.Expression]: 4140 identifier = self._parse_identifier() 4141 4142 if identifier: 4143 return identifier 4144 4145 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4146 quoted = self._prev.token_type == TokenType.STRING 4147 return exp.Identifier(this=self._prev.text, quoted=quoted) 4148 4149 return None 4150 4151 def _parse_string(self) -> t.Optional[exp.Expression]: 4152 if self._match(TokenType.STRING): 4153 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4154 return self._parse_placeholder() 4155 4156 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4157 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4158 4159 def _parse_number(self) -> t.Optional[exp.Expression]: 4160 if self._match(TokenType.NUMBER): 4161 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4162 return self._parse_placeholder() 4163 4164 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4165 if self._match(TokenType.IDENTIFIER): 4166 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4167 return self._parse_placeholder() 4168 4169 def _parse_var( 4170 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4171 ) -> t.Optional[exp.Expression]: 4172 if ( 4173 (any_token and self._advance_any()) 4174 or self._match(TokenType.VAR) 4175 or (self._match_set(tokens) if tokens else False) 4176 ): 4177 return self.expression(exp.Var, this=self._prev.text) 4178 return self._parse_placeholder() 4179 4180 def _advance_any(self) -> t.Optional[Token]: 4181 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4182 self._advance() 4183 return self._prev 4184 return None 4185 4186 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4187 return self._parse_var() or self._parse_string() 4188 4189 def _parse_null(self) -> t.Optional[exp.Expression]: 4190 if self._match(TokenType.NULL): 4191 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4192 return None 4193 4194 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4195 if self._match(TokenType.TRUE): 4196 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4197 if self._match(TokenType.FALSE): 4198 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4199 return None 4200 4201 def _parse_star(self) -> t.Optional[exp.Expression]: 4202 if self._match(TokenType.STAR): 4203 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4204 return None 4205 4206 def _parse_parameter(self) -> exp.Parameter: 4207 wrapped = self._match(TokenType.L_BRACE) 4208 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4209 self._match(TokenType.R_BRACE) 4210 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4211 4212 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
4213 if self._match_set(self.PLACEHOLDER_PARSERS): 4214 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4215 if placeholder: 4216 return placeholder 4217 self._advance(-1) 4218 return None 4219 4220 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4221 if not self._match(TokenType.EXCEPT): 4222 return None 4223 if self._match(TokenType.L_PAREN, advance=False): 4224 return self._parse_wrapped_csv(self._parse_column) 4225 return self._parse_csv(self._parse_column) 4226 4227 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4228 if not self._match(TokenType.REPLACE): 4229 return None 4230 if self._match(TokenType.L_PAREN, advance=False): 4231 return self._parse_wrapped_csv(self._parse_expression) 4232 return self._parse_csv(self._parse_expression) 4233 4234 def _parse_csv( 4235 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4236 ) -> t.List[t.Optional[exp.Expression]]: 4237 parse_result = parse_method() 4238 items = [parse_result] if parse_result is not None else [] 4239 4240 while self._match(sep): 4241 self._add_comments(parse_result) 4242 parse_result = parse_method() 4243 if parse_result is not None: 4244 items.append(parse_result) 4245 4246 return items 4247 4248 def _parse_tokens( 4249 self, parse_method: t.Callable, expressions: t.Dict 4250 ) -> t.Optional[exp.Expression]: 4251 this = parse_method() 4252 4253 while self._match_set(expressions): 4254 this = self.expression( 4255 expressions[self._prev.token_type], 4256 this=this, 4257 comments=self._prev_comments, 4258 expression=parse_method(), 4259 ) 4260 4261 return this 4262 4263 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4264 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4265 4266 def _parse_wrapped_csv( 4267 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4268 ) -> t.List[t.Optional[exp.Expression]]: 4269 return self._parse_wrapped( 4270 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4271 ) 4272 4273 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4274 wrapped = self._match(TokenType.L_PAREN) 4275 if not wrapped and not optional: 4276 self.raise_error("Expecting (") 4277 parse_result = parse_method() 4278 if wrapped: 4279 self._match_r_paren() 4280 return parse_result 4281 4282 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4283 return self._parse_select() or self._parse_set_operations( 4284 self._parse_expression() if alias else self._parse_conjunction() 4285 ) 4286 4287 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4288 return self._parse_query_modifiers( 4289 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4290 ) 4291 4292 def _parse_transaction(self) -> exp.Transaction: 4293 this = None 4294 if self._match_texts(self.TRANSACTION_KIND): 4295 this = self._prev.text 4296 4297 self._match_texts({"TRANSACTION", "WORK"}) 4298 4299 modes = [] 4300 while True: 4301 mode = [] 4302 while self._match(TokenType.VAR): 4303 mode.append(self._prev.text) 4304 4305 if mode: 4306 modes.append(" ".join(mode)) 4307 if not self._match(TokenType.COMMA): 4308 break 4309 4310 return self.expression(exp.Transaction, this=this, modes=modes) 4311 4312 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4313 chain = None 4314 savepoint = None 4315 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4316 4317 self._match_texts({"TRANSACTION", "WORK"}) 4318 4319 if self._match_text_seq("TO"): 4320 self._match_text_seq("SAVEPOINT") 4321 savepoint = self._parse_id_var() 4322 4323 if self._match(TokenType.AND): 4324 chain = not self._match_text_seq("NO") 4325 self._match_text_seq("CHAIN") 4326 4327 if is_rollback: 4328 return self.expression(exp.Rollback, savepoint=savepoint) 4329 4330 return self.expression(exp.Commit, chain=chain) 4331 4332 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4333 if not self._match_text_seq("ADD"): 4334 return None 4335 4336 self._match(TokenType.COLUMN) 4337 exists_column = self._parse_exists(not_=True) 4338 expression = self._parse_column_def(self._parse_field(any_token=True)) 4339 4340 if expression: 4341 expression.set("exists", exists_column) 4342 4343 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4344 if self._match_texts(("FIRST", "AFTER")): 4345 position = self._prev.text 4346 column_position = self.expression( 4347 exp.ColumnPosition, this=self._parse_column(), position=position 4348 ) 4349 expression.set("position", column_position) 4350 4351 return expression 4352 4353 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4354 drop = self._match(TokenType.DROP) and self._parse_drop() 4355 if drop and not isinstance(drop, exp.Command): 4356 drop.set("kind", drop.args.get("kind", "COLUMN")) 4357 return drop 4358 4359 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4360 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4361 return self.expression( 4362 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4363 ) 4364 4365 def _parse_add_constraint(self) -> exp.AddConstraint: 4366 this = None 4367 kind = self._prev.token_type 4368 4369 if kind == TokenType.CONSTRAINT: 4370 this = self._parse_id_var() 4371 4372 if self._match_text_seq("CHECK"): 4373 expression = self._parse_wrapped(self._parse_conjunction) 4374 enforced = self._match_text_seq("ENFORCED") 4375 4376 return self.expression( 4377 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4378 ) 4379 4380 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4381 expression = self._parse_foreign_key() 4382 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4383 expression = self._parse_primary_key() 4384 else: 4385 expression = None 4386 4387 return self.expression(exp.AddConstraint, this=this, expression=expression) 4388 4389 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4390 index = self._index - 1 4391 4392 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4393 return self._parse_csv(self._parse_add_constraint) 4394 4395 self._retreat(index) 4396 return self._parse_csv(self._parse_add_column) 4397 4398 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4399 self._match(TokenType.COLUMN) 4400 column = self._parse_field(any_token=True) 4401 4402 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4403 return self.expression(exp.AlterColumn, this=column, drop=True) 4404 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4405 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4406 4407 self._match_text_seq("SET", "DATA") 4408 return self.expression( 4409 exp.AlterColumn, 4410 this=column, 4411 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4412 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4413 using=self._match(TokenType.USING) and self._parse_conjunction(), 4414 ) 4415 4416 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4417 index = self._index - 1 4418 4419 partition_exists = self._parse_exists() 4420 if self._match(TokenType.PARTITION, advance=False): 4421 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4422 4423 self._retreat(index) 4424 return self._parse_csv(self._parse_drop_column) 4425 4426 def _parse_alter_table_rename(self) -> exp.RenameTable: 4427 self._match_text_seq("TO") 4428 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4429 4430 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4431 start = self._prev 4432 4433 if not self._match(TokenType.TABLE): 4434 return self._parse_as_command(start) 4435 4436 exists = self._parse_exists() 4437 this = self._parse_table(schema=True) 4438 4439 if self._next: 4440 self._advance() 4441 4442 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4443 if parser: 4444 actions = ensure_list(parser(self)) 4445 4446 if not self._curr: 4447 return self.expression( 4448 exp.AlterTable, 4449 this=this, 4450 exists=exists, 4451 actions=actions, 4452 ) 4453 return self._parse_as_command(start) 4454 4455 def _parse_merge(self) -> exp.Merge: 4456 self._match(TokenType.INTO) 4457 target = self._parse_table() 4458 4459 self._match(TokenType.USING) 4460 using = self._parse_table() 4461 4462 self._match(TokenType.ON) 4463 on = self._parse_conjunction() 4464 4465 whens = [] 4466 while self._match(TokenType.WHEN): 4467 matched = not self._match(TokenType.NOT) 4468 self._match_text_seq("MATCHED") 4469 source = ( 4470 False 4471 if self._match_text_seq("BY", "TARGET") 4472 else self._match_text_seq("BY", "SOURCE") 4473 ) 4474 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4475 4476 self._match(TokenType.THEN) 4477 4478 if self._match(TokenType.INSERT): 4479 _this = self._parse_star() 4480 if _this: 4481 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4482 else: 4483 then = self.expression( 4484 exp.Insert, 4485 this=self._parse_value(), 4486 expression=self._match(TokenType.VALUES) and self._parse_value(), 4487 ) 4488 elif self._match(TokenType.UPDATE): 4489 expressions = self._parse_star() 4490 if expressions: 4491 then = self.expression(exp.Update, expressions=expressions) 4492 else: 4493 then = self.expression( 4494 exp.Update, 4495 expressions=self._match(TokenType.SET) 4496 and self._parse_csv(self._parse_equality), 4497 ) 4498 elif self._match(TokenType.DELETE): 4499 then = self.expression(exp.Var, this=self._prev.text) 4500 else: 4501 then = None 4502 4503 whens.append( 4504 self.expression( 4505 exp.When, 4506 matched=matched, 4507 source=source, 4508 condition=condition, 4509 then=then, 4510 ) 4511 ) 4512 4513 return self.expression( 4514 exp.Merge, 4515 this=target, 4516 using=using, 4517 on=on, 4518 expressions=whens, 4519 ) 4520 4521 def _parse_show(self) -> t.Optional[exp.Expression]: 4522 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4523 if parser: 4524 return parser(self) 4525 self._advance() 4526 return self.expression(exp.Show, this=self._prev.text.upper()) 4527 4528 def _parse_set_item_assignment( 4529 self, kind: t.Optional[str] = None 4530 ) -> t.Optional[exp.Expression]: 4531 index = self._index 4532 4533 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4534 return self._parse_set_transaction(global_=kind == "GLOBAL") 4535 4536 left = self._parse_primary() or self._parse_id_var() 4537 4538 if not self._match_texts(("=", "TO")): 4539 self._retreat(index) 4540 return None 4541 4542 right = self._parse_statement() or self._parse_id_var() 4543 this = self.expression(exp.EQ, this=left, expression=right) 4544 4545 return self.expression(exp.SetItem, this=this, kind=kind) 4546 4547 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4548 self._match_text_seq("TRANSACTION") 4549 characteristics = self._parse_csv( 4550 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4551 ) 4552 return self.expression( 4553 exp.SetItem, 4554 expressions=characteristics, 4555 kind="TRANSACTION", 4556 **{"global": global_}, # type: ignore 4557 ) 4558 4559 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4560 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4561 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4562 4563 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4564 index = self._index 4565 set_ = self.expression( 4566 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4567 ) 4568 4569 if self._curr: 4570 self._retreat(index) 4571 return self._parse_as_command(self._prev) 4572 4573 return set_ 4574 4575 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4576 for option in options: 4577 if self._match_text_seq(*option.split(" ")): 4578 return exp.var(option) 4579 return None 4580 4581 def _parse_as_command(self, start: Token) -> exp.Command: 4582 while self._curr: 4583 self._advance() 4584 text = self._find_sql(start, self._prev) 4585 size = len(start.text) 4586 return exp.Command(this=text[:size], expression=text[size:]) 4587 4588 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4589 settings = [] 4590 4591 self._match_l_paren() 4592 kind = self._parse_id_var() 4593 4594 if self._match(TokenType.L_PAREN): 4595 while True: 4596 key = self._parse_id_var() 4597 value = self._parse_primary() 4598 4599 if not key and value is None: 4600 break 4601 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4602 self._match(TokenType.R_PAREN) 4603 4604 self._match_r_paren() 4605 4606 return self.expression( 4607 exp.DictProperty, 4608 this=this, 4609 kind=kind.this if kind else None, 4610 settings=settings, 4611 ) 4612 4613 def _parse_dict_range(self, this: str) -> exp.DictRange: 4614 self._match_l_paren() 4615 has_min = self._match_text_seq("MIN") 4616 if has_min: 4617 min = self._parse_var() or self._parse_primary() 4618 self._match_text_seq("MAX") 4619 max = self._parse_var() or self._parse_primary() 4620 else: 4621 max = self._parse_var() or self._parse_primary() 4622 min = exp.Literal.number(0) 4623 self._match_r_paren() 4624 return self.expression(exp.DictRange, this=this, min=min, max=max) 4625 4626 def _find_parser( 4627 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4628 ) -> t.Optional[t.Callable]: 4629 if not self._curr: 4630 return None 4631 4632 index = self._index 4633 this = [] 4634 while True: 4635 # The current token might be multiple words 4636 curr = self._curr.text.upper() 4637 key = curr.split(" ") 4638 this.append(curr) 4639 4640 self._advance() 4641 result, trie = in_trie(trie, key) 4642 if result == TrieResult.FAILED: 4643 break 4644 4645 if result == TrieResult.EXISTS: 4646 
subparser = parsers[" ".join(this)] 4647 return subparser 4648 4649 self._retreat(index) 4650 return None 4651 4652 def _match(self, token_type, advance=True, expression=None): 4653 if not self._curr: 4654 return None 4655 4656 if self._curr.token_type == token_type: 4657 if advance: 4658 self._advance() 4659 self._add_comments(expression) 4660 return True 4661 4662 return None 4663 4664 def _match_set(self, types, advance=True): 4665 if not self._curr: 4666 return None 4667 4668 if self._curr.token_type in types: 4669 if advance: 4670 self._advance() 4671 return True 4672 4673 return None 4674 4675 def _match_pair(self, token_type_a, token_type_b, advance=True): 4676 if not self._curr or not self._next: 4677 return None 4678 4679 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4680 if advance: 4681 self._advance(2) 4682 return True 4683 4684 return None 4685 4686 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4687 if not self._match(TokenType.L_PAREN, expression=expression): 4688 self.raise_error("Expecting (") 4689 4690 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4691 if not self._match(TokenType.R_PAREN, expression=expression): 4692 self.raise_error("Expecting )") 4693 4694 def _match_texts(self, texts, advance=True): 4695 if self._curr and self._curr.text.upper() in texts: 4696 if advance: 4697 self._advance() 4698 return True 4699 return False 4700 4701 def _match_text_seq(self, *texts, advance=True): 4702 index = self._index 4703 for text in texts: 4704 if self._curr and self._curr.text.upper() == text: 4705 self._advance() 4706 else: 4707 self._retreat(index) 4708 return False 4709 4710 if not advance: 4711 self._retreat(index) 4712 4713 return True 4714 4715 @t.overload 4716 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4717 ... 4718 4719 @t.overload 4720 def _replace_columns_with_dots( 4721 self, this: t.Optional[exp.Expression] 4722 ) -> t.Optional[exp.Expression]: 4723 ... 4724 4725 def _replace_columns_with_dots(self, this): 4726 if isinstance(this, exp.Dot): 4727 exp.replace_children(this, self._replace_columns_with_dots) 4728 elif isinstance(this, exp.Column): 4729 exp.replace_children(this, self._replace_columns_with_dots) 4730 table = this.args.get("table") 4731 this = ( 4732 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4733 ) 4734 4735 return this 4736 4737 def _replace_lambda( 4738 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4739 ) -> t.Optional[exp.Expression]: 4740 if not node: 4741 return node 4742 4743 for column in node.find_all(exp.Column): 4744 if column.parts[0].name in lambda_variables: 4745 dot_or_id = column.to_dot() if column.table else column.this 4746 parent = column.parent 4747 4748 while isinstance(parent, exp.Dot): 4749 if not isinstance(parent.parent, exp.Dot): 4750 parent.replace(dot_or_id) 4751 break 4752 parent = parent.parent 4753 else: 4754 if column is node: 4755 node = dot_or_id 4756 else: 4757 column.replace(dot_or_id) 4758 return node
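Taken together, the private helpers above are easiest to understand through the trees they build. The sketch below is not part of the module source; it assumes a sqlglot version whose exp.Window and exp.WindowSpec argument names match the listing above, and shows how a frame parsed by _parse_window and _parse_window_spec surfaces through the public API:

import sqlglot
from sqlglot import exp

# _parse_window builds an exp.Window node; its ROWS BETWEEN frame becomes the
# "spec" argument, an exp.WindowSpec assembled by _parse_window_spec.
tree = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)
window = tree.find(exp.Window)
spec = window.args["spec"]
print(spec.args["start"], spec.args["start_side"])  # UNBOUNDED PRECEDING
print(spec.args["end"])                             # CURRENT ROW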
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
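For example (a minimal sketch; the parser is usually constructed for you by a dialect, and the variable name below is illustrative):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Accumulate up to five error messages and raise them together as a single
# ParseError, instead of raising immediately on the first problem (the default).
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)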
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
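A minimal end-to-end sketch (variable names are illustrative), pairing parse with the Tokenizer that produces its input:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One syntax tree per statement; passing the original SQL string gives the
# parser the context it needs for readable error messages.
expressions = Parser().parse(tokens, sql)
print(len(expressions))  # 2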
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
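A sketch of parsing tokens into a specific node type, assuming exp.Table is among the types registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "db.some_table"
tokens = Tokenizer().tokenize(sql)

# Parse the tokens as a table reference rather than as a full statement.
table = Parser().parse_into(exp.Table, tokens, sql)[0]
print(table.sql())  # db.some_table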
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
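check_errors is invoked internally at the end of a parse, so the error level chosen at construction time decides what happens to whatever accumulated. A sketch of the WARN behavior (the broken SQL below is illustrative):

import logging

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()
sql = "SELECT * FROM (SELECT 1"  # unbalanced parenthesis

# Under WARN the parse still returns; each recorded error is logged through
# the "sqlglot" logger by check_errors instead of being raised.
Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql)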
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
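The structured fields assembled here survive on the raised exception, which makes them usable by tooling. A sketch (the failing SQL is illustrative):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM (SELECT 1"

try:
    # The default error level is IMMEDIATE, so the first error is raised.
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    err = e.errors[0]
    # line/col locate the offending token; highlight is the underlined snippet.
    print(err["line"], err["col"], err["highlight"])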
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
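As a small sketch, expression can be used the same way the _parse_* methods use it, building and validating a node in one step:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Instantiates exp.EQ, attaches any pending comments, and validates that all
# mandatory arguments are present before returning the node.
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1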
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
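A sketch of how the error level gates this check: with ErrorLevel.IGNORE an incomplete node passes through untouched, whereas the default level would report its missing mandatory arguments via raise_error:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# exp.EQ requires both "this" and "expression"; IGNORE skips the check,
# so the incomplete node is returned as-is.
lenient = Parser(error_level=ErrorLevel.IGNORE)
node = lenient.validate_expression(exp.EQ())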