sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 "joins": lambda self: list(iter(self._parse_join, None)), 741 "laterals": lambda self: list(iter(self._parse_lateral, None)), 742 "match": lambda self: self._parse_match_recognize(), 743 "where": lambda self: self._parse_where(), 744 "group": lambda self: self._parse_group(), 745 "having": lambda self: self._parse_having(), 746 "qualify": lambda self: self._parse_qualify(), 747 "windows": lambda self: self._parse_window_clause(), 748 "order": 
lambda self: self._parse_order(), 749 "limit": lambda self: self._parse_limit(), 750 "offset": lambda self: self._parse_offset(), 751 "locks": lambda self: self._parse_locks(), 752 "sample": lambda self: self._parse_table_sample(as_modifier=True), 753 } 754 755 SET_PARSERS = { 756 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 757 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 758 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 759 "TRANSACTION": lambda self: self._parse_set_transaction(), 760 } 761 762 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 763 764 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 765 766 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 767 768 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 769 770 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 771 772 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 773 TRANSACTION_CHARACTERISTICS = { 774 "ISOLATION LEVEL REPEATABLE READ", 775 "ISOLATION LEVEL READ COMMITTED", 776 "ISOLATION LEVEL READ UNCOMMITTED", 777 "ISOLATION LEVEL SERIALIZABLE", 778 "READ WRITE", 779 "READ ONLY", 780 } 781 782 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 783 784 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 785 786 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 787 788 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 789 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 790 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 791 792 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 793 794 STRICT_CAST = True 795 796 # A NULL arg in CONCAT yields NULL by default 797 CONCAT_NULL_OUTPUTS_STRING = False 798 799 PREFIXED_PIVOT_COLUMNS = False 800 IDENTIFY_PIVOT_STRINGS = False 801 802 LOG_BASE_FIRST = True 803 LOG_DEFAULTS_TO_LN = False 804 805 __slots__ = ( 806 "error_level", 807 
"error_message_context", 808 "max_errors", 809 "sql", 810 "errors", 811 "_tokens", 812 "_index", 813 "_curr", 814 "_next", 815 "_prev", 816 "_prev_comments", 817 ) 818 819 # Autofilled 820 INDEX_OFFSET: int = 0 821 UNNEST_COLUMN_ONLY: bool = False 822 ALIAS_POST_TABLESAMPLE: bool = False 823 STRICT_STRING_CONCAT = False 824 NULL_ORDERING: str = "nulls_are_small" 825 SHOW_TRIE: t.Dict = {} 826 SET_TRIE: t.Dict = {} 827 FORMAT_MAPPING: t.Dict[str, str] = {} 828 FORMAT_TRIE: t.Dict = {} 829 TIME_MAPPING: t.Dict[str, str] = {} 830 TIME_TRIE: t.Dict = {} 831 832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset() 842 843 def reset(self): 844 self.sql = "" 845 self.errors = [] 846 self._tokens = [] 847 self._index = 0 848 self._curr = None 849 self._next = None 850 self._prev = None 851 self._prev_comments = None 852 853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 ) 870 871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. 
If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in 
self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 
994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression 1020 1021 def _find_sql(self, start: Token, end: Token) -> str: 1022 return self.sql[start.start : end.end + 1] 1023 1024 def _advance(self, times: int = 1) -> None: 1025 self._index += times 1026 self._curr = seq_get(self._tokens, self._index) 1027 self._next = seq_get(self._tokens, self._index + 1) 1028 1029 if self._index > 0: 1030 self._prev = self._tokens[self._index - 1] 1031 self._prev_comments = self._prev.comments 1032 else: 1033 self._prev = None 1034 self._prev_comments = None 1035 1036 def _retreat(self, index: int) -> None: 1037 if index != self._index: 1038 self._advance(index - self._index) 1039 1040 def _parse_command(self) -> exp.Command: 1041 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1042 1043 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1044 start = self._prev 1045 exists = self._parse_exists() if allow_exists else None 1046 1047 self._match(TokenType.ON) 1048 1049 kind = self._match_set(self.CREATABLES) and self._prev 1050 if not kind: 
1051 return self._parse_as_command(start) 1052 1053 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=kind.token_type) 1055 elif kind.token_type == TokenType.TABLE: 1056 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1057 elif kind.token_type == TokenType.COLUMN: 1058 this = self._parse_column() 1059 else: 1060 this = self._parse_id_var() 1061 1062 self._match(TokenType.IS) 1063 1064 return self.expression( 1065 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1066 ) 1067 1068 def _parse_to_table( 1069 self, 1070 ) -> exp.ToTableProperty: 1071 table = self._parse_table_parts(schema=True) 1072 return self.expression(exp.ToTableProperty, this=table) 1073 1074 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1075 def _parse_ttl(self) -> exp.Expression: 1076 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1077 this = self._parse_bitwise() 1078 1079 if self._match_text_seq("DELETE"): 1080 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1081 if self._match_text_seq("RECOMPRESS"): 1082 return self.expression( 1083 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1084 ) 1085 if self._match_text_seq("TO", "DISK"): 1086 return self.expression( 1087 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1088 ) 1089 if self._match_text_seq("TO", "VOLUME"): 1090 return self.expression( 1091 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1092 ) 1093 1094 return this 1095 1096 expressions = self._parse_csv(_parse_ttl_action) 1097 where = self._parse_where() 1098 group = self._parse_group() 1099 1100 aggregates = None 1101 if group and self._match(TokenType.SET): 1102 aggregates = self._parse_csv(self._parse_set_item) 1103 1104 return self.expression( 1105 exp.MergeTreeTTL, 1106 expressions=expressions, 1107 where=where, 1108 
group=group, 1109 aggregates=aggregates, 1110 ) 1111 1112 def _parse_statement(self) -> t.Optional[exp.Expression]: 1113 if self._curr is None: 1114 return None 1115 1116 if self._match_set(self.STATEMENT_PARSERS): 1117 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1118 1119 if self._match_set(Tokenizer.COMMANDS): 1120 return self._parse_command() 1121 1122 expression = self._parse_expression() 1123 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1124 return self._parse_query_modifiers(expression) 1125 1126 def _parse_drop(self) -> exp.Drop | exp.Command: 1127 start = self._prev 1128 temporary = self._match(TokenType.TEMPORARY) 1129 materialized = self._match_text_seq("MATERIALIZED") 1130 1131 kind = self._match_set(self.CREATABLES) and self._prev.text 1132 if not kind: 1133 return self._parse_as_command(start) 1134 1135 return self.expression( 1136 exp.Drop, 1137 exists=self._parse_exists(), 1138 this=self._parse_table(schema=True), 1139 kind=kind, 1140 temporary=temporary, 1141 materialized=materialized, 1142 cascade=self._match_text_seq("CASCADE"), 1143 constraints=self._match_text_seq("CONSTRAINTS"), 1144 purge=self._match_text_seq("PURGE"), 1145 ) 1146 1147 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1148 return ( 1149 self._match(TokenType.IF) 1150 and (not not_ or self._match(TokenType.NOT)) 1151 and self._match(TokenType.EXISTS) 1152 ) 1153 1154 def _parse_create(self) -> exp.Create | exp.Command: 1155 # Note: this can't be None because we've matched a statement parser 1156 start = self._prev 1157 replace = start.text.upper() == "REPLACE" or self._match_pair( 1158 TokenType.OR, TokenType.REPLACE 1159 ) 1160 unique = self._match(TokenType.UNIQUE) 1161 1162 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1163 self._advance() 1164 1165 properties = None 1166 create_token = self._match_set(self.CREATABLES) and self._prev 1167 1168 if not create_token: 1169 # 
exp.Properties.Location.POST_CREATE 1170 properties = self._parse_properties() 1171 create_token = self._match_set(self.CREATABLES) and self._prev 1172 1173 if not properties or not create_token: 1174 return self._parse_as_command(start) 1175 1176 exists = self._parse_exists(not_=True) 1177 this = None 1178 expression = None 1179 indexes = None 1180 no_schema_binding = None 1181 begin = None 1182 clone = None 1183 1184 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1185 nonlocal properties 1186 if properties and temp_props: 1187 properties.expressions.extend(temp_props.expressions) 1188 elif temp_props: 1189 properties = temp_props 1190 1191 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1192 this = self._parse_user_defined_function(kind=create_token.token_type) 1193 1194 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1195 extend_props(self._parse_properties()) 1196 1197 self._match(TokenType.ALIAS) 1198 begin = self._match(TokenType.BEGIN) 1199 return_ = self._match_text_seq("RETURN") 1200 expression = self._parse_statement() 1201 1202 if return_: 1203 expression = self.expression(exp.Return, this=expression) 1204 elif create_token.token_type == TokenType.INDEX: 1205 this = self._parse_index(index=self._parse_id_var()) 1206 elif create_token.token_type in self.DB_CREATABLES: 1207 table_parts = self._parse_table_parts(schema=True) 1208 1209 # exp.Properties.Location.POST_NAME 1210 self._match(TokenType.COMMA) 1211 extend_props(self._parse_properties(before=True)) 1212 1213 this = self._parse_schema(this=table_parts) 1214 1215 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1216 extend_props(self._parse_properties()) 1217 1218 self._match(TokenType.ALIAS) 1219 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1220 # exp.Properties.Location.POST_ALIAS 1221 extend_props(self._parse_properties()) 1222 1223 expression = self._parse_ddl_select() 1224 1225 if 
create_token.token_type == TokenType.TABLE: 1226 indexes = [] 1227 while True: 1228 index = self._parse_index() 1229 1230 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1231 extend_props(self._parse_properties()) 1232 1233 if not index: 1234 break 1235 else: 1236 self._match(TokenType.COMMA) 1237 indexes.append(index) 1238 elif create_token.token_type == TokenType.VIEW: 1239 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1240 no_schema_binding = True 1241 1242 if self._match_text_seq("CLONE"): 1243 clone = self._parse_table(schema=True) 1244 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1245 clone_kind = ( 1246 self._match(TokenType.L_PAREN) 1247 and self._match_texts(self.CLONE_KINDS) 1248 and self._prev.text.upper() 1249 ) 1250 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1251 self._match(TokenType.R_PAREN) 1252 clone = self.expression( 1253 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1254 ) 1255 1256 return self.expression( 1257 exp.Create, 1258 this=this, 1259 kind=create_token.text, 1260 replace=replace, 1261 unique=unique, 1262 expression=expression, 1263 exists=exists, 1264 properties=properties, 1265 indexes=indexes, 1266 no_schema_binding=no_schema_binding, 1267 begin=begin, 1268 clone=clone, 1269 ) 1270 1271 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1272 # only used for teradata currently 1273 self._match(TokenType.COMMA) 1274 1275 kwargs = { 1276 "no": self._match_text_seq("NO"), 1277 "dual": self._match_text_seq("DUAL"), 1278 "before": self._match_text_seq("BEFORE"), 1279 "default": self._match_text_seq("DEFAULT"), 1280 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1281 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1282 "after": self._match_text_seq("AFTER"), 1283 "minimum": self._match_texts(("MIN", "MINIMUM")), 1284 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1285 } 1286 1287 if 
self._match_texts(self.PROPERTY_PARSERS): 1288 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1289 try: 1290 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1291 except TypeError: 1292 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1293 1294 return None 1295 1296 def _parse_property(self) -> t.Optional[exp.Expression]: 1297 if self._match_texts(self.PROPERTY_PARSERS): 1298 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1299 1300 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1301 return self._parse_character_set(default=True) 1302 1303 if self._match_text_seq("COMPOUND", "SORTKEY"): 1304 return self._parse_sortkey(compound=True) 1305 1306 if self._match_text_seq("SQL", "SECURITY"): 1307 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1308 1309 assignment = self._match_pair( 1310 TokenType.VAR, TokenType.EQ, advance=False 1311 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1312 1313 if assignment: 1314 key = self._parse_var_or_string() 1315 self._match(TokenType.EQ) 1316 return self.expression(exp.Property, this=key, value=self._parse_column()) 1317 1318 return None 1319 1320 def _parse_stored(self) -> exp.FileFormatProperty: 1321 self._match(TokenType.ALIAS) 1322 1323 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1324 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1325 1326 return self.expression( 1327 exp.FileFormatProperty, 1328 this=self.expression( 1329 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1330 ) 1331 if input_format or output_format 1332 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1333 ) 1334 1335 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1336 self._match(TokenType.EQ) 1337 self._match(TokenType.ALIAS) 1338 return self.expression(exp_class, 
this=self._parse_field()) 1339 1340 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1341 properties = [] 1342 while True: 1343 if before: 1344 prop = self._parse_property_before() 1345 else: 1346 prop = self._parse_property() 1347 1348 if not prop: 1349 break 1350 for p in ensure_list(prop): 1351 properties.append(p) 1352 1353 if properties: 1354 return self.expression(exp.Properties, expressions=properties) 1355 1356 return None 1357 1358 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1359 return self.expression( 1360 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1361 ) 1362 1363 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1364 if self._index >= 2: 1365 pre_volatile_token = self._tokens[self._index - 2] 1366 else: 1367 pre_volatile_token = None 1368 1369 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1370 return exp.VolatileProperty() 1371 1372 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1373 1374 def _parse_with_property( 1375 self, 1376 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1377 self._match(TokenType.WITH) 1378 if self._match(TokenType.L_PAREN, advance=False): 1379 return self._parse_wrapped_csv(self._parse_property) 1380 1381 if self._match_text_seq("JOURNAL"): 1382 return self._parse_withjournaltable() 1383 1384 if self._match_text_seq("DATA"): 1385 return self._parse_withdata(no=False) 1386 elif self._match_text_seq("NO", "DATA"): 1387 return self._parse_withdata(no=True) 1388 1389 if not self._next: 1390 return None 1391 1392 return self._parse_withisolatedloading() 1393 1394 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1395 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1396 self._match(TokenType.EQ) 1397 1398 user = self._parse_id_var() 1399 self._match(TokenType.PARAMETER) 1400 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1401 1402 if not user or not host: 1403 return None 1404 1405 return exp.DefinerProperty(this=f"{user}@{host}") 1406 1407 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1408 self._match(TokenType.TABLE) 1409 self._match(TokenType.EQ) 1410 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1411 1412 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1413 return self.expression(exp.LogProperty, no=no) 1414 1415 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1416 return self.expression(exp.JournalProperty, **kwargs) 1417 1418 def _parse_checksum(self) -> exp.ChecksumProperty: 1419 self._match(TokenType.EQ) 1420 1421 on = None 1422 if self._match(TokenType.ON): 1423 on = True 1424 elif self._match_text_seq("OFF"): 1425 on = False 1426 1427 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1428 1429 def _parse_cluster(self) -> exp.Cluster: 1430 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1431 1432 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1433 self._match_text_seq("BY") 1434 1435 self._match_l_paren() 1436 expressions = self._parse_csv(self._parse_column) 1437 self._match_r_paren() 1438 1439 if self._match_text_seq("SORTED", "BY"): 1440 self._match_l_paren() 1441 sorted_by = self._parse_csv(self._parse_ordered) 1442 self._match_r_paren() 1443 else: 1444 sorted_by = None 1445 1446 self._match(TokenType.INTO) 1447 buckets = self._parse_number() 1448 self._match_text_seq("BUCKETS") 1449 1450 return self.expression( 1451 exp.ClusteredByProperty, 1452 expressions=expressions, 1453 sorted_by=sorted_by, 1454 buckets=buckets, 1455 ) 1456 1457 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1458 if not self._match_text_seq("GRANTS"): 1459 self._retreat(self._index - 1) 1460 return None 1461 1462 return 
self.expression(exp.CopyGrantsProperty) 1463 1464 def _parse_freespace(self) -> exp.FreespaceProperty: 1465 self._match(TokenType.EQ) 1466 return self.expression( 1467 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1468 ) 1469 1470 def _parse_mergeblockratio( 1471 self, no: bool = False, default: bool = False 1472 ) -> exp.MergeBlockRatioProperty: 1473 if self._match(TokenType.EQ): 1474 return self.expression( 1475 exp.MergeBlockRatioProperty, 1476 this=self._parse_number(), 1477 percent=self._match(TokenType.PERCENT), 1478 ) 1479 1480 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1481 1482 def _parse_datablocksize( 1483 self, 1484 default: t.Optional[bool] = None, 1485 minimum: t.Optional[bool] = None, 1486 maximum: t.Optional[bool] = None, 1487 ) -> exp.DataBlocksizeProperty: 1488 self._match(TokenType.EQ) 1489 size = self._parse_number() 1490 1491 units = None 1492 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1493 units = self._prev.text 1494 1495 return self.expression( 1496 exp.DataBlocksizeProperty, 1497 size=size, 1498 units=units, 1499 default=default, 1500 minimum=minimum, 1501 maximum=maximum, 1502 ) 1503 1504 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1505 self._match(TokenType.EQ) 1506 always = self._match_text_seq("ALWAYS") 1507 manual = self._match_text_seq("MANUAL") 1508 never = self._match_text_seq("NEVER") 1509 default = self._match_text_seq("DEFAULT") 1510 1511 autotemp = None 1512 if self._match_text_seq("AUTOTEMP"): 1513 autotemp = self._parse_schema() 1514 1515 return self.expression( 1516 exp.BlockCompressionProperty, 1517 always=always, 1518 manual=manual, 1519 never=never, 1520 default=default, 1521 autotemp=autotemp, 1522 ) 1523 1524 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1525 no = self._match_text_seq("NO") 1526 concurrent = self._match_text_seq("CONCURRENT") 1527 self._match_text_seq("ISOLATED", 
"LOADING") 1528 for_all = self._match_text_seq("FOR", "ALL") 1529 for_insert = self._match_text_seq("FOR", "INSERT") 1530 for_none = self._match_text_seq("FOR", "NONE") 1531 return self.expression( 1532 exp.IsolatedLoadingProperty, 1533 no=no, 1534 concurrent=concurrent, 1535 for_all=for_all, 1536 for_insert=for_insert, 1537 for_none=for_none, 1538 ) 1539 1540 def _parse_locking(self) -> exp.LockingProperty: 1541 if self._match(TokenType.TABLE): 1542 kind = "TABLE" 1543 elif self._match(TokenType.VIEW): 1544 kind = "VIEW" 1545 elif self._match(TokenType.ROW): 1546 kind = "ROW" 1547 elif self._match_text_seq("DATABASE"): 1548 kind = "DATABASE" 1549 else: 1550 kind = None 1551 1552 if kind in ("DATABASE", "TABLE", "VIEW"): 1553 this = self._parse_table_parts() 1554 else: 1555 this = None 1556 1557 if self._match(TokenType.FOR): 1558 for_or_in = "FOR" 1559 elif self._match(TokenType.IN): 1560 for_or_in = "IN" 1561 else: 1562 for_or_in = None 1563 1564 if self._match_text_seq("ACCESS"): 1565 lock_type = "ACCESS" 1566 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1567 lock_type = "EXCLUSIVE" 1568 elif self._match_text_seq("SHARE"): 1569 lock_type = "SHARE" 1570 elif self._match_text_seq("READ"): 1571 lock_type = "READ" 1572 elif self._match_text_seq("WRITE"): 1573 lock_type = "WRITE" 1574 elif self._match_text_seq("CHECKSUM"): 1575 lock_type = "CHECKSUM" 1576 else: 1577 lock_type = None 1578 1579 override = self._match_text_seq("OVERRIDE") 1580 1581 return self.expression( 1582 exp.LockingProperty, 1583 this=this, 1584 kind=kind, 1585 for_or_in=for_or_in, 1586 lock_type=lock_type, 1587 override=override, 1588 ) 1589 1590 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1591 if self._match(TokenType.PARTITION_BY): 1592 return self._parse_csv(self._parse_conjunction) 1593 return [] 1594 1595 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1596 self._match(TokenType.EQ) 1597 return self.expression( 1598 exp.PartitionedByProperty, 1599 
this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1600 ) 1601 1602 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1603 if self._match_text_seq("AND", "STATISTICS"): 1604 statistics = True 1605 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1606 statistics = False 1607 else: 1608 statistics = None 1609 1610 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1611 1612 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1613 if self._match_text_seq("PRIMARY", "INDEX"): 1614 return exp.NoPrimaryIndexProperty() 1615 return None 1616 1617 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1618 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1619 return exp.OnCommitProperty() 1620 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1621 return exp.OnCommitProperty(delete=True) 1622 return None 1623 1624 def _parse_distkey(self) -> exp.DistKeyProperty: 1625 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1626 1627 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1628 table = self._parse_table(schema=True) 1629 1630 options = [] 1631 while self._match_texts(("INCLUDING", "EXCLUDING")): 1632 this = self._prev.text.upper() 1633 1634 id_var = self._parse_id_var() 1635 if not id_var: 1636 return None 1637 1638 options.append( 1639 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1640 ) 1641 1642 return self.expression(exp.LikeProperty, this=table, expressions=options) 1643 1644 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1645 return self.expression( 1646 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1647 ) 1648 1649 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1650 self._match(TokenType.EQ) 1651 return self.expression( 1652 exp.CharacterSetProperty, this=self._parse_var_or_string(), 
default=default 1653 ) 1654 1655 def _parse_returns(self) -> exp.ReturnsProperty: 1656 value: t.Optional[exp.Expression] 1657 is_table = self._match(TokenType.TABLE) 1658 1659 if is_table: 1660 if self._match(TokenType.LT): 1661 value = self.expression( 1662 exp.Schema, 1663 this="TABLE", 1664 expressions=self._parse_csv(self._parse_struct_types), 1665 ) 1666 if not self._match(TokenType.GT): 1667 self.raise_error("Expecting >") 1668 else: 1669 value = self._parse_schema(exp.var("TABLE")) 1670 else: 1671 value = self._parse_types() 1672 1673 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1674 1675 def _parse_describe(self) -> exp.Describe: 1676 kind = self._match_set(self.CREATABLES) and self._prev.text 1677 this = self._parse_table() 1678 return self.expression(exp.Describe, this=this, kind=kind) 1679 1680 def _parse_insert(self) -> exp.Insert: 1681 overwrite = self._match(TokenType.OVERWRITE) 1682 local = self._match_text_seq("LOCAL") 1683 alternative = None 1684 1685 if self._match_text_seq("DIRECTORY"): 1686 this: t.Optional[exp.Expression] = self.expression( 1687 exp.Directory, 1688 this=self._parse_var_or_string(), 1689 local=local, 1690 row_format=self._parse_row_format(match_row=True), 1691 ) 1692 else: 1693 if self._match(TokenType.OR): 1694 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1695 1696 self._match(TokenType.INTO) 1697 self._match(TokenType.TABLE) 1698 this = self._parse_table(schema=True) 1699 1700 return self.expression( 1701 exp.Insert, 1702 this=this, 1703 exists=self._parse_exists(), 1704 partition=self._parse_partition(), 1705 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1706 and self._parse_conjunction(), 1707 expression=self._parse_ddl_select(), 1708 conflict=self._parse_on_conflict(), 1709 returning=self._parse_returning(), 1710 overwrite=overwrite, 1711 alternative=alternative, 1712 ) 1713 1714 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1715 
conflict = self._match_text_seq("ON", "CONFLICT") 1716 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1717 1718 if not conflict and not duplicate: 1719 return None 1720 1721 nothing = None 1722 expressions = None 1723 key = None 1724 constraint = None 1725 1726 if conflict: 1727 if self._match_text_seq("ON", "CONSTRAINT"): 1728 constraint = self._parse_id_var() 1729 else: 1730 key = self._parse_csv(self._parse_value) 1731 1732 self._match_text_seq("DO") 1733 if self._match_text_seq("NOTHING"): 1734 nothing = True 1735 else: 1736 self._match(TokenType.UPDATE) 1737 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1738 1739 return self.expression( 1740 exp.OnConflict, 1741 duplicate=duplicate, 1742 expressions=expressions, 1743 nothing=nothing, 1744 key=key, 1745 constraint=constraint, 1746 ) 1747 1748 def _parse_returning(self) -> t.Optional[exp.Returning]: 1749 if not self._match(TokenType.RETURNING): 1750 return None 1751 1752 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1753 1754 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1755 if not self._match(TokenType.FORMAT): 1756 return None 1757 return self._parse_row_format() 1758 1759 def _parse_row_format( 1760 self, match_row: bool = False 1761 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1762 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1763 return None 1764 1765 if self._match_text_seq("SERDE"): 1766 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1767 1768 self._match_text_seq("DELIMITED") 1769 1770 kwargs = {} 1771 1772 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1773 kwargs["fields"] = self._parse_string() 1774 if self._match_text_seq("ESCAPED", "BY"): 1775 kwargs["escaped"] = self._parse_string() 1776 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1777 
kwargs["collection_items"] = self._parse_string() 1778 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1779 kwargs["map_keys"] = self._parse_string() 1780 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1781 kwargs["lines"] = self._parse_string() 1782 if self._match_text_seq("NULL", "DEFINED", "AS"): 1783 kwargs["null"] = self._parse_string() 1784 1785 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1786 1787 def _parse_load(self) -> exp.LoadData | exp.Command: 1788 if self._match_text_seq("DATA"): 1789 local = self._match_text_seq("LOCAL") 1790 self._match_text_seq("INPATH") 1791 inpath = self._parse_string() 1792 overwrite = self._match(TokenType.OVERWRITE) 1793 self._match_pair(TokenType.INTO, TokenType.TABLE) 1794 1795 return self.expression( 1796 exp.LoadData, 1797 this=self._parse_table(schema=True), 1798 local=local, 1799 overwrite=overwrite, 1800 inpath=inpath, 1801 partition=self._parse_partition(), 1802 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1803 serde=self._match_text_seq("SERDE") and self._parse_string(), 1804 ) 1805 return self._parse_as_command(self._prev) 1806 1807 def _parse_delete(self) -> exp.Delete: 1808 self._match(TokenType.FROM) 1809 1810 return self.expression( 1811 exp.Delete, 1812 this=self._parse_table(), 1813 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1814 where=self._parse_where(), 1815 returning=self._parse_returning(), 1816 limit=self._parse_limit(), 1817 ) 1818 1819 def _parse_update(self) -> exp.Update: 1820 return self.expression( 1821 exp.Update, 1822 **{ # type: ignore 1823 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1824 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1825 "from": self._parse_from(modifiers=True), 1826 "where": self._parse_where(), 1827 "returning": self._parse_returning(), 1828 "limit": self._parse_limit(), 1829 }, 1830 ) 1831 
1832 def _parse_uncache(self) -> exp.Uncache: 1833 if not self._match(TokenType.TABLE): 1834 self.raise_error("Expecting TABLE after UNCACHE") 1835 1836 return self.expression( 1837 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1838 ) 1839 1840 def _parse_cache(self) -> exp.Cache: 1841 lazy = self._match_text_seq("LAZY") 1842 self._match(TokenType.TABLE) 1843 table = self._parse_table(schema=True) 1844 1845 options = [] 1846 if self._match_text_seq("OPTIONS"): 1847 self._match_l_paren() 1848 k = self._parse_string() 1849 self._match(TokenType.EQ) 1850 v = self._parse_string() 1851 options = [k, v] 1852 self._match_r_paren() 1853 1854 self._match(TokenType.ALIAS) 1855 return self.expression( 1856 exp.Cache, 1857 this=table, 1858 lazy=lazy, 1859 options=options, 1860 expression=self._parse_select(nested=True), 1861 ) 1862 1863 def _parse_partition(self) -> t.Optional[exp.Partition]: 1864 if not self._match(TokenType.PARTITION): 1865 return None 1866 1867 return self.expression( 1868 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1869 ) 1870 1871 def _parse_value(self) -> exp.Tuple: 1872 if self._match(TokenType.L_PAREN): 1873 expressions = self._parse_csv(self._parse_conjunction) 1874 self._match_r_paren() 1875 return self.expression(exp.Tuple, expressions=expressions) 1876 1877 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: a CTE-prefixed statement, SELECT, parenthesized
        subquery/table (when ``nested``/``table``), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested query at this position.
            table: allow a parenthesized table expression at this position.
            parse_subquery_alias: whether to consume an alias after a subquery.

        Returns:
            The parsed expression, wrapped in set operations if any follow,
            or ``None`` if no query-like construct starts here.
        """
        cte = self._parse_with()
        if cte:
            # A WITH clause must be followed by a statement that can carry it.
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP (T-SQL style) appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                # DuckDB's simplified PIVOT syntax.
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # FROM-first syntax, e.g. DuckDB's (FROM tbl).
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (common table expression) clause, or ``None`` if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; a stray WITH between CTEs is
            # also tolerated (and a trailing WITH after a comma is consumed).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: ``alias [(cols)] AS (statement)``."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse ``[AS] alias [(col, ...)]``; returns ``None`` if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parenthesized list was empty it wasn't a column list —
            # rewind so the L_PAREN can be re-parsed by the caller.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap ``this`` in a Subquery node, consuming trailing pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing clauses (WHERE, GROUP BY, LIMIT, ...) to a modifiable node."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x, y — the leading operand is an offset, which
                        # is stored as a separate Offset node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an Oracle-style ``/*+ ... */`` optimizer hint."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse ``INTO [TEMPORARY|UNLOGGED] [TABLE] name``."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; ``modifiers`` also consumes query modifiers on it."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) row-pattern-matching clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        # ROWS PER MATCH variants are kept as verbatim Var nodes.
        # NOTE(review): the f-strings below contain no placeholders; plain
        # string literals would be equivalent.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan tokens, balancing parentheses, to capture the raw pattern SQL.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier,
            # e.g. Hive's LATERAL VIEW explode(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a JOIN, if present."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse one join (comma join, JOIN ... ON/USING, or APPLY), or ``None``."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The tokens we consumed weren't part of a join — rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with ``index`` given, parse its ON-table part."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # Teradata

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL ``WITH (...)`` or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse ``[catalog.][db.]table`` (extra dots nest into Dot expressions)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or named table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: whether TABLESAMPLE comes before or after the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse ``UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]``."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In this dialect the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list used as a table, optionally wrapped in parens."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE (or DuckDB's ``USING SAMPLE`` when ``as_modifier``)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            # Hive: BUCKET x OUT OF y [ON col]
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): the duplicated "bucket_denominator =" below is
            # redundant (harmless double assignment) — a single assignment
            # would suffice.
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT: ``PIVOT tbl ON ... USING ... GROUP BY ...``."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard ``PIVOT/UNPIVOT (aggs FOR col IN (...))`` clause."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without parens — not a pivot; rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names a PIVOT produces,
            # combining each IN value with each aggregation name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or ``None`` if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with GROUPING SETS / ROLLUP / CUBE / TOTALS extensions."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store a truthy flag instead of columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse ``GROUPING SETS (...)``, or ``None`` if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or ``None`` if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or ``None`` if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns ``this`` unchanged when no ORDER BY follows."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic ordered clause (e.g. SORT BY / CLUSTER BY) into ``exp_class``."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST]."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering wasn't explicit, infer it from the dialect's
        # NULL_ORDERING default so it can be transpiled faithfully.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when ``top=True``) or FETCH; else return ``this``."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL's LIMIT offset, count — first operand is the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            # NOTE(review): ``only`` is validated above but not stored on the
            # Fetch node — only ``with_ties`` is carried through.
            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse ``OFFSET n [ROW|ROWS]``; else return ``this``."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # NOWAIT -> True, WAIT n -> parsed expression, SKIP LOCKED -> False.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operations onto ``this``."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-joined expressions (lowest precedence)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, >=)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/..., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``IS [NOT] {DISTINCT FROM expr | NULL | TRUE | FALSE}``."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind past the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of an IN predicate: UNNEST, a list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery gets the "query" arg; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse ``BETWEEN low AND high`` (BETWEEN token already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ``ESCAPE 'char'`` suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to ``INTERVAL '<n>' <unit>``."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. '5 day' — split value and unit apart.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> built from LT/GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a ``TYPE literal`` cast, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — a typed literal becomes a Cast
                # (or a dialect-specific literal via TYPE_LITERAL_PARSERS).
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with nothing following was a misparse — rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type-size argument, e.g. the ``10`` in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), sized, and
        timestamp-with-timezone forms; ``check_func`` guards against consuming
        a function call that merely shares a type's name."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")  # Teradata UDT prefix

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call with this name.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffixes: INT[][] -> nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a subscript, not a type — rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax: ARRAY<INT>, STRUCT<a INT, ...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek: TYPE(...) followed by a string is a typed literal (e.g.
            # DATE('...')-like), not a type — otherwise treat as misparse.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: ``name [:] type`` as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ``AT TIME ZONE <expr>`` suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted/bracketed operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (``.``, ``::``, brackets) to ``this``."""
        this = self._parse_bracket(this)
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax — the right operand must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Dialect operators (e.g. JSON extraction) take the next raw token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Each extra dot shifts the parts: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a parenthesized
        subquery / tuple / expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # A number written without a leading zero, e.g. ".5".
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use (defaults to self.FUNCTIONS).
            anonymous: when True, always produce exp.Anonymous instead of a typed function node.
            optional_parens: allow paren-less functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(SELECT ...) wrap a subquery rather than args.
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build its typed node and validate the arg list.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name, optionally followed by a parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ..., (x, y) -> ...) or, failing that, a regular argument expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a normal expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints), unless a nested SELECT follows."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) — a computed column rather than an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via a leading CONSTRAINT keyword."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint; unnamed constraints are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` skips requiring the keyword itself."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... with optional ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a [...] subscript / array literal or a {...} struct literal applied to `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # A slice with no start, e.g. x[:3].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a ':' follows (e.g. x[1:3]); otherwise return it unchanged."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call IF(...) or the statement-like IF ... THEN ... END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING {MAX | MIN} column])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt = self._parse_at_time_zone(self._parse_string())

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS DATE/TIMESTAMP FORMAT ...) is equivalent to a str-to-time conversion.
                return self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt.this if fmt else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's arguments, coalescing NULLs to '' for dialects that require it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, handling DISTINCT, separators and WITHIN GROUP orders."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type); `strict` picks Cast vs TryCast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3801 """ 3802 args = self._parse_csv(self._parse_conjunction) 3803 3804 if len(args) < 3: 3805 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3806 3807 expression, *expressions = args 3808 if not expression: 3809 return None 3810 3811 ifs = [] 3812 for search, result in zip(expressions[::2], expressions[1::2]): 3813 if not search or not result: 3814 return None 3815 3816 if isinstance(search, exp.Literal): 3817 ifs.append( 3818 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3819 ) 3820 elif isinstance(search, exp.Null): 3821 ifs.append( 3822 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3823 ) 3824 else: 3825 cond = exp.or_( 3826 exp.EQ(this=expression.copy(), expression=search), 3827 exp.and_( 3828 exp.Is(this=expression.copy(), expression=exp.Null()), 3829 exp.Is(this=search.copy(), expression=exp.Null()), 3830 copy=False, 3831 ), 3832 copy=False, 3833 ) 3834 ifs.append(exp.If(this=cond, true=result)) 3835 3836 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3837 3838 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3839 self._match_text_seq("KEY") 3840 key = self._parse_field() 3841 self._match(TokenType.COLON) 3842 self._match_text_seq("VALUE") 3843 value = self._parse_field() 3844 3845 if not key and not value: 3846 return None 3847 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3848 3849 def _parse_json_object(self) -> exp.JSONObject: 3850 star = self._parse_star() 3851 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3852 3853 null_handling = None 3854 if self._match_text_seq("NULL", "ON", "NULL"): 3855 null_handling = "NULL ON NULL" 3856 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3857 null_handling = "ABSENT ON NULL" 3858 3859 unique_keys = None 3860 if self._match_text_seq("WITH", "UNIQUE"): 3861 unique_keys = True 3862 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3863 unique_keys = False 3864 3865 self._match_text_seq("KEYS") 3866 3867 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3868 format_json = self._match_text_seq("FORMAT", "JSON") 3869 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3870 3871 return self.expression( 3872 exp.JSONObject, 3873 expressions=expressions, 3874 null_handling=null_handling, 3875 unique_keys=unique_keys, 3876 return_type=return_type, 3877 format_json=format_json, 3878 encoding=encoding, 3879 ) 3880 3881 def _parse_logarithm(self) -> exp.Func: 3882 # Default argument order is base, expression 3883 args = self._parse_csv(self._parse_range) 3884 3885 if len(args) > 1: 3886 if not self.LOG_BASE_FIRST: 3887 args.reverse() 3888 return exp.Log.from_arg_list(args) 3889 3890 return self.expression( 3891 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3892 ) 3893 3894 def _parse_match_against(self) -> exp.MatchAgainst: 3895 expressions = self._parse_csv(self._parse_column) 3896 3897 self._match_text_seq(")", "AGAINST", "(") 3898 3899 this = self._parse_string() 3900 3901 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3902 modifier = "IN NATURAL LANGUAGE MODE" 3903 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3904 modifier = f"{modifier} WITH QUERY EXPANSION" 3905 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3906 modifier = "IN BOOLEAN MODE" 3907 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3908 modifier = "WITH QUERY EXPANSION" 3909 else: 3910 modifier = None 3911 3912 return self.expression( 3913 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3914 ) 3915 3916 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3917 def _parse_open_json(self) -> exp.OpenJSON: 3918 this = self._parse_bitwise() 3919 path = self._match(TokenType.COMMA) and self._parse_string() 3920 3921 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3922 this = self._parse_field(any_token=True) 3923 kind = self._parse_types() 3924 path = self._parse_string() 3925 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3926 3927 return self.expression( 3928 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3929 ) 3930 3931 expressions = None 3932 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3933 self._match_l_paren() 3934 expressions = self._parse_csv(_parse_open_json_column_def) 3935 3936 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3937 3938 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3939 args = self._parse_csv(self._parse_bitwise) 3940 3941 if self._match(TokenType.IN): 3942 return self.expression( 3943 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3944 ) 3945 3946 if haystack_first: 3947 haystack = seq_get(args, 0) 3948 needle = seq_get(args, 1) 3949 else: 3950 needle = seq_get(args, 0) 3951 haystack = seq_get(args, 1) 3952 3953 return self.expression( 3954 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3955 ) 3956 3957 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3958 args = self._parse_csv(self._parse_table) 3959 return exp.JoinHint(this=func_name.upper(), expressions=args) 3960 3961 def _parse_substring(self) -> exp.Substring: 3962 # Postgres supports the form: substring(string [from int] [for int]) 3963 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3964 3965 args = self._parse_csv(self._parse_bitwise) 3966 3967 if self._match(TokenType.FROM): 3968 args.append(self._parse_bitwise()) 3969 if self._match(TokenType.FOR): 3970 args.append(self._parse_bitwise()) 3971 3972 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3973 3974 def _parse_trim(self) -> exp.Trim: 3975 # https://www.w3resource.com/sql/character-functions/trim.php 3976 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3977 3978 position = None 3979 collation = None 3980 3981 if self._match_texts(self.TRIM_TYPES): 3982 position = self._prev.text.upper() 3983 3984 expression = self._parse_bitwise() 3985 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3986 this = self._parse_bitwise() 3987 else: 3988 this = expression 3989 expression = None 3990 3991 if self._match(TokenType.COLLATE): 3992 collation = self._parse_bitwise() 3993 3994 return self.expression( 3995 exp.Trim, this=this, position=position, expression=expression, collation=collation 3996 ) 3997 3998 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3999 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4000 4001 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4002 return self._parse_window(self._parse_id_var(), alias=True) 4003 4004 def _parse_respect_or_ignore_nulls( 4005 self, this: t.Optional[exp.Expression] 4006 ) -> t.Optional[exp.Expression]: 4007 if self._match_text_seq("IGNORE", "NULLS"): 4008 return self.expression(exp.IgnoreNulls, this=this) 4009 if self._match_text_seq("RESPECT", "NULLS"): 4010 return self.expression(exp.RespectNulls, this=this) 4011 return this 4012 4013 def _parse_window( 4014 self, this: t.Optional[exp.Expression], alias: bool = False 4015 ) -> t.Optional[exp.Expression]: 4016 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4017 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4018 self._match_r_paren() 4019 4020 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4021 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4022 if self._match_text_seq("WITHIN", "GROUP"): 4023 order = self._parse_wrapped(self._parse_order) 4024 this = self.expression(exp.WithinGroup, this=this, expression=order) 4025 4026 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4027 # Some dialects choose to implement and some do not. 4028 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4029 4030 # There is some code above in _parse_lambda that handles 4031 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4032 4033 # The below changes handle 4034 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4035 4036 # Oracle allows both formats 4037 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4038 # and Snowflake chose to do the same for familiarity 4039 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4040 this = self._parse_respect_or_ignore_nulls(this) 4041 4042 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named window definition: consume the optional AS keyword.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows, so there is no window clause.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # `OVER window_name` — reference to a previously named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: [ROWS | RANGE] BETWEEN <spec> AND <spec>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an alias (or parenthesized alias list) to `this` if present.

        When `explicit` is True, an alias is only parsed if the AS keyword
        was seen.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or treat a keyword/var token as an identifier.

        With `any_token` any non-reserved token qualifies; otherwise only the
        token types in `tokens` (default: ID_VAR_TOKENS) do.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token (or any/selected token types)."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, or a string literal if no Var is present."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if its parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `EXCEPT (cols)` / `EXCEPT cols` column exclusion lists."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `REPLACE (exprs)` / `REPLACE exprs` column replacement lists."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments on the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; error if they are required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or a (possibly aliased) scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT used inside DDL, e.g. CREATE TABLE ... AS SELECT."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Each mode is a run of VAR tokens; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] col def [FIRST|AFTER]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] col, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION spec[, ...]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY clauses."""
        this = None
        # The caller consumed the constraint-kind token.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ADD actions: constraints, or columns otherwise."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        # ALTER COLUMN col DROP DEFAULT / SET DEFAULT expr.
        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # ALTER COLUMN col [SET DATA] TYPE dtype [COLLATE ...] [USING expr].
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE DROP actions: partitions, or columns otherwise."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO new_name."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE, dispatching actions through ALTER_PARSERS.

        Falls back to an opaque Command if the statement is not fully
        understood (unknown action, or trailing unparsed tokens).
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW, using a dialect-specific sub-parser when one matches."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a `name = value` / `name TO value` SET item; None if absent."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION characteristics."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring dialect-specific SET parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in a Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property, e.g. ClickHouse LAYOUT(kind(k v ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a RANGE(MIN ... MAX ...) dictionary property; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the sub-parser for the longest keyword sequence in `trie`.

        Leaves the consumed keywords behind on success; rewinds on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token matches."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Transfer any comments on the matched token to `expression`.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True and (optionally) advance if the current token is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (advancing past both) if the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required `(`, raising a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required `)`, raising a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewinds fully on failure."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
4721 4722 def _replace_columns_with_dots(self, this): 4723 if isinstance(this, exp.Dot): 4724 exp.replace_children(this, self._replace_columns_with_dots) 4725 elif isinstance(this, exp.Column): 4726 exp.replace_children(this, self._replace_columns_with_dots) 4727 table = this.args.get("table") 4728 this = ( 4729 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4730 ) 4731 4732 return this 4733 4734 def _replace_lambda( 4735 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4736 ) -> t.Optional[exp.Expression]: 4737 if not node: 4738 return node 4739 4740 for column in node.find_all(exp.Column): 4741 if column.parts[0].name in lambda_variables: 4742 dot_or_id = column.to_dot() if column.table else column.this 4743 parent = column.parent 4744 4745 while isinstance(parent, exp.Dot): 4746 if not isinstance(parent.parent, exp.Dot): 4747 parent.replace(dot_or_id) 4748 break 4749 parent = parent.parent 4750 else: 4751 if column is node: 4752 node = dot_or_id 4753 else: 4754 column.replace(dot_or_id) 4755 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from an alternating key/value argument list.

    A single `*` argument yields a StarMap instead. Keys occupy the even
    positions of `args` and values the odd ones; an odd-length list raises
    IndexError when the final value is missing.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    key_exprs = [args[i] for i in range(0, len(args), 2)]
    value_exprs = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=key_exprs),
        values=exp.Array(expressions=value_exprs),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Parsers for lambda-like constructs, keyed by the arrow token that follows
    # the argument list (-> builds a Lambda, => builds a Kwarg)
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that may follow a column: :: casts, JSON(B) extraction arrows, etc.
    # DOT maps to None because member access needs no dedicated parser callable.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: target expression type -> parser callable
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: the statement's first token -> parser
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator dispatch
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression dispatch; each callable receives the matched token
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        # "except" is a Python keyword, hence the dict-splat instead of kwargs
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder dispatch (?, @param, :name / :1 style)
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, IS, ...) -> parser for the RHS
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser. Keys are uppercase; they are looked up
    # via self._prev.text.upper() after _match_texts
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: 
self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, 
            this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without an explicit name
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not followed by parentheses
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need special (non-generic) parsing
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query modifier clauses (WHERE, GROUP BY, LIMIT, ...) -> parser; keys are
    # the arg names set on the modified expression
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keyword -> parser
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can take query modifier clauses
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that may start the SELECT part of a DDL statement
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may precede VOLATILE in a CREATE statement (see _parse_volatile_property)
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether :: / CAST produce a strict Cast (vs TryCast) — see COLUMN_OPERATORS
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        # See the class docstring for the meaning of these settings
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state, so the instance can be reused for a new parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed successfully; surface all attempts
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split into one token list per statement; semicolon tokens are dropped,
        # and a trailing semicolon does not open a new (empty) chunk
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start at -1 so the first _advance() lands on token 0
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # If tokens remain, the chunk was not fully consumed by the parser
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes (\033[4m ... \033[0m)
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance) 999 1000 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1001 if expression and self._prev_comments: 1002 expression.add_comments(self._prev_comments) 1003 self._prev_comments = None 1004 1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression 1021 1022 def _find_sql(self, start: Token, end: Token) -> str: 1023 return self.sql[start.start : end.end + 1] 1024 1025 def _advance(self, times: int = 1) -> None: 1026 self._index += times 1027 self._curr = seq_get(self._tokens, self._index) 1028 self._next = seq_get(self._tokens, self._index + 1) 1029 1030 if self._index > 0: 1031 self._prev = self._tokens[self._index - 1] 1032 self._prev_comments = self._prev.comments 1033 else: 1034 self._prev = None 1035 self._prev_comments = None 1036 1037 def _retreat(self, index: int) -> None: 1038 if index != self._index: 1039 self._advance(index - self._index) 1040 1041 def _parse_command(self) -> exp.Command: 1042 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1043 1044 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1045 start = self._prev 1046 exists = self._parse_exists() if allow_exists else None 1047 1048 self._match(TokenType.ON) 1049 1050 kind = self._match_set(self.CREATABLES) and self._prev 1051 if not kind: 
1052 return self._parse_as_command(start) 1053 1054 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1055 this = self._parse_user_defined_function(kind=kind.token_type) 1056 elif kind.token_type == TokenType.TABLE: 1057 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1058 elif kind.token_type == TokenType.COLUMN: 1059 this = self._parse_column() 1060 else: 1061 this = self._parse_id_var() 1062 1063 self._match(TokenType.IS) 1064 1065 return self.expression( 1066 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1067 ) 1068 1069 def _parse_to_table( 1070 self, 1071 ) -> exp.ToTableProperty: 1072 table = self._parse_table_parts(schema=True) 1073 return self.expression(exp.ToTableProperty, this=table) 1074 1075 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1076 def _parse_ttl(self) -> exp.Expression: 1077 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1078 this = self._parse_bitwise() 1079 1080 if self._match_text_seq("DELETE"): 1081 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1082 if self._match_text_seq("RECOMPRESS"): 1083 return self.expression( 1084 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1085 ) 1086 if self._match_text_seq("TO", "DISK"): 1087 return self.expression( 1088 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1089 ) 1090 if self._match_text_seq("TO", "VOLUME"): 1091 return self.expression( 1092 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1093 ) 1094 1095 return this 1096 1097 expressions = self._parse_csv(_parse_ttl_action) 1098 where = self._parse_where() 1099 group = self._parse_group() 1100 1101 aggregates = None 1102 if group and self._match(TokenType.SET): 1103 aggregates = self._parse_csv(self._parse_set_item) 1104 1105 return self.expression( 1106 exp.MergeTreeTTL, 1107 expressions=expressions, 1108 where=where, 1109 
group=group, 1110 aggregates=aggregates, 1111 ) 1112 1113 def _parse_statement(self) -> t.Optional[exp.Expression]: 1114 if self._curr is None: 1115 return None 1116 1117 if self._match_set(self.STATEMENT_PARSERS): 1118 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1119 1120 if self._match_set(Tokenizer.COMMANDS): 1121 return self._parse_command() 1122 1123 expression = self._parse_expression() 1124 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1125 return self._parse_query_modifiers(expression) 1126 1127 def _parse_drop(self) -> exp.Drop | exp.Command: 1128 start = self._prev 1129 temporary = self._match(TokenType.TEMPORARY) 1130 materialized = self._match_text_seq("MATERIALIZED") 1131 1132 kind = self._match_set(self.CREATABLES) and self._prev.text 1133 if not kind: 1134 return self._parse_as_command(start) 1135 1136 return self.expression( 1137 exp.Drop, 1138 exists=self._parse_exists(), 1139 this=self._parse_table(schema=True), 1140 kind=kind, 1141 temporary=temporary, 1142 materialized=materialized, 1143 cascade=self._match_text_seq("CASCADE"), 1144 constraints=self._match_text_seq("CONSTRAINTS"), 1145 purge=self._match_text_seq("PURGE"), 1146 ) 1147 1148 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1149 return ( 1150 self._match(TokenType.IF) 1151 and (not not_ or self._match(TokenType.NOT)) 1152 and self._match(TokenType.EXISTS) 1153 ) 1154 1155 def _parse_create(self) -> exp.Create | exp.Command: 1156 # Note: this can't be None because we've matched a statement parser 1157 start = self._prev 1158 replace = start.text.upper() == "REPLACE" or self._match_pair( 1159 TokenType.OR, TokenType.REPLACE 1160 ) 1161 unique = self._match(TokenType.UNIQUE) 1162 1163 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1164 self._advance() 1165 1166 properties = None 1167 create_token = self._match_set(self.CREATABLES) and self._prev 1168 1169 if not create_token: 1170 # 
exp.Properties.Location.POST_CREATE 1171 properties = self._parse_properties() 1172 create_token = self._match_set(self.CREATABLES) and self._prev 1173 1174 if not properties or not create_token: 1175 return self._parse_as_command(start) 1176 1177 exists = self._parse_exists(not_=True) 1178 this = None 1179 expression = None 1180 indexes = None 1181 no_schema_binding = None 1182 begin = None 1183 clone = None 1184 1185 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1186 nonlocal properties 1187 if properties and temp_props: 1188 properties.expressions.extend(temp_props.expressions) 1189 elif temp_props: 1190 properties = temp_props 1191 1192 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1193 this = self._parse_user_defined_function(kind=create_token.token_type) 1194 1195 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1196 extend_props(self._parse_properties()) 1197 1198 self._match(TokenType.ALIAS) 1199 begin = self._match(TokenType.BEGIN) 1200 return_ = self._match_text_seq("RETURN") 1201 expression = self._parse_statement() 1202 1203 if return_: 1204 expression = self.expression(exp.Return, this=expression) 1205 elif create_token.token_type == TokenType.INDEX: 1206 this = self._parse_index(index=self._parse_id_var()) 1207 elif create_token.token_type in self.DB_CREATABLES: 1208 table_parts = self._parse_table_parts(schema=True) 1209 1210 # exp.Properties.Location.POST_NAME 1211 self._match(TokenType.COMMA) 1212 extend_props(self._parse_properties(before=True)) 1213 1214 this = self._parse_schema(this=table_parts) 1215 1216 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1217 extend_props(self._parse_properties()) 1218 1219 self._match(TokenType.ALIAS) 1220 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1221 # exp.Properties.Location.POST_ALIAS 1222 extend_props(self._parse_properties()) 1223 1224 expression = self._parse_ddl_select() 1225 1226 if 
create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # Snowflake-style `CREATE ... CLONE <table> [AT|BEFORE (<kind> => <expr>)]`
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that precedes the CREATE object's name.

        Collects optional leading modifier keywords and then dispatches to the
        matching entry in PROPERTY_PARSERS, forwarding only the modifiers that
        were actually present in the input.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: order matters — each _match_* call consumes tokens as a side effect.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The targeted parser does not accept one of the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property appearing after the object's schema."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; peek without consuming (advance=False).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a `STORED [AS] ...` file format clause, including the
        INPUTFORMAT/OUTPUTFORMAT pair form."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parse `[=|AS] <field>` and wrap it in the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class,
this=self._parse_field()) 1340 1341 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1342 properties = [] 1343 while True: 1344 if before: 1345 prop = self._parse_property_before() 1346 else: 1347 prop = self._parse_property() 1348 1349 if not prop: 1350 break 1351 for p in ensure_list(prop): 1352 properties.append(p) 1353 1354 if properties: 1355 return self.expression(exp.Properties, expressions=properties) 1356 1357 return None 1358 1359 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1360 return self.expression( 1361 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1362 ) 1363 1364 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1365 if self._index >= 2: 1366 pre_volatile_token = self._tokens[self._index - 2] 1367 else: 1368 pre_volatile_token = None 1369 1370 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1371 return exp.VolatileProperty() 1372 1373 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1374 1375 def _parse_with_property( 1376 self, 1377 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1378 self._match(TokenType.WITH) 1379 if self._match(TokenType.L_PAREN, advance=False): 1380 return self._parse_wrapped_csv(self._parse_property) 1381 1382 if self._match_text_seq("JOURNAL"): 1383 return self._parse_withjournaltable() 1384 1385 if self._match_text_seq("DATA"): 1386 return self._parse_withdata(no=False) 1387 elif self._match_text_seq("NO", "DATA"): 1388 return self._parse_withdata(no=True) 1389 1390 if not self._next: 1391 return None 1392 1393 return self._parse_withisolatedloading() 1394 1395 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1396 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1397 self._match(TokenType.EQ) 1398 1399 user = self._parse_id_var() 1400 self._match(TokenType.PARAMETER) 1401 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1402 1403 if not user or not host: 1404 return None 1405 1406 return exp.DefinerProperty(this=f"{user}@{host}") 1407 1408 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1409 self._match(TokenType.TABLE) 1410 self._match(TokenType.EQ) 1411 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1412 1413 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1414 return self.expression(exp.LogProperty, no=no) 1415 1416 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1417 return self.expression(exp.JournalProperty, **kwargs) 1418 1419 def _parse_checksum(self) -> exp.ChecksumProperty: 1420 self._match(TokenType.EQ) 1421 1422 on = None 1423 if self._match(TokenType.ON): 1424 on = True 1425 elif self._match_text_seq("OFF"): 1426 on = False 1427 1428 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1429 1430 def _parse_cluster(self) -> exp.Cluster: 1431 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1432 1433 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1434 self._match_text_seq("BY") 1435 1436 self._match_l_paren() 1437 expressions = self._parse_csv(self._parse_column) 1438 self._match_r_paren() 1439 1440 if self._match_text_seq("SORTED", "BY"): 1441 self._match_l_paren() 1442 sorted_by = self._parse_csv(self._parse_ordered) 1443 self._match_r_paren() 1444 else: 1445 sorted_by = None 1446 1447 self._match(TokenType.INTO) 1448 buckets = self._parse_number() 1449 self._match_text_seq("BUCKETS") 1450 1451 return self.expression( 1452 exp.ClusteredByProperty, 1453 expressions=expressions, 1454 sorted_by=sorted_by, 1455 buckets=buckets, 1456 ) 1457 1458 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1459 if not self._match_text_seq("GRANTS"): 1460 self._retreat(self._index - 1) 1461 return None 1462 1463 return 
self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse `FREESPACE [=] <n> [PERCENT]` (teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse `[NO|DEFAULT] MERGEBLOCKRATIO [= <n> [PERCENT]]` (teradata)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse `DATABLOCKSIZE [= <n>] [BYTES|KBYTES|KILOBYTES]` (teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse `BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]`."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table-parts target; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `PARTITION BY <exprs>`, returning [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY [=] (<schema>|<expr>)`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse `WITH [NO] DATA [AND [NO] STATISTICS]` (teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse NO-prefixed properties; currently only `NO PRIMARY INDEX`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse `ON COMMIT PRESERVE ROWS` / `ON COMMIT DELETE ROWS`."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY (<id>)` (redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]*`."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `[COMPOUND] SORTKEY (<ids>)` (redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <value>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(),
default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF `RETURNS <type>` or `RETURNS TABLE ...` clause."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # `RETURNS TABLE <col type, ...>` struct-style signature
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse `DESCRIBE [<kind>] <table>`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including the hive
        `INSERT [OVERWRITE] [LOCAL] DIRECTORY ...` form."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite `INSERT OR REPLACE|IGNORE|...`
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # `ON CONFLICT` (postgres/sqlite) vs `ON DUPLICATE KEY` (mysql);
        # at most one of the two sequences can match.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <columns>`."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `ROW FORMAT ...` once the ROW token has been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse hive `ROW FORMAT SERDE <str>` or `ROW FORMAT DELIMITED ...`.

        Args:
            match_row: when True, require and consume a leading `ROW FORMAT`.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse hive `LOAD DATA [LOCAL] INPATH ...`; anything else falls back
        to an opaque exp.Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )
    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>` (spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]` (spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<exprs>)`."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a SELECT,
        a parenthesized body (when nested/table), or a VALUES clause.

        Args:
            nested: allow a parenthesized subquery body.
            table: parse the parenthesized body as a table instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. bigquery `SELECT AS STRUCT|VALUE`
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `alias [(cols)] AS (<statement>)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [(col, ...)]` into an exp.TableAlias."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If no columns parsed, the paren wasn't a column list — back out.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in an exp.Subquery with optional pivots/alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, where, group, limit, ...)
        to `this` when it is a modifiable expression; otherwise pass through."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # A LIMIT may carry an embedded OFFSET; hoist it onto
                        # the query node itself.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))

                    this.set(key, expression)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint `/*+ ... */`."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
modifiers: bool = False, skip_from_token: bool = False 2074 ) -> t.Optional[exp.From]: 2075 if not skip_from_token and not self._match(TokenType.FROM): 2076 return None 2077 2078 comments = self._prev_comments 2079 this = self._parse_table() 2080 2081 return self.expression( 2082 exp.From, 2083 comments=comments, 2084 this=self._parse_query_modifiers(this) if modifiers else this, 2085 ) 2086 2087 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2088 if not self._match(TokenType.MATCH_RECOGNIZE): 2089 return None 2090 2091 self._match_l_paren() 2092 2093 partition = self._parse_partition_by() 2094 order = self._parse_order() 2095 measures = ( 2096 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2097 ) 2098 2099 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2100 rows = exp.var("ONE ROW PER MATCH") 2101 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2102 text = "ALL ROWS PER MATCH" 2103 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2104 text += f" SHOW EMPTY MATCHES" 2105 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2106 text += f" OMIT EMPTY MATCHES" 2107 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2108 text += f" WITH UNMATCHED ROWS" 2109 rows = exp.var(text) 2110 else: 2111 rows = None 2112 2113 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2114 text = "AFTER MATCH SKIP" 2115 if self._match_text_seq("PAST", "LAST", "ROW"): 2116 text += f" PAST LAST ROW" 2117 elif self._match_text_seq("TO", "NEXT", "ROW"): 2118 text += f" TO NEXT ROW" 2119 elif self._match_text_seq("TO", "FIRST"): 2120 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2121 elif self._match_text_seq("TO", "LAST"): 2122 text += f" TO LAST {self._advance_any().text}" # type: ignore 2123 after = exp.var(text) 2124 else: 2125 after = None 2126 2127 if self._match_text_seq("PATTERN"): 2128 self._match_l_paren() 2129 2130 if not self._curr: 2131 self.raise_error("Expecting )", 
self._curr) 2132 2133 paren = 1 2134 start = self._curr 2135 2136 while self._curr and paren > 0: 2137 if self._curr.token_type == TokenType.L_PAREN: 2138 paren += 1 2139 if self._curr.token_type == TokenType.R_PAREN: 2140 paren -= 1 2141 2142 end = self._prev 2143 self._advance() 2144 2145 if paren > 0: 2146 self.raise_error("Expecting )", self._curr) 2147 2148 pattern = exp.var(self._find_sql(start, end)) 2149 else: 2150 pattern = None 2151 2152 define = ( 2153 self._parse_csv( 2154 lambda: self.expression( 2155 exp.Alias, 2156 alias=self._parse_id_var(any_token=True), 2157 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2158 ) 2159 ) 2160 if self._match_text_seq("DEFINE") 2161 else None 2162 ) 2163 2164 self._match_r_paren() 2165 2166 return self.expression( 2167 exp.MatchRecognize, 2168 partition_by=partition, 2169 order=order, 2170 measures=measures, 2171 rows=rows, 2172 after=after, 2173 pattern=pattern, 2174 define=define, 2175 alias=self._parse_table_alias(), 2176 ) 2177 2178 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2179 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2180 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2181 2182 if outer_apply or cross_apply: 2183 this = self._parse_select(table=True) 2184 view = None 2185 outer = not cross_apply 2186 elif self._match(TokenType.LATERAL): 2187 this = self._parse_select(table=True) 2188 view = self._match(TokenType.VIEW) 2189 outer = self._match(TokenType.OUTER) 2190 else: 2191 return None 2192 2193 if not this: 2194 this = self._parse_function() or self._parse_id_var(any_token=False) 2195 while self._match(TokenType.DOT): 2196 this = exp.Dot( 2197 this=this, 2198 expression=self._parse_function() or self._parse_id_var(any_token=False), 2199 ) 2200 2201 if view: 2202 table = self._parse_id_var(any_token=False) 2203 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2204 table_alias: t.Optional[exp.TableAlias] = 
self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join modifier tokens, in (method, side, kind) order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a join clause; a bare comma is an implicit join."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Modifiers without a JOIN token — undo and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: when given, the index name was already parsed (CREATE INDEX
                path) and only the `ON <table>` tail is expected here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL `WITH (...)` table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions",
                    self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table reference: [catalog.][db.]table, or deeper Dot chains."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: the previous db becomes catalog, table becomes db.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: LATERAL, UNNEST, VALUES, a subquery, or a name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            # Some dialects (e.g. Hive) place TABLESAMPLE before the alias.
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table expression it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In these dialects the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and
self._match_text_seq("USING", "SAMPLE") 2456 ): 2457 return None 2458 2459 bucket_numerator = None 2460 bucket_denominator = None 2461 bucket_field = None 2462 percent = None 2463 rows = None 2464 size = None 2465 seed = None 2466 2467 kind = ( 2468 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2469 ) 2470 method = self._parse_var(tokens=(TokenType.ROW,)) 2471 2472 self._match(TokenType.L_PAREN) 2473 2474 num = self._parse_number() 2475 2476 if self._match_text_seq("BUCKET"): 2477 bucket_numerator = self._parse_number() 2478 self._match_text_seq("OUT", "OF") 2479 bucket_denominator = bucket_denominator = self._parse_number() 2480 self._match(TokenType.ON) 2481 bucket_field = self._parse_field() 2482 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2483 percent = num 2484 elif self._match(TokenType.ROWS): 2485 rows = num 2486 else: 2487 size = num 2488 2489 self._match(TokenType.R_PAREN) 2490 2491 if self._match(TokenType.L_PAREN): 2492 method = self._parse_var() 2493 seed = self._match(TokenType.COMMA) and self._parse_number() 2494 self._match_r_paren() 2495 elif self._match_texts(("SEED", "REPEATABLE")): 2496 seed = self._parse_wrapped(self._parse_number) 2497 2498 return self.expression( 2499 exp.TableSample, 2500 method=method, 2501 bucket_numerator=bucket_numerator, 2502 bucket_denominator=bucket_denominator, 2503 bucket_field=bucket_field, 2504 percent=percent, 2505 rows=rows, 2506 size=size, 2507 seed=seed, 2508 kind=kind, 2509 ) 2510 2511 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2512 return list(iter(self._parse_pivot, None)) 2513 2514 # https://duckdb.org/docs/sql/statements/pivot 2515 def _parse_simplified_pivot(self) -> exp.Pivot: 2516 def _parse_on() -> t.Optional[exp.Expression]: 2517 this = self._parse_bitwise() 2518 return self._parse_in(this) if self._match(TokenType.IN) else this 2519 2520 this = self._parse_table() 2521 expressions = self._match(TokenType.ON) and 
self._parse_csv(_parse_on) 2522 using = self._match(TokenType.USING) and self._parse_csv( 2523 lambda: self._parse_alias(self._parse_function()) 2524 ) 2525 group = self._parse_group() 2526 return self.expression( 2527 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2528 ) 2529 2530 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2531 index = self._index 2532 2533 if self._match(TokenType.PIVOT): 2534 unpivot = False 2535 elif self._match(TokenType.UNPIVOT): 2536 unpivot = True 2537 else: 2538 return None 2539 2540 expressions = [] 2541 field = None 2542 2543 if not self._match(TokenType.L_PAREN): 2544 self._retreat(index) 2545 return None 2546 2547 if unpivot: 2548 expressions = self._parse_csv(self._parse_column) 2549 else: 2550 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2551 2552 if not expressions: 2553 self.raise_error("Failed to parse PIVOT's aggregation list") 2554 2555 if not self._match(TokenType.FOR): 2556 self.raise_error("Expecting FOR") 2557 2558 value = self._parse_column() 2559 2560 if not self._match(TokenType.IN): 2561 self.raise_error("Expecting IN") 2562 2563 field = self._parse_in(value, alias=True) 2564 2565 self._match_r_paren() 2566 2567 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2568 2569 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2570 pivot.set("alias", self._parse_table_alias()) 2571 2572 if not unpivot: 2573 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2574 2575 columns: t.List[exp.Expression] = [] 2576 for fld in pivot.args["field"].expressions: 2577 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2578 for name in names: 2579 if self.PREFIXED_PIVOT_COLUMNS: 2580 name = f"{name}_{field_name}" if name else field_name 2581 else: 2582 name = f"{field_name}_{name}" if name else field_name 2583 2584 columns.append(exp.to_identifier(name)) 
            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the output column names implied by PIVOT aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY: expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; explicit forms store column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while grouping constructs keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST handling."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is implicit, derive it from the dialect's NULL_ORDERING.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP, or a FETCH {FIRST|NEXT} ... clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL form: LIMIT offset, count
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE locking clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # NOWAIT -> True, WAIT <n> -> the parsed value, SKIP LOCKED -> False.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is explicitly given.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...), IS and NOT variants."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if
        self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right side of IS: [NOT] DISTINCT FROM, NULL, or a boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM its negation.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS-expression after all -- rewind past IS (and NOT).
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse IN: UNNEST(...), a (subquery | expression list), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << / >> arriving as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this,
                    expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse intervals, casts written as `TYPE literal`, or plain column refs."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `TYPE 'literal'` is a cast (possibly with a dialect-specific parser).
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else -- reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type; None if not a type."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # e.g. DECIMAL(1) could also be a function call -- decided further below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `type[]` array syntax; extra bracket pairs nest further ARRAYs.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # An unmatched `[` means this wasn't a type after all.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket parameterization: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: only a following string
            # literal (e.g. DATE '2020-01-01') confirms this was a type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one `name: type` (or `name type`) member of a STRUCT."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, ::casts, brackets) to `this`."""
        this = self._parse_bracket(this)
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifier parts left: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse literals, parenthesized expressions/subqueries, and tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # A leading-dot decimal like `.5`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered per-name parsers."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows -- only no-paren functions can match.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name -- keep it as an Anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Dotted form: the first part is the parameter's kind/scope.
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ...), DISTINCT aggregate args, or an expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda -- rewind and parse as a regular argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; roll back either way.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this,
ordinality=True) 3364 3365 constraints = [] 3366 while True: 3367 constraint = self._parse_column_constraint() 3368 if not constraint: 3369 break 3370 constraints.append(constraint) 3371 3372 if not kind and not constraints: 3373 return this 3374 3375 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3376 3377 def _parse_auto_increment( 3378 self, 3379 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3380 start = None 3381 increment = None 3382 3383 if self._match(TokenType.L_PAREN, advance=False): 3384 args = self._parse_wrapped_csv(self._parse_bitwise) 3385 start = seq_get(args, 0) 3386 increment = seq_get(args, 1) 3387 elif self._match_text_seq("START"): 3388 start = self._parse_bitwise() 3389 self._match_text_seq("INCREMENT") 3390 increment = self._parse_bitwise() 3391 3392 if start and increment: 3393 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3394 3395 return exp.AutoIncrementColumnConstraint() 3396 3397 def _parse_compress(self) -> exp.CompressColumnConstraint: 3398 if self._match(TokenType.L_PAREN, advance=False): 3399 return self.expression( 3400 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3401 ) 3402 3403 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3404 3405 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3406 if self._match_text_seq("BY", "DEFAULT"): 3407 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3408 this = self.expression( 3409 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3410 ) 3411 else: 3412 self._match_text_seq("ALWAYS") 3413 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3414 3415 self._match(TokenType.ALIAS) 3416 identity = self._match_text_seq("IDENTITY") 3417 3418 if self._match(TokenType.L_PAREN): 3419 if self._match_text_seq("START", "WITH"): 3420 this.set("start", 
self._parse_bitwise()) 3421 if self._match_text_seq("INCREMENT", "BY"): 3422 this.set("increment", self._parse_bitwise()) 3423 if self._match_text_seq("MINVALUE"): 3424 this.set("minvalue", self._parse_bitwise()) 3425 if self._match_text_seq("MAXVALUE"): 3426 this.set("maxvalue", self._parse_bitwise()) 3427 3428 if self._match_text_seq("CYCLE"): 3429 this.set("cycle", True) 3430 elif self._match_text_seq("NO", "CYCLE"): 3431 this.set("cycle", False) 3432 3433 if not identity: 3434 this.set("expression", self._parse_bitwise()) 3435 3436 self._match_r_paren() 3437 3438 return this 3439 3440 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3441 self._match_text_seq("LENGTH") 3442 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3443 3444 def _parse_not_constraint( 3445 self, 3446 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3447 if self._match_text_seq("NULL"): 3448 return self.expression(exp.NotNullColumnConstraint) 3449 if self._match_text_seq("CASESPECIFIC"): 3450 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3451 return None 3452 3453 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3454 if self._match(TokenType.CONSTRAINT): 3455 this = self._parse_id_var() 3456 else: 3457 this = None 3458 3459 if self._match_texts(self.CONSTRAINT_PARSERS): 3460 return self.expression( 3461 exp.ColumnConstraint, 3462 this=this, 3463 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3464 ) 3465 3466 return this 3467 3468 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3469 if not self._match(TokenType.CONSTRAINT): 3470 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3471 3472 this = self._parse_id_var() 3473 expressions = [] 3474 3475 while True: 3476 constraint = self._parse_unnamed_constraint() or self._parse_function() 3477 if not constraint: 3478 break 3479 expressions.append(constraint) 3480 3481 
return self.expression(exp.Constraint, this=this, expressions=expressions) 3482 3483 def _parse_unnamed_constraint( 3484 self, constraints: t.Optional[t.Collection[str]] = None 3485 ) -> t.Optional[exp.Expression]: 3486 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3487 return None 3488 3489 constraint = self._prev.text.upper() 3490 if constraint not in self.CONSTRAINT_PARSERS: 3491 self.raise_error(f"No parser found for schema constraint {constraint}.") 3492 3493 return self.CONSTRAINT_PARSERS[constraint](self) 3494 3495 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3496 self._match_text_seq("KEY") 3497 return self.expression( 3498 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3499 ) 3500 3501 def _parse_key_constraint_options(self) -> t.List[str]: 3502 options = [] 3503 while True: 3504 if not self._curr: 3505 break 3506 3507 if self._match(TokenType.ON): 3508 action = None 3509 on = self._advance_any() and self._prev.text 3510 3511 if self._match_text_seq("NO", "ACTION"): 3512 action = "NO ACTION" 3513 elif self._match_text_seq("CASCADE"): 3514 action = "CASCADE" 3515 elif self._match_pair(TokenType.SET, TokenType.NULL): 3516 action = "SET NULL" 3517 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3518 action = "SET DEFAULT" 3519 else: 3520 self.raise_error("Invalid key constraint") 3521 3522 options.append(f"ON {on} {action}") 3523 elif self._match_text_seq("NOT", "ENFORCED"): 3524 options.append("NOT ENFORCED") 3525 elif self._match_text_seq("DEFERRABLE"): 3526 options.append("DEFERRABLE") 3527 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3528 options.append("INITIALLY DEFERRED") 3529 elif self._match_text_seq("NORELY"): 3530 options.append("NORELY") 3531 elif self._match_text_seq("MATCH", "FULL"): 3532 options.append("MATCH FULL") 3533 else: 3534 break 3535 3536 return options 3537 3538 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3539 if 
match and not self._match(TokenType.REFERENCES): 3540 return None 3541 3542 expressions = None 3543 this = self._parse_id_var() 3544 3545 if self._match(TokenType.L_PAREN, advance=False): 3546 expressions = self._parse_wrapped_id_vars() 3547 3548 options = self._parse_key_constraint_options() 3549 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3550 3551 def _parse_foreign_key(self) -> exp.ForeignKey: 3552 expressions = self._parse_wrapped_id_vars() 3553 reference = self._parse_references() 3554 options = {} 3555 3556 while self._match(TokenType.ON): 3557 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3558 self.raise_error("Expected DELETE or UPDATE") 3559 3560 kind = self._prev.text.lower() 3561 3562 if self._match_text_seq("NO", "ACTION"): 3563 action = "NO ACTION" 3564 elif self._match(TokenType.SET): 3565 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3566 action = "SET " + self._prev.text.upper() 3567 else: 3568 self._advance() 3569 action = self._prev.text.upper() 3570 3571 options[kind] = action 3572 3573 return self.expression( 3574 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3575 ) 3576 3577 def _parse_primary_key( 3578 self, wrapped_optional: bool = False, in_props: bool = False 3579 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3580 desc = ( 3581 self._match_set((TokenType.ASC, TokenType.DESC)) 3582 and self._prev.token_type == TokenType.DESC 3583 ) 3584 3585 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3586 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3587 3588 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3589 options = self._parse_key_constraint_options() 3590 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3591 3592 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3593 if not 
self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3594 return this 3595 3596 bracket_kind = self._prev.token_type 3597 3598 if self._match(TokenType.COLON): 3599 expressions: t.List[t.Optional[exp.Expression]] = [ 3600 self.expression(exp.Slice, expression=self._parse_conjunction()) 3601 ] 3602 else: 3603 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3604 3605 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3606 if bracket_kind == TokenType.L_BRACE: 3607 this = self.expression(exp.Struct, expressions=expressions) 3608 elif not this or this.name.upper() == "ARRAY": 3609 this = self.expression(exp.Array, expressions=expressions) 3610 else: 3611 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3612 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3613 3614 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3615 self.raise_error("Expected ]") 3616 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3617 self.raise_error("Expected }") 3618 3619 self._add_comments(this) 3620 return self._parse_bracket(this) 3621 3622 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3623 if self._match(TokenType.COLON): 3624 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3625 return this 3626 3627 def _parse_case(self) -> t.Optional[exp.Expression]: 3628 ifs = [] 3629 default = None 3630 3631 expression = self._parse_conjunction() 3632 3633 while self._match(TokenType.WHEN): 3634 this = self._parse_conjunction() 3635 self._match(TokenType.THEN) 3636 then = self._parse_conjunction() 3637 ifs.append(self.expression(exp.If, this=this, true=then)) 3638 3639 if self._match(TokenType.ELSE): 3640 default = self._parse_conjunction() 3641 3642 if not self._match(TokenType.END): 3643 self.raise_error("Expected END after CASE", self._prev) 3644 3645 return 
self._parse_window( 3646 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3647 ) 3648 3649 def _parse_if(self) -> t.Optional[exp.Expression]: 3650 if self._match(TokenType.L_PAREN): 3651 args = self._parse_csv(self._parse_conjunction) 3652 this = self.validate_expression(exp.If.from_arg_list(args), args) 3653 self._match_r_paren() 3654 else: 3655 index = self._index - 1 3656 condition = self._parse_conjunction() 3657 3658 if not condition: 3659 self._retreat(index) 3660 return None 3661 3662 self._match(TokenType.THEN) 3663 true = self._parse_conjunction() 3664 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3665 self._match(TokenType.END) 3666 this = self.expression(exp.If, this=condition, true=true, false=false) 3667 3668 return self._parse_window(this) 3669 3670 def _parse_extract(self) -> exp.Extract: 3671 this = self._parse_function() or self._parse_var() or self._parse_type() 3672 3673 if self._match(TokenType.FROM): 3674 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3675 3676 if not self._match(TokenType.COMMA): 3677 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3678 3679 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3680 3681 def _parse_any_value(self) -> exp.AnyValue: 3682 this = self._parse_lambda() 3683 is_max = None 3684 having = None 3685 3686 if self._match(TokenType.HAVING): 3687 self._match_texts(("MAX", "MIN")) 3688 is_max = self._prev.text == "MAX" 3689 having = self._parse_column() 3690 3691 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3692 3693 def _parse_cast(self, strict: bool) -> exp.Expression: 3694 this = self._parse_conjunction() 3695 3696 if not self._match(TokenType.ALIAS): 3697 if self._match(TokenType.COMMA): 3698 return self.expression( 3699 exp.CastToStrType, this=this, expression=self._parse_string() 3700 ) 3701 else: 3702 self.raise_error("Expected AS after CAST") 
3703 3704 fmt = None 3705 to = self._parse_types() 3706 3707 if not to: 3708 self.raise_error("Expected TYPE after CAST") 3709 elif to.this == exp.DataType.Type.CHAR: 3710 if self._match(TokenType.CHARACTER_SET): 3711 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3712 elif self._match(TokenType.FORMAT): 3713 fmt = self._parse_at_time_zone(self._parse_string()) 3714 3715 if to.this in exp.DataType.TEMPORAL_TYPES: 3716 return self.expression( 3717 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3718 this=this, 3719 format=exp.Literal.string( 3720 format_time( 3721 fmt.this if fmt else "", 3722 self.FORMAT_MAPPING or self.TIME_MAPPING, 3723 self.FORMAT_TRIE or self.TIME_TRIE, 3724 ) 3725 ), 3726 ) 3727 3728 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3729 3730 def _parse_concat(self) -> t.Optional[exp.Expression]: 3731 args = self._parse_csv(self._parse_conjunction) 3732 if self.CONCAT_NULL_OUTPUTS_STRING: 3733 args = [ 3734 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3735 for arg in args 3736 if arg 3737 ] 3738 3739 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3740 # we find such a call we replace it with its argument. 
3741 if len(args) == 1: 3742 return args[0] 3743 3744 return self.expression( 3745 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3746 ) 3747 3748 def _parse_string_agg(self) -> exp.Expression: 3749 if self._match(TokenType.DISTINCT): 3750 args: t.List[t.Optional[exp.Expression]] = [ 3751 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3752 ] 3753 if self._match(TokenType.COMMA): 3754 args.extend(self._parse_csv(self._parse_conjunction)) 3755 else: 3756 args = self._parse_csv(self._parse_conjunction) 3757 3758 index = self._index 3759 if not self._match(TokenType.R_PAREN): 3760 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3761 return self.expression( 3762 exp.GroupConcat, 3763 this=seq_get(args, 0), 3764 separator=self._parse_order(this=seq_get(args, 1)), 3765 ) 3766 3767 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3768 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3769 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3770 if not self._match_text_seq("WITHIN", "GROUP"): 3771 self._retreat(index) 3772 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3773 3774 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3775 order = self._parse_order(this=seq_get(args, 0)) 3776 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3777 3778 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3779 this = self._parse_bitwise() 3780 3781 if self._match(TokenType.USING): 3782 to: t.Optional[exp.Expression] = self.expression( 3783 exp.CharacterSet, this=self._parse_var() 3784 ) 3785 elif self._match(TokenType.COMMA): 3786 to = self._parse_types() 3787 else: 3788 to = None 3789 3790 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3791 3792 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3793 """ 3794 There are generally two variants of the DECODE function: 3795 3796 - DECODE(bin, charset) 3797 - DECODE(expression, search, result [, search, result] ... [, default]) 3798 3799 The second variant will always be parsed into a CASE expression. Note that NULL 3800 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3801 instead of relying on pattern matching. 
3802 """ 3803 args = self._parse_csv(self._parse_conjunction) 3804 3805 if len(args) < 3: 3806 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3807 3808 expression, *expressions = args 3809 if not expression: 3810 return None 3811 3812 ifs = [] 3813 for search, result in zip(expressions[::2], expressions[1::2]): 3814 if not search or not result: 3815 return None 3816 3817 if isinstance(search, exp.Literal): 3818 ifs.append( 3819 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3820 ) 3821 elif isinstance(search, exp.Null): 3822 ifs.append( 3823 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3824 ) 3825 else: 3826 cond = exp.or_( 3827 exp.EQ(this=expression.copy(), expression=search), 3828 exp.and_( 3829 exp.Is(this=expression.copy(), expression=exp.Null()), 3830 exp.Is(this=search.copy(), expression=exp.Null()), 3831 copy=False, 3832 ), 3833 copy=False, 3834 ) 3835 ifs.append(exp.If(this=cond, true=result)) 3836 3837 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3838 3839 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3840 self._match_text_seq("KEY") 3841 key = self._parse_field() 3842 self._match(TokenType.COLON) 3843 self._match_text_seq("VALUE") 3844 value = self._parse_field() 3845 3846 if not key and not value: 3847 return None 3848 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3849 3850 def _parse_json_object(self) -> exp.JSONObject: 3851 star = self._parse_star() 3852 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3853 3854 null_handling = None 3855 if self._match_text_seq("NULL", "ON", "NULL"): 3856 null_handling = "NULL ON NULL" 3857 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3858 null_handling = "ABSENT ON NULL" 3859 3860 unique_keys = None 3861 if self._match_text_seq("WITH", "UNIQUE"): 3862 unique_keys = True 3863 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3864 unique_keys = False 3865 3866 self._match_text_seq("KEYS") 3867 3868 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3869 format_json = self._match_text_seq("FORMAT", "JSON") 3870 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3871 3872 return self.expression( 3873 exp.JSONObject, 3874 expressions=expressions, 3875 null_handling=null_handling, 3876 unique_keys=unique_keys, 3877 return_type=return_type, 3878 format_json=format_json, 3879 encoding=encoding, 3880 ) 3881 3882 def _parse_logarithm(self) -> exp.Func: 3883 # Default argument order is base, expression 3884 args = self._parse_csv(self._parse_range) 3885 3886 if len(args) > 1: 3887 if not self.LOG_BASE_FIRST: 3888 args.reverse() 3889 return exp.Log.from_arg_list(args) 3890 3891 return self.expression( 3892 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3893 ) 3894 3895 def _parse_match_against(self) -> exp.MatchAgainst: 3896 expressions = self._parse_csv(self._parse_column) 3897 3898 self._match_text_seq(")", "AGAINST", "(") 3899 3900 this = self._parse_string() 3901 3902 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3903 modifier = "IN NATURAL LANGUAGE MODE" 3904 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3905 modifier = f"{modifier} WITH QUERY EXPANSION" 3906 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3907 modifier = "IN BOOLEAN MODE" 3908 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3909 modifier = "WITH QUERY EXPANSION" 3910 else: 3911 modifier = None 3912 3913 return self.expression( 3914 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3915 ) 3916 3917 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3918 def _parse_open_json(self) -> exp.OpenJSON: 3919 this = self._parse_bitwise() 3920 path = self._match(TokenType.COMMA) and self._parse_string() 3921 3922 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3923 this = self._parse_field(any_token=True) 3924 kind = self._parse_types() 3925 path = self._parse_string() 3926 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3927 3928 return self.expression( 3929 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3930 ) 3931 3932 expressions = None 3933 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3934 self._match_l_paren() 3935 expressions = self._parse_csv(_parse_open_json_column_def) 3936 3937 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3938 3939 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3940 args = self._parse_csv(self._parse_bitwise) 3941 3942 if self._match(TokenType.IN): 3943 return self.expression( 3944 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3945 ) 3946 3947 if haystack_first: 3948 haystack = seq_get(args, 0) 3949 needle = seq_get(args, 1) 3950 else: 3951 needle = seq_get(args, 0) 3952 haystack = seq_get(args, 1) 3953 3954 return self.expression( 3955 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3956 ) 3957 3958 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3959 args = self._parse_csv(self._parse_table) 3960 return exp.JoinHint(this=func_name.upper(), expressions=args) 3961 3962 def _parse_substring(self) -> exp.Substring: 3963 # Postgres supports the form: substring(string [from int] [for int]) 3964 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3965 3966 args = self._parse_csv(self._parse_bitwise) 3967 3968 if self._match(TokenType.FROM): 3969 args.append(self._parse_bitwise()) 3970 if self._match(TokenType.FOR): 3971 args.append(self._parse_bitwise()) 3972 3973 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3974 3975 def _parse_trim(self) -> exp.Trim: 3976 # https://www.w3resource.com/sql/character-functions/trim.php 3977 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3978 3979 position = None 3980 collation = None 3981 3982 if self._match_texts(self.TRIM_TYPES): 3983 position = self._prev.text.upper() 3984 3985 expression = self._parse_bitwise() 3986 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3987 this = self._parse_bitwise() 3988 else: 3989 this = expression 3990 expression = None 3991 3992 if self._match(TokenType.COLLATE): 3993 collation = self._parse_bitwise() 3994 3995 return self.expression( 3996 exp.Trim, this=this, position=position, expression=expression, collation=collation 3997 ) 3998 3999 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4000 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4001 4002 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4003 return self._parse_window(self._parse_id_var(), alias=True) 4004 4005 def _parse_respect_or_ignore_nulls( 4006 self, this: t.Optional[exp.Expression] 4007 ) -> t.Optional[exp.Expression]: 4008 if self._match_text_seq("IGNORE", "NULLS"): 4009 return self.expression(exp.IgnoreNulls, this=this) 4010 if self._match_text_seq("RESPECT", "NULLS"): 4011 return self.expression(exp.RespectNulls, this=this) 4012 return this 4013 4014 def _parse_window( 4015 self, this: t.Optional[exp.Expression], alias: bool = False 4016 ) -> t.Optional[exp.Expression]: 4017 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4018 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4019 self._match_r_paren() 4020 4021 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4022 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4023 if self._match_text_seq("WITHIN", "GROUP"): 4024 order = self._parse_wrapped(self._parse_order) 4025 this = self.expression(exp.WithinGroup, this=this, expression=order) 4026 4027 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4028 # Some dialects choose to implement and some do not. 4029 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4030 4031 # There is some code above in _parse_lambda that handles 4032 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4033 4034 # The below changes handle 4035 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4036 4037 # Oracle allows both formats 4038 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4039 # and Snowflake chose to do the same for familiarity 4040 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4041 this = self._parse_respect_or_ignore_nulls(this) 4042 4043 # bigquery select from window x AS (partition by ...) 
4044 if alias: 4045 over = None 4046 self._match(TokenType.ALIAS) 4047 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4048 return this 4049 else: 4050 over = self._prev.text.upper() 4051 4052 if not self._match(TokenType.L_PAREN): 4053 return self.expression( 4054 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4055 ) 4056 4057 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4058 4059 first = self._match(TokenType.FIRST) 4060 if self._match_text_seq("LAST"): 4061 first = False 4062 4063 partition = self._parse_partition_by() 4064 order = self._parse_order() 4065 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4066 4067 if kind: 4068 self._match(TokenType.BETWEEN) 4069 start = self._parse_window_spec() 4070 self._match(TokenType.AND) 4071 end = self._parse_window_spec() 4072 4073 spec = self.expression( 4074 exp.WindowSpec, 4075 kind=kind, 4076 start=start["value"], 4077 start_side=start["side"], 4078 end=end["value"], 4079 end_side=end["side"], 4080 ) 4081 else: 4082 spec = None 4083 4084 self._match_r_paren() 4085 4086 return self.expression( 4087 exp.Window, 4088 this=this, 4089 partition_by=partition, 4090 order=order, 4091 spec=spec, 4092 alias=window_alias, 4093 over=over, 4094 first=first, 4095 ) 4096 4097 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4098 self._match(TokenType.BETWEEN) 4099 4100 return { 4101 "value": ( 4102 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4103 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4104 or self._parse_bitwise() 4105 ), 4106 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4107 } 4108 4109 def _parse_alias( 4110 self, this: t.Optional[exp.Expression], explicit: bool = False 4111 ) -> t.Optional[exp.Expression]: 4112 any_token = self._match(TokenType.ALIAS) 4113 4114 if explicit and not any_token: 4115 return this 4116 4117 if 
self._match(TokenType.L_PAREN): 4118 aliases = self.expression( 4119 exp.Aliases, 4120 this=this, 4121 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4122 ) 4123 self._match_r_paren(aliases) 4124 return aliases 4125 4126 alias = self._parse_id_var(any_token) 4127 4128 if alias: 4129 return self.expression(exp.Alias, this=this, alias=alias) 4130 4131 return this 4132 4133 def _parse_id_var( 4134 self, 4135 any_token: bool = True, 4136 tokens: t.Optional[t.Collection[TokenType]] = None, 4137 ) -> t.Optional[exp.Expression]: 4138 identifier = self._parse_identifier() 4139 4140 if identifier: 4141 return identifier 4142 4143 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4144 quoted = self._prev.token_type == TokenType.STRING 4145 return exp.Identifier(this=self._prev.text, quoted=quoted) 4146 4147 return None 4148 4149 def _parse_string(self) -> t.Optional[exp.Expression]: 4150 if self._match(TokenType.STRING): 4151 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4152 return self._parse_placeholder() 4153 4154 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4155 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4156 4157 def _parse_number(self) -> t.Optional[exp.Expression]: 4158 if self._match(TokenType.NUMBER): 4159 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4160 return self._parse_placeholder() 4161 4162 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4163 if self._match(TokenType.IDENTIFIER): 4164 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4165 return self._parse_placeholder() 4166 4167 def _parse_var( 4168 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4169 ) -> t.Optional[exp.Expression]: 4170 if ( 4171 (any_token and self._advance_any()) 4172 or self._match(TokenType.VAR) 4173 or (self._match_set(tokens) if tokens else False) 
    # NOTE(review): the three lines below are the tail of a method whose header
    # lies above this chunk; they are reproduced unchanged.
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal via the registered primary parser, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal via the registered primary parsers."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token via the registered primary parser, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter, optionally wrapped in braces (e.g. `{name}`)."""
        # `wrapped` records whether the parameter was brace-delimited.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rolls the cursor back if the subparser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The registered parser declined; undo the token we consumed above.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list, parenthesized or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list, parenthesized or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments trailing the separator are attached to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators drawn from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a (possibly optional) parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens optional when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parse `parse_method` inside parentheses; the parens may be omitted when `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or else a (possibly aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse a transaction start statement with optional kind and mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Each mode is a run of VAR tokens joined by spaces; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN clauses."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed `chain` is dropped on the ROLLBACK path — confirm intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] with optional IF NOT EXISTS and position."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP COLUMN, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a comma-separated partition list for ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY in ALTER TABLE ... ADD."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the body of ALTER TABLE ... ADD: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN]: drop/set default, or change type."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the body of ALTER TABLE ... DROP: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as column drops instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE, falling back to a raw Command when unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce a structured AlterTable if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... statement with its WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, False if neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a registered subparser, or as a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET item of the form `<name> = <value>` or `<name> TO <value>`."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # No assignment operator: rewind so the caller can try another form.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION with its characteristics list."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via a registered subparser or as an assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement, falling back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Unconsumed tokens: rewind and keep the statement verbatim.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first multi-word option that matches the token stream."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remaining tokens and wrap the raw SQL in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword (the `start` token's text) from the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: `<this>(<kind>[(<key> <value>, ...)])`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: `(MIN <min> MAX <max>)` or `(<max>)` with min 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        # NOTE: `min`/`max` shadow the builtins within this method.
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the token stream through `trie` and return the matching parser, if any."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match: restore the cursor to where we started.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if it has `token_type`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Consume the current token if its type is in `types`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Consume the next two tokens if they match the given pair; returns True or None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening paren, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing paren, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Consume the current token if its upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a case-insensitive sequence of token texts; rewinds fully on failure."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot so the whole chain is replaced at once.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
833 def __init__( 834 self, 835 error_level: t.Optional[ErrorLevel] = None, 836 error_message_context: int = 100, 837 max_errors: int = 3, 838 ): 839 self.error_level = error_level or ErrorLevel.IMMEDIATE 840 self.error_message_context = error_message_context 841 self.max_errors = max_errors 842 self.reset()
854 def parse( 855 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 856 ) -> t.List[t.Optional[exp.Expression]]: 857 """ 858 Parses a list of tokens and returns a list of syntax trees, one tree 859 per parsed SQL statement. 860 861 Args: 862 raw_tokens: The list of tokens. 863 sql: The original SQL string, used to produce helpful debug messages. 864 865 Returns: 866 The list of the produced syntax trees. 867 """ 868 return self._parse( 869 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 870 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
872 def parse_into( 873 self, 874 expression_types: exp.IntoType, 875 raw_tokens: t.List[Token], 876 sql: t.Optional[str] = None, 877 ) -> t.List[t.Optional[exp.Expression]]: 878 """ 879 Parses a list of tokens into a given Expression type. If a collection of Expression 880 types is given instead, this method will try to parse the token list into each one 881 of them, stopping at the first for which the parsing succeeds. 882 883 Args: 884 expression_types: The expression type(s) to try and parse the token list into. 885 raw_tokens: The list of tokens. 886 sql: The original SQL string, used to produce helpful debug messages. 887 888 Returns: 889 The target Expression. 890 """ 891 errors = [] 892 for expression_type in ensure_list(expression_types): 893 parser = self.EXPRESSION_PARSERS.get(expression_type) 894 if not parser: 895 raise TypeError(f"No parser registered for {expression_type}") 896 897 try: 898 return self._parse(parser, raw_tokens, sql) 899 except ParseError as e: 900 e.errors[0]["into_expression"] = expression_type 901 errors.append(e) 902 903 raise ParseError( 904 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 905 errors=merge_errors(errors), 906 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
943 def check_errors(self) -> None: 944 """Logs or raises any found errors, depending on the chosen error level setting.""" 945 if self.error_level == ErrorLevel.WARN: 946 for error in self.errors: 947 logger.error(str(error)) 948 elif self.error_level == ErrorLevel.RAISE and self.errors: 949 raise ParseError( 950 concat_messages(self.errors, self.max_errors), 951 errors=merge_errors(self.errors), 952 )
Logs or raises any found errors, depending on the chosen error level setting.
954 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 955 """ 956 Appends an error in the list of recorded errors or raises it, depending on the chosen 957 error level setting. 958 """ 959 token = token or self._curr or self._prev or Token.string("") 960 start = token.start 961 end = token.end + 1 962 start_context = self.sql[max(start - self.error_message_context, 0) : start] 963 highlight = self.sql[start:end] 964 end_context = self.sql[end : end + self.error_message_context] 965 966 error = ParseError.new( 967 f"{message}. Line {token.line}, Col: {token.col}.\n" 968 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 969 description=message, 970 line=token.line, 971 col=token.col, 972 start_context=start_context, 973 highlight=highlight, 974 end_context=end_context, 975 ) 976 977 if self.error_level == ErrorLevel.IMMEDIATE: 978 raise error 979 980 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
982 def expression( 983 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 984 ) -> E: 985 """ 986 Creates a new, validated Expression. 987 988 Args: 989 exp_class: The expression class to instantiate. 990 comments: An optional list of comments to attach to the expression. 991 kwargs: The arguments to set for the expression along with their respective values. 992 993 Returns: 994 The target expression. 995 """ 996 instance = exp_class(**kwargs) 997 instance.add_comments(comments) if comments else self._add_comments(instance) 998 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1005 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1006 """ 1007 Validates an Expression, making sure that all its mandatory arguments are set. 1008 1009 Args: 1010 expression: The expression to validate. 1011 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1012 1013 Returns: 1014 The validated expression. 1015 """ 1016 if self.error_level != ErrorLevel.IGNORE: 1017 for error_message in expression.error_messages(args): 1018 self.raise_error(error_message) 1019 1020 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.