# Module: sqlglot.parser — token-stream parser producing sqlglot syntax trees.
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13if t.TYPE_CHECKING: 14 from sqlglot._typing import E 15 16logger = logging.getLogger("sqlglot") 17 18 19def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 20 if len(args) == 1 and args[0].is_star: 21 return exp.StarMap(this=args[0]) 22 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def parse_like(args: t.List) -> exp.Expression: 35 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 36 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 37 38 39def binary_range_parser( 40 expr_type: t.Type[exp.Expression], 41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 42 return lambda self, this: self._parse_escape( 43 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 44 ) 45 46 47class _Parser(type): 48 def __new__(cls, clsname, bases, attrs): 49 klass = super().__new__(cls, clsname, bases, attrs) 50 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 51 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 52 53 return klass 54 55 56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 
63 Default: ErrorLevel.IMMEDIATE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 
TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.ASC, 211 
TokenType.AUTO_INCREMENT, 212 TokenType.BEGIN, 213 TokenType.CACHE, 214 TokenType.COLLATE, 215 TokenType.COMMAND, 216 TokenType.COMMENT, 217 TokenType.COMMIT, 218 TokenType.CONSTRAINT, 219 TokenType.DEFAULT, 220 TokenType.DELETE, 221 TokenType.DESC, 222 TokenType.DESCRIBE, 223 TokenType.DIV, 224 TokenType.END, 225 TokenType.EXECUTE, 226 TokenType.ESCAPE, 227 TokenType.FALSE, 228 TokenType.FIRST, 229 TokenType.FILTER, 230 TokenType.FORMAT, 231 TokenType.FULL, 232 TokenType.IF, 233 TokenType.IS, 234 TokenType.ISNULL, 235 TokenType.INTERVAL, 236 TokenType.KEEP, 237 TokenType.LEFT, 238 TokenType.LOAD, 239 TokenType.MERGE, 240 TokenType.NATURAL, 241 TokenType.NEXT, 242 TokenType.OFFSET, 243 TokenType.ORDINALITY, 244 TokenType.OVERWRITE, 245 TokenType.PARTITION, 246 TokenType.PERCENT, 247 TokenType.PIVOT, 248 TokenType.PRAGMA, 249 TokenType.RANGE, 250 TokenType.REFERENCES, 251 TokenType.RIGHT, 252 TokenType.ROW, 253 TokenType.ROWS, 254 TokenType.SEMI, 255 TokenType.SET, 256 TokenType.SETTINGS, 257 TokenType.SHOW, 258 TokenType.TEMPORARY, 259 TokenType.TOP, 260 TokenType.TRUE, 261 TokenType.UNIQUE, 262 TokenType.UNPIVOT, 263 TokenType.VOLATILE, 264 TokenType.WINDOW, 265 *CREATABLES, 266 *SUBQUERY_PREDICATES, 267 *TYPE_TOKENS, 268 *NO_PAREN_FUNCTIONS, 269 } 270 271 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 272 273 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 274 TokenType.APPLY, 275 TokenType.FULL, 276 TokenType.LEFT, 277 TokenType.LOCK, 278 TokenType.NATURAL, 279 TokenType.OFFSET, 280 TokenType.RIGHT, 281 TokenType.WINDOW, 282 } 283 284 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 285 286 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 287 288 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 289 290 FUNC_TOKENS = { 291 TokenType.COMMAND, 292 TokenType.CURRENT_DATE, 293 TokenType.CURRENT_DATETIME, 294 TokenType.CURRENT_TIMESTAMP, 295 TokenType.CURRENT_TIME, 296 TokenType.CURRENT_USER, 297 TokenType.FILTER, 298 TokenType.FIRST, 299 
TokenType.FORMAT, 300 TokenType.GLOB, 301 TokenType.IDENTIFIER, 302 TokenType.INDEX, 303 TokenType.ISNULL, 304 TokenType.ILIKE, 305 TokenType.LIKE, 306 TokenType.MERGE, 307 TokenType.OFFSET, 308 TokenType.PRIMARY_KEY, 309 TokenType.RANGE, 310 TokenType.REPLACE, 311 TokenType.ROW, 312 TokenType.UNNEST, 313 TokenType.VAR, 314 TokenType.LEFT, 315 TokenType.RIGHT, 316 TokenType.DATE, 317 TokenType.DATETIME, 318 TokenType.TABLE, 319 TokenType.TIMESTAMP, 320 TokenType.TIMESTAMPTZ, 321 TokenType.WINDOW, 322 *TYPE_TOKENS, 323 *SUBQUERY_PREDICATES, 324 } 325 326 CONJUNCTION = { 327 TokenType.AND: exp.And, 328 TokenType.OR: exp.Or, 329 } 330 331 EQUALITY = { 332 TokenType.EQ: exp.EQ, 333 TokenType.NEQ: exp.NEQ, 334 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 335 } 336 337 COMPARISON = { 338 TokenType.GT: exp.GT, 339 TokenType.GTE: exp.GTE, 340 TokenType.LT: exp.LT, 341 TokenType.LTE: exp.LTE, 342 } 343 344 BITWISE = { 345 TokenType.AMP: exp.BitwiseAnd, 346 TokenType.CARET: exp.BitwiseXor, 347 TokenType.PIPE: exp.BitwiseOr, 348 TokenType.DPIPE: exp.DPipe, 349 } 350 351 TERM = { 352 TokenType.DASH: exp.Sub, 353 TokenType.PLUS: exp.Add, 354 TokenType.MOD: exp.Mod, 355 TokenType.COLLATE: exp.Collate, 356 } 357 358 FACTOR = { 359 TokenType.DIV: exp.IntDiv, 360 TokenType.LR_ARROW: exp.Distance, 361 TokenType.SLASH: exp.Div, 362 TokenType.STAR: exp.Mul, 363 } 364 365 TIMESTAMPS = { 366 TokenType.TIME, 367 TokenType.TIMESTAMP, 368 TokenType.TIMESTAMPTZ, 369 TokenType.TIMESTAMPLTZ, 370 } 371 372 SET_OPERATIONS = { 373 TokenType.UNION, 374 TokenType.INTERSECT, 375 TokenType.EXCEPT, 376 } 377 378 JOIN_SIDES = { 379 TokenType.LEFT, 380 TokenType.RIGHT, 381 TokenType.FULL, 382 } 383 384 JOIN_KINDS = { 385 TokenType.INNER, 386 TokenType.OUTER, 387 TokenType.CROSS, 388 TokenType.SEMI, 389 TokenType.ANTI, 390 } 391 392 LAMBDAS = { 393 TokenType.ARROW: lambda self, expressions: self.expression( 394 exp.Lambda, 395 this=self._replace_lambda( 396 self._parse_conjunction(), 397 {node.name for node 
in expressions}, 398 ), 399 expressions=expressions, 400 ), 401 TokenType.FARROW: lambda self, expressions: self.expression( 402 exp.Kwarg, 403 this=exp.Var(this=expressions[0].name), 404 expression=self._parse_conjunction(), 405 ), 406 } 407 408 COLUMN_OPERATORS = { 409 TokenType.DOT: None, 410 TokenType.DCOLON: lambda self, this, to: self.expression( 411 exp.Cast if self.STRICT_CAST else exp.TryCast, 412 this=this, 413 to=to, 414 ), 415 TokenType.ARROW: lambda self, this, path: self.expression( 416 exp.JSONExtract, 417 this=this, 418 expression=path, 419 ), 420 TokenType.DARROW: lambda self, this, path: self.expression( 421 exp.JSONExtractScalar, 422 this=this, 423 expression=path, 424 ), 425 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 426 exp.JSONBExtract, 427 this=this, 428 expression=path, 429 ), 430 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 431 exp.JSONBExtractScalar, 432 this=this, 433 expression=path, 434 ), 435 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 436 exp.JSONBContains, 437 this=this, 438 expression=key, 439 ), 440 } 441 442 EXPRESSION_PARSERS = { 443 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 444 exp.Column: lambda self: self._parse_column(), 445 exp.Condition: lambda self: self._parse_conjunction(), 446 exp.DataType: lambda self: self._parse_types(), 447 exp.Expression: lambda self: self._parse_statement(), 448 exp.From: lambda self: self._parse_from(), 449 exp.Group: lambda self: self._parse_group(), 450 exp.Having: lambda self: self._parse_having(), 451 exp.Identifier: lambda self: self._parse_id_var(), 452 exp.Join: lambda self: self._parse_join(), 453 exp.Lambda: lambda self: self._parse_lambda(), 454 exp.Lateral: lambda self: self._parse_lateral(), 455 exp.Limit: lambda self: self._parse_limit(), 456 exp.Offset: lambda self: self._parse_offset(), 457 exp.Order: lambda self: self._parse_order(), 458 exp.Ordered: lambda self: self._parse_ordered(), 459 
exp.Properties: lambda self: self._parse_properties(), 460 exp.Qualify: lambda self: self._parse_qualify(), 461 exp.Returning: lambda self: self._parse_returning(), 462 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 463 exp.Table: lambda self: self._parse_table_parts(), 464 exp.TableAlias: lambda self: self._parse_table_alias(), 465 exp.Where: lambda self: self._parse_where(), 466 exp.Window: lambda self: self._parse_named_window(), 467 exp.With: lambda self: self._parse_with(), 468 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 469 } 470 471 STATEMENT_PARSERS = { 472 TokenType.ALTER: lambda self: self._parse_alter(), 473 TokenType.BEGIN: lambda self: self._parse_transaction(), 474 TokenType.CACHE: lambda self: self._parse_cache(), 475 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 476 TokenType.COMMENT: lambda self: self._parse_comment(), 477 TokenType.CREATE: lambda self: self._parse_create(), 478 TokenType.DELETE: lambda self: self._parse_delete(), 479 TokenType.DESC: lambda self: self._parse_describe(), 480 TokenType.DESCRIBE: lambda self: self._parse_describe(), 481 TokenType.DROP: lambda self: self._parse_drop(), 482 TokenType.END: lambda self: self._parse_commit_or_rollback(), 483 TokenType.FROM: lambda self: exp.select("*").from_( 484 t.cast(exp.From, self._parse_from(skip_from_token=True)) 485 ), 486 TokenType.INSERT: lambda self: self._parse_insert(), 487 TokenType.LOAD: lambda self: self._parse_load(), 488 TokenType.MERGE: lambda self: self._parse_merge(), 489 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 490 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 491 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 492 TokenType.SET: lambda self: self._parse_set(), 493 TokenType.UNCACHE: lambda self: self._parse_uncache(), 494 TokenType.UPDATE: lambda self: self._parse_update(), 495 TokenType.USE: lambda self: self.expression( 496 
exp.Use, 497 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 498 and exp.Var(this=self._prev.text), 499 this=self._parse_table(schema=False), 500 ), 501 } 502 503 UNARY_PARSERS = { 504 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 505 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 506 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 507 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 508 } 509 510 PRIMARY_PARSERS = { 511 TokenType.STRING: lambda self, token: self.expression( 512 exp.Literal, this=token.text, is_string=True 513 ), 514 TokenType.NUMBER: lambda self, token: self.expression( 515 exp.Literal, this=token.text, is_string=False 516 ), 517 TokenType.STAR: lambda self, _: self.expression( 518 exp.Star, 519 **{"except": self._parse_except(), "replace": self._parse_replace()}, 520 ), 521 TokenType.NULL: lambda self, _: self.expression(exp.Null), 522 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 523 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 524 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 525 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 526 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 527 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 528 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 529 exp.National, this=token.text 530 ), 531 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 532 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 533 } 534 535 PLACEHOLDER_PARSERS = { 536 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 537 TokenType.PARAMETER: lambda self: 
self._parse_parameter(), 538 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 539 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 540 else None, 541 } 542 543 RANGE_PARSERS = { 544 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 545 TokenType.GLOB: binary_range_parser(exp.Glob), 546 TokenType.ILIKE: binary_range_parser(exp.ILike), 547 TokenType.IN: lambda self, this: self._parse_in(this), 548 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 549 TokenType.IS: lambda self, this: self._parse_is(this), 550 TokenType.LIKE: binary_range_parser(exp.Like), 551 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 552 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 553 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 554 } 555 556 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 557 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 558 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 559 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 560 "CHARACTER SET": lambda self: self._parse_character_set(), 561 "CHECKSUM": lambda self: self._parse_checksum(), 562 "CLUSTER": lambda self: self._parse_cluster(), 563 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 564 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 565 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 566 "DEFINER": lambda self: self._parse_definer(), 567 "DETERMINISTIC": lambda self: self.expression( 568 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 569 ), 570 "DISTKEY": lambda self: self._parse_distkey(), 571 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 572 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 573 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 574 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 575 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 576 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 577 "FREESPACE": lambda self: self._parse_freespace(), 578 "IMMUTABLE": lambda self: self.expression( 579 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 580 ), 581 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 582 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 583 "LIKE": lambda self: self._parse_create_like(), 584 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 585 "LOCK": lambda self: self._parse_locking(), 586 "LOCKING": lambda self: self._parse_locking(), 587 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 588 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 589 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 590 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 591 "NO": lambda self: self._parse_no_property(), 592 "ON": lambda self: self._parse_on_property(), 593 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 594 "PARTITION BY": lambda self: self._parse_partitioned_by(), 595 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 596 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 597 "PRIMARY KEY": lambda self: self._parse_primary_key(), 598 "RETURNS": lambda self: self._parse_returns(), 599 "ROW": lambda self: self._parse_row(), 600 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 601 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 602 "SETTINGS": lambda self: self.expression( 603 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 604 ), 605 "SORTKEY": lambda self: self._parse_sortkey(), 606 
"STABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("STABLE") 608 ), 609 "STORED": lambda self: self._parse_stored(), 610 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 611 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 612 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 613 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 614 "TTL": lambda self: self._parse_ttl(), 615 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 616 "VOLATILE": lambda self: self._parse_volatile_property(), 617 "WITH": lambda self: self._parse_with_property(), 618 } 619 620 CONSTRAINT_PARSERS = { 621 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 622 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 623 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 624 "CHARACTER SET": lambda self: self.expression( 625 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 626 ), 627 "CHECK": lambda self: self.expression( 628 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 629 ), 630 "COLLATE": lambda self: self.expression( 631 exp.CollateColumnConstraint, this=self._parse_var() 632 ), 633 "COMMENT": lambda self: self.expression( 634 exp.CommentColumnConstraint, this=self._parse_string() 635 ), 636 "COMPRESS": lambda self: self._parse_compress(), 637 "DEFAULT": lambda self: self.expression( 638 exp.DefaultColumnConstraint, this=self._parse_bitwise() 639 ), 640 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 641 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 642 "FORMAT": lambda self: self.expression( 643 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 644 ), 645 "GENERATED": lambda self: self._parse_generated_as_identity(), 646 "IDENTITY": lambda self: self._parse_auto_increment(), 647 
"INLINE": lambda self: self._parse_inline(), 648 "LIKE": lambda self: self._parse_create_like(), 649 "NOT": lambda self: self._parse_not_constraint(), 650 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 651 "ON": lambda self: self._match(TokenType.UPDATE) 652 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 653 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 654 "PRIMARY KEY": lambda self: self._parse_primary_key(), 655 "REFERENCES": lambda self: self._parse_references(match=False), 656 "TITLE": lambda self: self.expression( 657 exp.TitleColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 660 "UNIQUE": lambda self: self._parse_unique(), 661 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 662 } 663 664 ALTER_PARSERS = { 665 "ADD": lambda self: self._parse_alter_table_add(), 666 "ALTER": lambda self: self._parse_alter_table_alter(), 667 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 668 "DROP": lambda self: self._parse_alter_table_drop(), 669 "RENAME": lambda self: self._parse_alter_table_rename(), 670 } 671 672 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 673 674 NO_PAREN_FUNCTION_PARSERS = { 675 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 676 TokenType.CASE: lambda self: self._parse_case(), 677 TokenType.IF: lambda self: self._parse_if(), 678 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 679 exp.NextValueFor, 680 this=self._parse_column(), 681 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 682 ), 683 } 684 685 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 686 687 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 688 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 689 "CONVERT": 
lambda self: self._parse_convert(self.STRICT_CAST), 690 "DECODE": lambda self: self._parse_decode(), 691 "EXTRACT": lambda self: self._parse_extract(), 692 "JSON_OBJECT": lambda self: self._parse_json_object(), 693 "LOG": lambda self: self._parse_logarithm(), 694 "MATCH": lambda self: self._parse_match_against(), 695 "OPENJSON": lambda self: self._parse_open_json(), 696 "POSITION": lambda self: self._parse_position(), 697 "SAFE_CAST": lambda self: self._parse_cast(False), 698 "STRING_AGG": lambda self: self._parse_string_agg(), 699 "SUBSTRING": lambda self: self._parse_substring(), 700 "TRIM": lambda self: self._parse_trim(), 701 "TRY_CAST": lambda self: self._parse_cast(False), 702 "TRY_CONVERT": lambda self: self._parse_convert(False), 703 } 704 705 QUERY_MODIFIER_PARSERS = { 706 "joins": lambda self: list(iter(self._parse_join, None)), 707 "laterals": lambda self: list(iter(self._parse_lateral, None)), 708 "match": lambda self: self._parse_match_recognize(), 709 "where": lambda self: self._parse_where(), 710 "group": lambda self: self._parse_group(), 711 "having": lambda self: self._parse_having(), 712 "qualify": lambda self: self._parse_qualify(), 713 "windows": lambda self: self._parse_window_clause(), 714 "order": lambda self: self._parse_order(), 715 "limit": lambda self: self._parse_limit(), 716 "offset": lambda self: self._parse_offset(), 717 "locks": lambda self: self._parse_locks(), 718 "sample": lambda self: self._parse_table_sample(as_modifier=True), 719 } 720 721 SET_PARSERS = { 722 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 723 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 724 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 725 "TRANSACTION": lambda self: self._parse_set_transaction(), 726 } 727 728 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 729 730 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 731 732 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 733 734 
TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 735 736 TRANSACTION_CHARACTERISTICS = { 737 "ISOLATION LEVEL REPEATABLE READ", 738 "ISOLATION LEVEL READ COMMITTED", 739 "ISOLATION LEVEL READ UNCOMMITTED", 740 "ISOLATION LEVEL SERIALIZABLE", 741 "READ WRITE", 742 "READ ONLY", 743 } 744 745 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 746 747 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 748 749 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 750 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 751 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 752 753 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 754 755 STRICT_CAST = True 756 757 CONVERT_TYPE_FIRST = False 758 759 PREFIXED_PIVOT_COLUMNS = False 760 IDENTIFY_PIVOT_STRINGS = False 761 762 LOG_BASE_FIRST = True 763 LOG_DEFAULTS_TO_LN = False 764 765 __slots__ = ( 766 "error_level", 767 "error_message_context", 768 "sql", 769 "errors", 770 "index_offset", 771 "unnest_column_only", 772 "alias_post_tablesample", 773 "max_errors", 774 "null_ordering", 775 "_tokens", 776 "_index", 777 "_curr", 778 "_next", 779 "_prev", 780 "_prev_comments", 781 "_show_trie", 782 "_set_trie", 783 ) 784 785 def __init__( 786 self, 787 error_level: t.Optional[ErrorLevel] = None, 788 error_message_context: int = 100, 789 index_offset: int = 0, 790 unnest_column_only: bool = False, 791 alias_post_tablesample: bool = False, 792 max_errors: int = 3, 793 null_ordering: t.Optional[str] = None, 794 ): 795 self.error_level = error_level or ErrorLevel.IMMEDIATE 796 self.error_message_context = error_message_context 797 self.index_offset = index_offset 798 self.unnest_column_only = unnest_column_only 799 self.alias_post_tablesample = alias_post_tablesample 800 self.max_errors = max_errors 801 self.null_ordering = null_ordering 802 self.reset() 803 804 def reset(self): 805 self.sql = "" 806 self.errors = [] 807 self._tokens = [] 808 self._index = 0 809 self._curr = 
None 810 self._next = None 811 self._prev = None 812 self._prev_comments = None 813 814 def parse( 815 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 816 ) -> t.List[t.Optional[exp.Expression]]: 817 """ 818 Parses a list of tokens and returns a list of syntax trees, one tree 819 per parsed SQL statement. 820 821 Args: 822 raw_tokens: the list of tokens. 823 sql: the original SQL string, used to produce helpful debug messages. 824 825 Returns: 826 The list of syntax trees. 827 """ 828 return self._parse( 829 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 830 ) 831 832 def parse_into( 833 self, 834 expression_types: exp.IntoType, 835 raw_tokens: t.List[Token], 836 sql: t.Optional[str] = None, 837 ) -> t.List[t.Optional[exp.Expression]]: 838 """ 839 Parses a list of tokens into a given Expression type. If a collection of Expression 840 types is given instead, this method will try to parse the token list into each one 841 of them, stopping at the first for which the parsing succeeds. 842 843 Args: 844 expression_types: the expression type(s) to try and parse the token list into. 845 raw_tokens: the list of tokens. 846 sql: the original SQL string, used to produce helpful debug messages. 847 848 Returns: 849 The target Expression. 
850 """ 851 errors = [] 852 for expression_type in ensure_collection(expression_types): 853 parser = self.EXPRESSION_PARSERS.get(expression_type) 854 if not parser: 855 raise TypeError(f"No parser registered for {expression_type}") 856 try: 857 return self._parse(parser, raw_tokens, sql) 858 except ParseError as e: 859 e.errors[0]["into_expression"] = expression_type 860 errors.append(e) 861 raise ParseError( 862 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 863 errors=merge_errors(errors), 864 ) from errors[-1] 865 866 def _parse( 867 self, 868 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 869 raw_tokens: t.List[Token], 870 sql: t.Optional[str] = None, 871 ) -> t.List[t.Optional[exp.Expression]]: 872 self.reset() 873 self.sql = sql or "" 874 total = len(raw_tokens) 875 chunks: t.List[t.List[Token]] = [[]] 876 877 for i, token in enumerate(raw_tokens): 878 if token.token_type == TokenType.SEMICOLON: 879 if i < total - 1: 880 chunks.append([]) 881 else: 882 chunks[-1].append(token) 883 884 expressions = [] 885 886 for tokens in chunks: 887 self._index = -1 888 self._tokens = tokens 889 self._advance() 890 891 expressions.append(parse_method(self)) 892 893 if self._index < len(self._tokens): 894 self.raise_error("Invalid expression / Unexpected token") 895 896 self.check_errors() 897 898 return expressions 899 900 def check_errors(self) -> None: 901 """ 902 Logs or raises any found errors, depending on the chosen error level setting. 903 """ 904 if self.error_level == ErrorLevel.WARN: 905 for error in self.errors: 906 logger.error(str(error)) 907 elif self.error_level == ErrorLevel.RAISE and self.errors: 908 raise ParseError( 909 concat_messages(self.errors, self.max_errors), 910 errors=merge_errors(self.errors), 911 ) 912 913 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 914 """ 915 Appends an error in the list of recorded errors or raises it, depending on the chosen 916 error level setting. 
917 """ 918 token = token or self._curr or self._prev or Token.string("") 919 start = token.start 920 end = token.end + 1 921 start_context = self.sql[max(start - self.error_message_context, 0) : start] 922 highlight = self.sql[start:end] 923 end_context = self.sql[end : end + self.error_message_context] 924 925 error = ParseError.new( 926 f"{message}. Line {token.line}, Col: {token.col}.\n" 927 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 928 description=message, 929 line=token.line, 930 col=token.col, 931 start_context=start_context, 932 highlight=highlight, 933 end_context=end_context, 934 ) 935 936 if self.error_level == ErrorLevel.IMMEDIATE: 937 raise error 938 939 self.errors.append(error) 940 941 def expression( 942 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 943 ) -> E: 944 """ 945 Creates a new, validated Expression. 946 947 Args: 948 exp_class: the expression class to instantiate. 949 comments: an optional list of comments to attach to the expression. 950 kwargs: the arguments to set for the expression along with their respective values. 951 952 Returns: 953 The target expression. 954 """ 955 instance = exp_class(**kwargs) 956 instance.add_comments(comments) if comments else self._add_comments(instance) 957 self.validate_expression(instance) 958 return instance 959 960 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 961 if expression and self._prev_comments: 962 expression.add_comments(self._prev_comments) 963 self._prev_comments = None 964 965 def validate_expression( 966 self, expression: exp.Expression, args: t.Optional[t.List] = None 967 ) -> None: 968 """ 969 Validates an already instantiated expression, making sure that all its mandatory arguments 970 are set. 971 972 Args: 973 expression: the expression to validate. 974 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
975 """ 976 if self.error_level == ErrorLevel.IGNORE: 977 return 978 979 for error_message in expression.error_messages(args): 980 self.raise_error(error_message) 981 982 def _find_sql(self, start: Token, end: Token) -> str: 983 return self.sql[start.start : end.end + 1] 984 985 def _advance(self, times: int = 1) -> None: 986 self._index += times 987 self._curr = seq_get(self._tokens, self._index) 988 self._next = seq_get(self._tokens, self._index + 1) 989 if self._index > 0: 990 self._prev = self._tokens[self._index - 1] 991 self._prev_comments = self._prev.comments 992 else: 993 self._prev = None 994 self._prev_comments = None 995 996 def _retreat(self, index: int) -> None: 997 if index != self._index: 998 self._advance(index - self._index) 999 1000 def _parse_command(self) -> exp.Command: 1001 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1002 1003 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1004 start = self._prev 1005 exists = self._parse_exists() if allow_exists else None 1006 1007 self._match(TokenType.ON) 1008 1009 kind = self._match_set(self.CREATABLES) and self._prev 1010 1011 if not kind: 1012 return self._parse_as_command(start) 1013 1014 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1015 this = self._parse_user_defined_function(kind=kind.token_type) 1016 elif kind.token_type == TokenType.TABLE: 1017 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1018 elif kind.token_type == TokenType.COLUMN: 1019 this = self._parse_column() 1020 else: 1021 this = self._parse_id_var() 1022 1023 self._match(TokenType.IS) 1024 1025 return self.expression( 1026 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1027 ) 1028 1029 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1030 def _parse_ttl(self) -> exp.Expression: 1031 def _parse_ttl_action() -> 
t.Optional[exp.Expression]: 1032 this = self._parse_bitwise() 1033 1034 if self._match_text_seq("DELETE"): 1035 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1036 if self._match_text_seq("RECOMPRESS"): 1037 return self.expression( 1038 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1039 ) 1040 if self._match_text_seq("TO", "DISK"): 1041 return self.expression( 1042 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1043 ) 1044 if self._match_text_seq("TO", "VOLUME"): 1045 return self.expression( 1046 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1047 ) 1048 1049 return this 1050 1051 expressions = self._parse_csv(_parse_ttl_action) 1052 where = self._parse_where() 1053 group = self._parse_group() 1054 1055 aggregates = None 1056 if group and self._match(TokenType.SET): 1057 aggregates = self._parse_csv(self._parse_set_item) 1058 1059 return self.expression( 1060 exp.MergeTreeTTL, 1061 expressions=expressions, 1062 where=where, 1063 group=group, 1064 aggregates=aggregates, 1065 ) 1066 1067 def _parse_statement(self) -> t.Optional[exp.Expression]: 1068 if self._curr is None: 1069 return None 1070 1071 if self._match_set(self.STATEMENT_PARSERS): 1072 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1073 1074 if self._match_set(Tokenizer.COMMANDS): 1075 return self._parse_command() 1076 1077 expression = self._parse_expression() 1078 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1079 return self._parse_query_modifiers(expression) 1080 1081 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1082 start = self._prev 1083 temporary = self._match(TokenType.TEMPORARY) 1084 materialized = self._match_text_seq("MATERIALIZED") 1085 kind = self._match_set(self.CREATABLES) and self._prev.text 1086 if not kind: 1087 return self._parse_as_command(start) 1088 1089 return self.expression( 1090 exp.Drop, 1091 exists=self._parse_exists(), 1092 
this=self._parse_table(schema=True), 1093 kind=kind, 1094 temporary=temporary, 1095 materialized=materialized, 1096 cascade=self._match_text_seq("CASCADE"), 1097 constraints=self._match_text_seq("CONSTRAINTS"), 1098 purge=self._match_text_seq("PURGE"), 1099 ) 1100 1101 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1102 return ( 1103 self._match(TokenType.IF) 1104 and (not not_ or self._match(TokenType.NOT)) 1105 and self._match(TokenType.EXISTS) 1106 ) 1107 1108 def _parse_create(self) -> t.Optional[exp.Expression]: 1109 start = self._prev 1110 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1111 TokenType.OR, TokenType.REPLACE 1112 ) 1113 unique = self._match(TokenType.UNIQUE) 1114 1115 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1116 self._match(TokenType.TABLE) 1117 1118 properties = None 1119 create_token = self._match_set(self.CREATABLES) and self._prev 1120 1121 if not create_token: 1122 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1123 create_token = self._match_set(self.CREATABLES) and self._prev 1124 1125 if not properties or not create_token: 1126 return self._parse_as_command(start) 1127 1128 exists = self._parse_exists(not_=True) 1129 this = None 1130 expression = None 1131 indexes = None 1132 no_schema_binding = None 1133 begin = None 1134 clone = None 1135 1136 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1137 this = self._parse_user_defined_function(kind=create_token.token_type) 1138 temp_properties = self._parse_properties() 1139 if properties and temp_properties: 1140 properties.expressions.extend(temp_properties.expressions) 1141 elif temp_properties: 1142 properties = temp_properties 1143 1144 self._match(TokenType.ALIAS) 1145 begin = self._match(TokenType.BEGIN) 1146 return_ = self._match_text_seq("RETURN") 1147 expression = self._parse_statement() 1148 1149 if return_: 1150 expression = self.expression(exp.Return, 
this=expression) 1151 elif create_token.token_type == TokenType.INDEX: 1152 this = self._parse_index(index=self._parse_id_var()) 1153 elif create_token.token_type in self.DB_CREATABLES: 1154 table_parts = self._parse_table_parts(schema=True) 1155 1156 # exp.Properties.Location.POST_NAME 1157 if self._match(TokenType.COMMA): 1158 temp_properties = self._parse_properties(before=True) 1159 if properties and temp_properties: 1160 properties.expressions.extend(temp_properties.expressions) 1161 elif temp_properties: 1162 properties = temp_properties 1163 1164 this = self._parse_schema(this=table_parts) 1165 1166 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1167 temp_properties = self._parse_properties() 1168 if properties and temp_properties: 1169 properties.expressions.extend(temp_properties.expressions) 1170 elif temp_properties: 1171 properties = temp_properties 1172 1173 self._match(TokenType.ALIAS) 1174 1175 # exp.Properties.Location.POST_ALIAS 1176 if not ( 1177 self._match(TokenType.SELECT, advance=False) 1178 or self._match(TokenType.WITH, advance=False) 1179 or self._match(TokenType.L_PAREN, advance=False) 1180 ): 1181 temp_properties = self._parse_properties() 1182 if properties and temp_properties: 1183 properties.expressions.extend(temp_properties.expressions) 1184 elif temp_properties: 1185 properties = temp_properties 1186 1187 expression = self._parse_ddl_select() 1188 1189 if create_token.token_type == TokenType.TABLE: 1190 indexes = [] 1191 while True: 1192 index = self._parse_index() 1193 1194 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1195 temp_properties = self._parse_properties() 1196 if properties and temp_properties: 1197 properties.expressions.extend(temp_properties.expressions) 1198 elif temp_properties: 1199 properties = temp_properties 1200 1201 if not index: 1202 break 1203 else: 1204 self._match(TokenType.COMMA) 1205 indexes.append(index) 1206 elif create_token.token_type == TokenType.VIEW: 1207 if 
self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1208 no_schema_binding = True 1209 1210 if self._match_text_seq("CLONE"): 1211 clone = self._parse_table(schema=True) 1212 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1213 clone_kind = ( 1214 self._match(TokenType.L_PAREN) 1215 and self._match_texts(self.CLONE_KINDS) 1216 and self._prev.text.upper() 1217 ) 1218 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1219 self._match(TokenType.R_PAREN) 1220 clone = self.expression( 1221 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1222 ) 1223 1224 return self.expression( 1225 exp.Create, 1226 this=this, 1227 kind=create_token.text, 1228 replace=replace, 1229 unique=unique, 1230 expression=expression, 1231 exists=exists, 1232 properties=properties, 1233 indexes=indexes, 1234 no_schema_binding=no_schema_binding, 1235 begin=begin, 1236 clone=clone, 1237 ) 1238 1239 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1240 # only used for teradata currently 1241 self._match(TokenType.COMMA) 1242 1243 kwargs = { 1244 "no": self._match_text_seq("NO"), 1245 "dual": self._match_text_seq("DUAL"), 1246 "before": self._match_text_seq("BEFORE"), 1247 "default": self._match_text_seq("DEFAULT"), 1248 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1249 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1250 "after": self._match_text_seq("AFTER"), 1251 "minimum": self._match_texts(("MIN", "MINIMUM")), 1252 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1253 } 1254 1255 if self._match_texts(self.PROPERTY_PARSERS): 1256 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1257 try: 1258 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1259 except TypeError: 1260 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1261 1262 return None 1263 1264 def _parse_property(self) -> t.Optional[exp.Expression]: 1265 if 
self._match_texts(self.PROPERTY_PARSERS): 1266 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1267 1268 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1269 return self._parse_character_set(default=True) 1270 1271 if self._match_text_seq("COMPOUND", "SORTKEY"): 1272 return self._parse_sortkey(compound=True) 1273 1274 if self._match_text_seq("SQL", "SECURITY"): 1275 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1276 1277 assignment = self._match_pair( 1278 TokenType.VAR, TokenType.EQ, advance=False 1279 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1280 1281 if assignment: 1282 key = self._parse_var_or_string() 1283 self._match(TokenType.EQ) 1284 return self.expression(exp.Property, this=key, value=self._parse_column()) 1285 1286 return None 1287 1288 def _parse_stored(self) -> exp.Expression: 1289 self._match(TokenType.ALIAS) 1290 1291 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1292 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1293 1294 return self.expression( 1295 exp.FileFormatProperty, 1296 this=self.expression( 1297 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1298 ) 1299 if input_format or output_format 1300 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1301 ) 1302 1303 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1304 self._match(TokenType.EQ) 1305 self._match(TokenType.ALIAS) 1306 return self.expression(exp_class, this=self._parse_field()) 1307 1308 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1309 properties = [] 1310 1311 while True: 1312 if before: 1313 prop = self._parse_property_before() 1314 else: 1315 prop = self._parse_property() 1316 1317 if not prop: 1318 break 1319 for p in ensure_list(prop): 1320 
properties.append(p) 1321 1322 if properties: 1323 return self.expression(exp.Properties, expressions=properties) 1324 1325 return None 1326 1327 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1328 return self.expression( 1329 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1330 ) 1331 1332 def _parse_volatile_property(self) -> exp.Expression: 1333 if self._index >= 2: 1334 pre_volatile_token = self._tokens[self._index - 2] 1335 else: 1336 pre_volatile_token = None 1337 1338 if pre_volatile_token and pre_volatile_token.token_type in ( 1339 TokenType.CREATE, 1340 TokenType.REPLACE, 1341 TokenType.UNIQUE, 1342 ): 1343 return exp.VolatileProperty() 1344 1345 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1346 1347 def _parse_with_property( 1348 self, 1349 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1350 self._match(TokenType.WITH) 1351 if self._match(TokenType.L_PAREN, advance=False): 1352 return self._parse_wrapped_csv(self._parse_property) 1353 1354 if self._match_text_seq("JOURNAL"): 1355 return self._parse_withjournaltable() 1356 1357 if self._match_text_seq("DATA"): 1358 return self._parse_withdata(no=False) 1359 elif self._match_text_seq("NO", "DATA"): 1360 return self._parse_withdata(no=True) 1361 1362 if not self._next: 1363 return None 1364 1365 return self._parse_withisolatedloading() 1366 1367 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1368 def _parse_definer(self) -> t.Optional[exp.Expression]: 1369 self._match(TokenType.EQ) 1370 1371 user = self._parse_id_var() 1372 self._match(TokenType.PARAMETER) 1373 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1374 1375 if not user or not host: 1376 return None 1377 1378 return exp.DefinerProperty(this=f"{user}@{host}") 1379 1380 def _parse_withjournaltable(self) -> exp.Expression: 1381 self._match(TokenType.TABLE) 1382 self._match(TokenType.EQ) 1383 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1384 1385 def _parse_log(self, no: bool = False) -> exp.Expression: 1386 return self.expression(exp.LogProperty, no=no) 1387 1388 def _parse_journal(self, **kwargs) -> exp.Expression: 1389 return self.expression(exp.JournalProperty, **kwargs) 1390 1391 def _parse_checksum(self) -> exp.Expression: 1392 self._match(TokenType.EQ) 1393 1394 on = None 1395 if self._match(TokenType.ON): 1396 on = True 1397 elif self._match_text_seq("OFF"): 1398 on = False 1399 default = self._match(TokenType.DEFAULT) 1400 1401 return self.expression( 1402 exp.ChecksumProperty, 1403 on=on, 1404 default=default, 1405 ) 1406 1407 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1408 if not self._match_text_seq("BY"): 1409 self._retreat(self._index - 1) 1410 return None 1411 return self.expression( 1412 exp.Cluster, 1413 expressions=self._parse_csv(self._parse_ordered), 1414 ) 1415 1416 def _parse_freespace(self) -> exp.Expression: 1417 self._match(TokenType.EQ) 1418 return self.expression( 1419 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1420 ) 1421 1422 def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1423 if self._match(TokenType.EQ): 1424 return self.expression( 1425 exp.MergeBlockRatioProperty, 1426 this=self._parse_number(), 1427 percent=self._match(TokenType.PERCENT), 1428 ) 1429 return self.expression( 1430 exp.MergeBlockRatioProperty, 1431 no=no, 1432 default=default, 1433 ) 1434 1435 def _parse_datablocksize( 1436 self, 1437 default: t.Optional[bool] = None, 1438 minimum: t.Optional[bool] = None, 1439 maximum: t.Optional[bool] = None, 1440 ) -> exp.Expression: 1441 self._match(TokenType.EQ) 1442 size = self._parse_number() 1443 units = None 1444 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1445 units = self._prev.text 1446 return self.expression( 1447 exp.DataBlocksizeProperty, 1448 size=size, 1449 
units=units, 1450 default=default, 1451 minimum=minimum, 1452 maximum=maximum, 1453 ) 1454 1455 def _parse_blockcompression(self) -> exp.Expression: 1456 self._match(TokenType.EQ) 1457 always = self._match_text_seq("ALWAYS") 1458 manual = self._match_text_seq("MANUAL") 1459 never = self._match_text_seq("NEVER") 1460 default = self._match_text_seq("DEFAULT") 1461 autotemp = None 1462 if self._match_text_seq("AUTOTEMP"): 1463 autotemp = self._parse_schema() 1464 1465 return self.expression( 1466 exp.BlockCompressionProperty, 1467 always=always, 1468 manual=manual, 1469 never=never, 1470 default=default, 1471 autotemp=autotemp, 1472 ) 1473 1474 def _parse_withisolatedloading(self) -> exp.Expression: 1475 no = self._match_text_seq("NO") 1476 concurrent = self._match_text_seq("CONCURRENT") 1477 self._match_text_seq("ISOLATED", "LOADING") 1478 for_all = self._match_text_seq("FOR", "ALL") 1479 for_insert = self._match_text_seq("FOR", "INSERT") 1480 for_none = self._match_text_seq("FOR", "NONE") 1481 return self.expression( 1482 exp.IsolatedLoadingProperty, 1483 no=no, 1484 concurrent=concurrent, 1485 for_all=for_all, 1486 for_insert=for_insert, 1487 for_none=for_none, 1488 ) 1489 1490 def _parse_locking(self) -> exp.Expression: 1491 if self._match(TokenType.TABLE): 1492 kind = "TABLE" 1493 elif self._match(TokenType.VIEW): 1494 kind = "VIEW" 1495 elif self._match(TokenType.ROW): 1496 kind = "ROW" 1497 elif self._match_text_seq("DATABASE"): 1498 kind = "DATABASE" 1499 else: 1500 kind = None 1501 1502 if kind in ("DATABASE", "TABLE", "VIEW"): 1503 this = self._parse_table_parts() 1504 else: 1505 this = None 1506 1507 if self._match(TokenType.FOR): 1508 for_or_in = "FOR" 1509 elif self._match(TokenType.IN): 1510 for_or_in = "IN" 1511 else: 1512 for_or_in = None 1513 1514 if self._match_text_seq("ACCESS"): 1515 lock_type = "ACCESS" 1516 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1517 lock_type = "EXCLUSIVE" 1518 elif self._match_text_seq("SHARE"): 1519 lock_type = "SHARE" 
1520 elif self._match_text_seq("READ"): 1521 lock_type = "READ" 1522 elif self._match_text_seq("WRITE"): 1523 lock_type = "WRITE" 1524 elif self._match_text_seq("CHECKSUM"): 1525 lock_type = "CHECKSUM" 1526 else: 1527 lock_type = None 1528 1529 override = self._match_text_seq("OVERRIDE") 1530 1531 return self.expression( 1532 exp.LockingProperty, 1533 this=this, 1534 kind=kind, 1535 for_or_in=for_or_in, 1536 lock_type=lock_type, 1537 override=override, 1538 ) 1539 1540 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1541 if self._match(TokenType.PARTITION_BY): 1542 return self._parse_csv(self._parse_conjunction) 1543 return [] 1544 1545 def _parse_partitioned_by(self) -> exp.Expression: 1546 self._match(TokenType.EQ) 1547 return self.expression( 1548 exp.PartitionedByProperty, 1549 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1550 ) 1551 1552 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1553 if self._match_text_seq("AND", "STATISTICS"): 1554 statistics = True 1555 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1556 statistics = False 1557 else: 1558 statistics = None 1559 1560 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1561 1562 def _parse_no_property(self) -> t.Optional[exp.Property]: 1563 if self._match_text_seq("PRIMARY", "INDEX"): 1564 return exp.NoPrimaryIndexProperty() 1565 return None 1566 1567 def _parse_on_property(self) -> t.Optional[exp.Property]: 1568 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1569 return exp.OnCommitProperty() 1570 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1571 return exp.OnCommitProperty(delete=True) 1572 return None 1573 1574 def _parse_distkey(self) -> exp.Expression: 1575 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1576 1577 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1578 table = self._parse_table(schema=True) 1579 options = [] 1580 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 1581 this = self._prev.text.upper() 1582 id_var = self._parse_id_var() 1583 1584 if not id_var: 1585 return None 1586 1587 options.append( 1588 self.expression( 1589 exp.Property, 1590 this=this, 1591 value=exp.Var(this=id_var.this.upper()), 1592 ) 1593 ) 1594 return self.expression(exp.LikeProperty, this=table, expressions=options) 1595 1596 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1597 return self.expression( 1598 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1599 ) 1600 1601 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1602 self._match(TokenType.EQ) 1603 return self.expression( 1604 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1605 ) 1606 1607 def _parse_returns(self) -> exp.Expression: 1608 value: t.Optional[exp.Expression] 1609 is_table = self._match(TokenType.TABLE) 1610 1611 if is_table: 1612 if self._match(TokenType.LT): 1613 value = self.expression( 1614 exp.Schema, 1615 this="TABLE", 1616 expressions=self._parse_csv(self._parse_struct_types), 1617 ) 1618 if not self._match(TokenType.GT): 1619 self.raise_error("Expecting >") 1620 else: 1621 value = self._parse_schema(exp.Var(this="TABLE")) 1622 else: 1623 value = self._parse_types() 1624 1625 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1626 1627 def _parse_describe(self) -> exp.Expression: 1628 kind = self._match_set(self.CREATABLES) and self._prev.text 1629 this = self._parse_table() 1630 1631 return self.expression(exp.Describe, this=this, kind=kind) 1632 1633 def _parse_insert(self) -> exp.Expression: 1634 overwrite = self._match(TokenType.OVERWRITE) 1635 local = self._match_text_seq("LOCAL") 1636 alternative = None 1637 1638 if self._match_text_seq("DIRECTORY"): 1639 this: t.Optional[exp.Expression] = self.expression( 1640 exp.Directory, 1641 this=self._parse_var_or_string(), 1642 local=local, 1643 
row_format=self._parse_row_format(match_row=True), 1644 ) 1645 else: 1646 if self._match(TokenType.OR): 1647 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1648 1649 self._match(TokenType.INTO) 1650 self._match(TokenType.TABLE) 1651 this = self._parse_table(schema=True) 1652 1653 return self.expression( 1654 exp.Insert, 1655 this=this, 1656 exists=self._parse_exists(), 1657 partition=self._parse_partition(), 1658 expression=self._parse_ddl_select(), 1659 conflict=self._parse_on_conflict(), 1660 returning=self._parse_returning(), 1661 overwrite=overwrite, 1662 alternative=alternative, 1663 ) 1664 1665 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1666 conflict = self._match_text_seq("ON", "CONFLICT") 1667 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1668 1669 if not (conflict or duplicate): 1670 return None 1671 1672 nothing = None 1673 expressions = None 1674 key = None 1675 constraint = None 1676 1677 if conflict: 1678 if self._match_text_seq("ON", "CONSTRAINT"): 1679 constraint = self._parse_id_var() 1680 else: 1681 key = self._parse_csv(self._parse_value) 1682 1683 self._match_text_seq("DO") 1684 if self._match_text_seq("NOTHING"): 1685 nothing = True 1686 else: 1687 self._match(TokenType.UPDATE) 1688 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1689 1690 return self.expression( 1691 exp.OnConflict, 1692 duplicate=duplicate, 1693 expressions=expressions, 1694 nothing=nothing, 1695 key=key, 1696 constraint=constraint, 1697 ) 1698 1699 def _parse_returning(self) -> t.Optional[exp.Expression]: 1700 if not self._match(TokenType.RETURNING): 1701 return None 1702 1703 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1704 1705 def _parse_row(self) -> t.Optional[exp.Expression]: 1706 if not self._match(TokenType.FORMAT): 1707 return None 1708 return self._parse_row_format() 1709 1710 def _parse_row_format(self, match_row: bool = False) 
-> t.Optional[exp.Expression]: 1711 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1712 return None 1713 1714 if self._match_text_seq("SERDE"): 1715 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1716 1717 self._match_text_seq("DELIMITED") 1718 1719 kwargs = {} 1720 1721 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1722 kwargs["fields"] = self._parse_string() 1723 if self._match_text_seq("ESCAPED", "BY"): 1724 kwargs["escaped"] = self._parse_string() 1725 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1726 kwargs["collection_items"] = self._parse_string() 1727 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1728 kwargs["map_keys"] = self._parse_string() 1729 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1730 kwargs["lines"] = self._parse_string() 1731 if self._match_text_seq("NULL", "DEFINED", "AS"): 1732 kwargs["null"] = self._parse_string() 1733 1734 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1735 1736 def _parse_load(self) -> exp.Expression: 1737 if self._match_text_seq("DATA"): 1738 local = self._match_text_seq("LOCAL") 1739 self._match_text_seq("INPATH") 1740 inpath = self._parse_string() 1741 overwrite = self._match(TokenType.OVERWRITE) 1742 self._match_pair(TokenType.INTO, TokenType.TABLE) 1743 1744 return self.expression( 1745 exp.LoadData, 1746 this=self._parse_table(schema=True), 1747 local=local, 1748 overwrite=overwrite, 1749 inpath=inpath, 1750 partition=self._parse_partition(), 1751 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1752 serde=self._match_text_seq("SERDE") and self._parse_string(), 1753 ) 1754 return self._parse_as_command(self._prev) 1755 1756 def _parse_delete(self) -> exp.Expression: 1757 self._match(TokenType.FROM) 1758 1759 return self.expression( 1760 exp.Delete, 1761 this=self._parse_table(), 1762 using=self._parse_csv(lambda: self._match(TokenType.USING) and 
self._parse_table()), 1763 where=self._parse_where(), 1764 returning=self._parse_returning(), 1765 ) 1766 1767 def _parse_update(self) -> exp.Expression: 1768 return self.expression( 1769 exp.Update, 1770 **{ # type: ignore 1771 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1772 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1773 "from": self._parse_from(modifiers=True), 1774 "where": self._parse_where(), 1775 "returning": self._parse_returning(), 1776 }, 1777 ) 1778 1779 def _parse_uncache(self) -> exp.Expression: 1780 if not self._match(TokenType.TABLE): 1781 self.raise_error("Expecting TABLE after UNCACHE") 1782 1783 return self.expression( 1784 exp.Uncache, 1785 exists=self._parse_exists(), 1786 this=self._parse_table(schema=True), 1787 ) 1788 1789 def _parse_cache(self) -> exp.Expression: 1790 lazy = self._match_text_seq("LAZY") 1791 self._match(TokenType.TABLE) 1792 table = self._parse_table(schema=True) 1793 options = [] 1794 1795 if self._match_text_seq("OPTIONS"): 1796 self._match_l_paren() 1797 k = self._parse_string() 1798 self._match(TokenType.EQ) 1799 v = self._parse_string() 1800 options = [k, v] 1801 self._match_r_paren() 1802 1803 self._match(TokenType.ALIAS) 1804 return self.expression( 1805 exp.Cache, 1806 this=table, 1807 lazy=lazy, 1808 options=options, 1809 expression=self._parse_select(nested=True), 1810 ) 1811 1812 def _parse_partition(self) -> t.Optional[exp.Expression]: 1813 if not self._match(TokenType.PARTITION): 1814 return None 1815 1816 return self.expression( 1817 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1818 ) 1819 1820 def _parse_value(self) -> exp.Expression: 1821 if self._match(TokenType.L_PAREN): 1822 expressions = self._parse_csv(self._parse_conjunction) 1823 self._match_r_paren() 1824 return self.expression(exp.Tuple, expressions=expressions) 1825 1826 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1827 # Source: https://prestodb.io/docs/current/sql/values.html 1828 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1829 1830 def _parse_select( 1831 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1832 ) -> t.Optional[exp.Expression]: 1833 cte = self._parse_with() 1834 if cte: 1835 this = self._parse_statement() 1836 1837 if not this: 1838 self.raise_error("Failed to parse any statement following CTE") 1839 return cte 1840 1841 if "with" in this.arg_types: 1842 this.set("with", cte) 1843 else: 1844 self.raise_error(f"{this.key} does not support CTE") 1845 this = cte 1846 elif self._match(TokenType.SELECT): 1847 comments = self._prev_comments 1848 1849 hint = self._parse_hint() 1850 all_ = self._match(TokenType.ALL) 1851 distinct = self._match(TokenType.DISTINCT) 1852 1853 kind = ( 1854 self._match(TokenType.ALIAS) 1855 and self._match_texts(("STRUCT", "VALUE")) 1856 and self._prev.text 1857 ) 1858 1859 if distinct: 1860 distinct = self.expression( 1861 exp.Distinct, 1862 on=self._parse_value() if self._match(TokenType.ON) else None, 1863 ) 1864 1865 if all_ and distinct: 1866 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1867 1868 limit = self._parse_limit(top=True) 1869 expressions = self._parse_csv(self._parse_expression) 1870 1871 this = self.expression( 1872 exp.Select, 1873 kind=kind, 1874 hint=hint, 1875 distinct=distinct, 1876 expressions=expressions, 1877 limit=limit, 1878 ) 1879 this.comments = comments 1880 1881 into = self._parse_into() 1882 if into: 1883 this.set("into", into) 1884 1885 from_ = self._parse_from() 1886 if from_: 1887 this.set("from", from_) 1888 1889 this = self._parse_query_modifiers(this) 1890 elif (table or nested) and self._match(TokenType.L_PAREN): 1891 this = self._parse_table() if table else self._parse_select(nested=True) 1892 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1893 self._match_r_paren() 1894 1895 # early 
return so that subquery unions aren't parsed again 1896 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1897 # Union ALL should be a property of the top select node, not the subquery 1898 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1899 elif self._match(TokenType.VALUES): 1900 this = self.expression( 1901 exp.Values, 1902 expressions=self._parse_csv(self._parse_value), 1903 alias=self._parse_table_alias(), 1904 ) 1905 elif self._match(TokenType.PIVOT): 1906 this = self._parse_simplified_pivot() 1907 elif self._match(TokenType.FROM): 1908 this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True))) 1909 else: 1910 this = None 1911 1912 return self._parse_set_operations(this) 1913 1914 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1915 if not skip_with_token and not self._match(TokenType.WITH): 1916 return None 1917 1918 comments = self._prev_comments 1919 recursive = self._match(TokenType.RECURSIVE) 1920 1921 expressions = [] 1922 while True: 1923 expressions.append(self._parse_cte()) 1924 1925 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1926 break 1927 else: 1928 self._match(TokenType.WITH) 1929 1930 return self.expression( 1931 exp.With, comments=comments, expressions=expressions, recursive=recursive 1932 ) 1933 1934 def _parse_cte(self) -> exp.Expression: 1935 alias = self._parse_table_alias() 1936 if not alias or not alias.this: 1937 self.raise_error("Expected CTE to have alias") 1938 1939 self._match(TokenType.ALIAS) 1940 1941 return self.expression( 1942 exp.CTE, 1943 this=self._parse_wrapped(self._parse_statement), 1944 alias=alias, 1945 ) 1946 1947 def _parse_table_alias( 1948 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1949 ) -> t.Optional[exp.Expression]: 1950 any_token = self._match(TokenType.ALIAS) 1951 alias = ( 1952 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1953 or 
self._parse_string_as_identifier() 1954 ) 1955 1956 index = self._index 1957 if self._match(TokenType.L_PAREN): 1958 columns = self._parse_csv(self._parse_function_parameter) 1959 self._match_r_paren() if columns else self._retreat(index) 1960 else: 1961 columns = None 1962 1963 if not alias and not columns: 1964 return None 1965 1966 return self.expression(exp.TableAlias, this=alias, columns=columns) 1967 1968 def _parse_subquery( 1969 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1970 ) -> t.Optional[exp.Expression]: 1971 if not this: 1972 return None 1973 return self.expression( 1974 exp.Subquery, 1975 this=this, 1976 pivots=self._parse_pivots(), 1977 alias=self._parse_table_alias() if parse_alias else None, 1978 ) 1979 1980 def _parse_query_modifiers( 1981 self, this: t.Optional[exp.Expression] 1982 ) -> t.Optional[exp.Expression]: 1983 if isinstance(this, self.MODIFIABLES): 1984 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1985 expression = parser(self) 1986 1987 if expression: 1988 this.set(key, expression) 1989 return this 1990 1991 def _parse_hint(self) -> t.Optional[exp.Expression]: 1992 if self._match(TokenType.HINT): 1993 hints = self._parse_csv(self._parse_function) 1994 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1995 self.raise_error("Expected */ after HINT") 1996 return self.expression(exp.Hint, expressions=hints) 1997 1998 return None 1999 2000 def _parse_into(self) -> t.Optional[exp.Expression]: 2001 if not self._match(TokenType.INTO): 2002 return None 2003 2004 temp = self._match(TokenType.TEMPORARY) 2005 unlogged = self._match_text_seq("UNLOGGED") 2006 self._match(TokenType.TABLE) 2007 2008 return self.expression( 2009 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2010 ) 2011 2012 def _parse_from( 2013 self, modifiers: bool = False, skip_from_token: bool = False 2014 ) -> t.Optional[exp.From]: 2015 if not skip_from_token and not self._match(TokenType.FROM): 2016 return 
None 2017 2018 comments = self._prev_comments 2019 this = self._parse_table() 2020 2021 return self.expression( 2022 exp.From, 2023 comments=comments, 2024 this=self._parse_query_modifiers(this) if modifiers else this, 2025 ) 2026 2027 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2028 if not self._match(TokenType.MATCH_RECOGNIZE): 2029 return None 2030 2031 self._match_l_paren() 2032 2033 partition = self._parse_partition_by() 2034 order = self._parse_order() 2035 measures = ( 2036 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2037 ) 2038 2039 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2040 rows = exp.Var(this="ONE ROW PER MATCH") 2041 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2042 text = "ALL ROWS PER MATCH" 2043 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2044 text += f" SHOW EMPTY MATCHES" 2045 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2046 text += f" OMIT EMPTY MATCHES" 2047 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2048 text += f" WITH UNMATCHED ROWS" 2049 rows = exp.Var(this=text) 2050 else: 2051 rows = None 2052 2053 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2054 text = "AFTER MATCH SKIP" 2055 if self._match_text_seq("PAST", "LAST", "ROW"): 2056 text += f" PAST LAST ROW" 2057 elif self._match_text_seq("TO", "NEXT", "ROW"): 2058 text += f" TO NEXT ROW" 2059 elif self._match_text_seq("TO", "FIRST"): 2060 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2061 elif self._match_text_seq("TO", "LAST"): 2062 text += f" TO LAST {self._advance_any().text}" # type: ignore 2063 after = exp.Var(this=text) 2064 else: 2065 after = None 2066 2067 if self._match_text_seq("PATTERN"): 2068 self._match_l_paren() 2069 2070 if not self._curr: 2071 self.raise_error("Expecting )", self._curr) 2072 2073 paren = 1 2074 start = self._curr 2075 2076 while self._curr and paren > 0: 2077 if self._curr.token_type == TokenType.L_PAREN: 2078 paren 
+= 1 2079 if self._curr.token_type == TokenType.R_PAREN: 2080 paren -= 1 2081 end = self._prev 2082 self._advance() 2083 if paren > 0: 2084 self.raise_error("Expecting )", self._curr) 2085 pattern = exp.Var(this=self._find_sql(start, end)) 2086 else: 2087 pattern = None 2088 2089 define = ( 2090 self._parse_csv( 2091 lambda: self.expression( 2092 exp.Alias, 2093 alias=self._parse_id_var(any_token=True), 2094 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2095 ) 2096 ) 2097 if self._match_text_seq("DEFINE") 2098 else None 2099 ) 2100 2101 self._match_r_paren() 2102 2103 return self.expression( 2104 exp.MatchRecognize, 2105 partition_by=partition, 2106 order=order, 2107 measures=measures, 2108 rows=rows, 2109 after=after, 2110 pattern=pattern, 2111 define=define, 2112 alias=self._parse_table_alias(), 2113 ) 2114 2115 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2116 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2117 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2118 2119 if outer_apply or cross_apply: 2120 this = self._parse_select(table=True) 2121 view = None 2122 outer = not cross_apply 2123 elif self._match(TokenType.LATERAL): 2124 this = self._parse_select(table=True) 2125 view = self._match(TokenType.VIEW) 2126 outer = self._match(TokenType.OUTER) 2127 else: 2128 return None 2129 2130 if not this: 2131 this = self._parse_function() or self._parse_id_var(any_token=False) 2132 while self._match(TokenType.DOT): 2133 this = exp.Dot( 2134 this=this, 2135 expression=self._parse_function() or self._parse_id_var(any_token=False), 2136 ) 2137 2138 table_alias: t.Optional[exp.Expression] 2139 2140 if view: 2141 table = self._parse_id_var(any_token=False) 2142 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2143 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2144 else: 2145 table_alias = self._parse_table_alias() 2146 2147 expression = 
        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional NATURAL / side / kind tokens preceding JOIN."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and OUTER/CROSS APPLY."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join -- rewind past the natural/side/kind tokens.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an index definition; `index` is set when the name was already parsed."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")
            if not self._match(TokenType.INDEX):
                return None
            index = self._parse_id_var()
            table = None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, VALUES, subquery or a plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether the alias precedes or follows TABLESAMPLE.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only dialects the alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")
exp.Identifier(this="offset") 2348 2349 return self.expression( 2350 exp.Unnest, 2351 expressions=expressions, 2352 ordinality=ordinality, 2353 alias=alias, 2354 offset=offset, 2355 ) 2356 2357 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2358 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2359 if not is_derived and not self._match(TokenType.VALUES): 2360 return None 2361 2362 expressions = self._parse_csv(self._parse_value) 2363 2364 if is_derived: 2365 self._match_r_paren() 2366 2367 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2368 2369 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2370 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2371 as_modifier and self._match_text_seq("USING", "SAMPLE") 2372 ): 2373 return None 2374 2375 bucket_numerator = None 2376 bucket_denominator = None 2377 bucket_field = None 2378 percent = None 2379 rows = None 2380 size = None 2381 seed = None 2382 2383 kind = ( 2384 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2385 ) 2386 method = self._parse_var(tokens=(TokenType.ROW,)) 2387 2388 self._match(TokenType.L_PAREN) 2389 2390 num = self._parse_number() 2391 2392 if self._match_text_seq("BUCKET"): 2393 bucket_numerator = self._parse_number() 2394 self._match_text_seq("OUT", "OF") 2395 bucket_denominator = bucket_denominator = self._parse_number() 2396 self._match(TokenType.ON) 2397 bucket_field = self._parse_field() 2398 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2399 percent = num 2400 elif self._match(TokenType.ROWS): 2401 rows = num 2402 else: 2403 size = num 2404 2405 self._match(TokenType.R_PAREN) 2406 2407 if self._match(TokenType.L_PAREN): 2408 method = self._parse_var() 2409 seed = self._match(TokenType.COMMA) and self._parse_number() 2410 self._match_r_paren() 2411 elif self._match_texts(("SEED", "REPEATABLE")): 2412 seed = 
self._parse_wrapped(self._parse_number) 2413 2414 return self.expression( 2415 exp.TableSample, 2416 method=method, 2417 bucket_numerator=bucket_numerator, 2418 bucket_denominator=bucket_denominator, 2419 bucket_field=bucket_field, 2420 percent=percent, 2421 rows=rows, 2422 size=size, 2423 seed=seed, 2424 kind=kind, 2425 ) 2426 2427 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2428 return list(iter(self._parse_pivot, None)) 2429 2430 # https://duckdb.org/docs/sql/statements/pivot 2431 def _parse_simplified_pivot(self) -> exp.Pivot: 2432 def _parse_on() -> t.Optional[exp.Expression]: 2433 this = self._parse_bitwise() 2434 return self._parse_in(this) if self._match(TokenType.IN) else this 2435 2436 this = self._parse_table() 2437 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2438 using = self._match(TokenType.USING) and self._parse_csv( 2439 lambda: self._parse_alias(self._parse_function()) 2440 ) 2441 group = self._parse_group() 2442 return self.expression( 2443 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2444 ) 2445 2446 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2447 index = self._index 2448 2449 if self._match(TokenType.PIVOT): 2450 unpivot = False 2451 elif self._match(TokenType.UNPIVOT): 2452 unpivot = True 2453 else: 2454 return None 2455 2456 expressions = [] 2457 field = None 2458 2459 if not self._match(TokenType.L_PAREN): 2460 self._retreat(index) 2461 return None 2462 2463 if unpivot: 2464 expressions = self._parse_csv(self._parse_column) 2465 else: 2466 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2467 2468 if not expressions: 2469 self.raise_error("Failed to parse PIVOT's aggregation list") 2470 2471 if not self._match(TokenType.FOR): 2472 self.raise_error("Expecting FOR") 2473 2474 value = self._parse_column() 2475 2476 if not self._match(TokenType.IN): 2477 self.raise_error("Expecting IN") 2478 2479 field = self._parse_in(value, 
        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach an alias when another PIVOT/UNPIVOT doesn't follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the generated output column names for the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot output column naming: the aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE, TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP takes no column list; plain ROLLUP is followed by one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...)."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )
    def _parse_sort(
        self, exp_class: t.Type[exp.Expression], *texts: str
    ) -> t.Optional[exp.Expression]:
        """Parse a sort clause introduced by the given keyword sequence (e.g. SORT BY)."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term, resolving ASC/DESC and NULLS FIRST/LAST."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When no null order was given, derive it from the dialect's
        # null_ordering setting so transpilation stays consistent.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH FIRST/NEXT; returns `this` unchanged when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW | ROWS]."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...) plus ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {NULL | TRUE | FALSE | DISTINCT FROM ...}."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all -- rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST, a list/subquery, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse the tail of a BETWEEN predicate: low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression into exp.Interval."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> built from LT/GT token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to AT TIME ZONE over a typed term."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast of the form <type> <literal>, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: re-parse it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type size argument, e.g. the `10` in VARCHAR(10), with optional unit."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, or None on failure.

        With check_func=True, a type name followed by non-type arguments is
        rejected so the caller can re-parse it as a function call.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call with these args -- decided below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Trailing [] pairs build up nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Looks like a function call, not a type -- rewind completely.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: name [:] type."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, delegating dots/brackets to _parse_column_ops."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Repeatedly apply trailing column operators (::, ., brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: x.y.z becomes catalog=x, db=y, column=z.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
and self._curr: 3033 self._advance() 3034 value = self._prev.text 3035 field = ( 3036 exp.Literal.number(value) 3037 if self._prev.token_type == TokenType.NUMBER 3038 else exp.Literal.string(value) 3039 ) 3040 else: 3041 field = ( 3042 self._parse_star() 3043 or self._parse_function(anonymous=True) 3044 or self._parse_id_var() 3045 ) 3046 3047 if isinstance(field, exp.Func): 3048 # bigquery allows function calls like x.y.count(...) 3049 # SAFE.SUBSTR(...) 3050 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3051 this = self._replace_columns_with_dots(this) 3052 3053 if op: 3054 this = op(self, this, field) 3055 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3056 this = self.expression( 3057 exp.Column, 3058 this=field, 3059 table=this.this, 3060 db=this.args.get("table"), 3061 catalog=this.args.get("db"), 3062 ) 3063 else: 3064 this = self.expression(exp.Dot, this=this, expression=field) 3065 this = self._parse_bracket(this) 3066 return this 3067 3068 def _parse_primary(self) -> t.Optional[exp.Expression]: 3069 if self._match_set(self.PRIMARY_PARSERS): 3070 token_type = self._prev.token_type 3071 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3072 3073 if token_type == TokenType.STRING: 3074 expressions = [primary] 3075 while self._match(TokenType.STRING): 3076 expressions.append(exp.Literal.string(self._prev.text)) 3077 if len(expressions) > 1: 3078 return self.expression(exp.Concat, expressions=expressions) 3079 return primary 3080 3081 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3082 return exp.Literal.number(f"0.{self._prev.text}") 3083 3084 if self._match(TokenType.L_PAREN): 3085 comments = self._prev_comments 3086 query = self._parse_select() 3087 3088 if query: 3089 expressions = [query] 3090 else: 3091 expressions = self._parse_csv(self._parse_expression) 3092 3093 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3094 3095 if isinstance(this, 
exp.Subqueryable): 3096 this = self._parse_set_operations( 3097 self._parse_subquery(this=this, parse_alias=False) 3098 ) 3099 elif len(expressions) > 1: 3100 this = self.expression(exp.Tuple, expressions=expressions) 3101 else: 3102 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3103 3104 if this: 3105 this.add_comments(comments) 3106 self._match_r_paren(expression=this) 3107 3108 return this 3109 3110 return None 3111 3112 def _parse_field( 3113 self, 3114 any_token: bool = False, 3115 tokens: t.Optional[t.Collection[TokenType]] = None, 3116 ) -> t.Optional[exp.Expression]: 3117 return ( 3118 self._parse_primary() 3119 or self._parse_function() 3120 or self._parse_id_var(any_token=any_token, tokens=tokens) 3121 ) 3122 3123 def _parse_function( 3124 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3125 ) -> t.Optional[exp.Expression]: 3126 if not self._curr: 3127 return None 3128 3129 token_type = self._curr.token_type 3130 3131 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3132 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3133 3134 if not self._next or self._next.token_type != TokenType.L_PAREN: 3135 if token_type in self.NO_PAREN_FUNCTIONS: 3136 self._advance() 3137 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3138 3139 return None 3140 3141 if token_type not in self.FUNC_TOKENS: 3142 return None 3143 3144 this = self._curr.text 3145 upper = this.upper() 3146 self._advance(2) 3147 3148 parser = self.FUNCTION_PARSERS.get(upper) 3149 3150 if parser and not anonymous: 3151 this = parser(self) 3152 else: 3153 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3154 3155 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3156 this = self.expression(subquery_predicate, this=self._parse_select()) 3157 self._match_r_paren() 3158 return this 3159 3160 if functions is None: 3161 functions = self.FUNCTIONS 3162 3163 function = 
functions.get(upper) 3164 3165 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3166 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3167 3168 if function and not anonymous: 3169 this = function(args) 3170 self.validate_expression(this, args) 3171 else: 3172 this = self.expression(exp.Anonymous, this=this, expressions=args) 3173 3174 self._match_r_paren(this) 3175 return self._parse_window(this) 3176 3177 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3178 return self._parse_column_def(self._parse_id_var()) 3179 3180 def _parse_user_defined_function( 3181 self, kind: t.Optional[TokenType] = None 3182 ) -> t.Optional[exp.Expression]: 3183 this = self._parse_id_var() 3184 3185 while self._match(TokenType.DOT): 3186 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3187 3188 if not self._match(TokenType.L_PAREN): 3189 return this 3190 3191 expressions = self._parse_csv(self._parse_function_parameter) 3192 self._match_r_paren() 3193 return self.expression( 3194 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3195 ) 3196 3197 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3198 literal = self._parse_primary() 3199 if literal: 3200 return self.expression(exp.Introducer, this=token.text, expression=literal) 3201 3202 return self.expression(exp.Identifier, this=token.text) 3203 3204 def _parse_session_parameter(self) -> exp.Expression: 3205 kind = None 3206 this = self._parse_id_var() or self._parse_primary() 3207 3208 if this and self._match(TokenType.DOT): 3209 kind = this.name 3210 this = self._parse_var() or self._parse_primary() 3211 3212 return self.expression(exp.SessionParameter, this=this, kind=kind) 3213 3214 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3215 index = self._index 3216 3217 if self._match(TokenType.L_PAREN): 3218 expressions = self._parse_csv(self._parse_id_var) 3219 3220 if not 
self._match(TokenType.R_PAREN): 3221 self._retreat(index) 3222 else: 3223 expressions = [self._parse_id_var()] 3224 3225 if self._match_set(self.LAMBDAS): 3226 return self.LAMBDAS[self._prev.token_type](self, expressions) 3227 3228 self._retreat(index) 3229 3230 this: t.Optional[exp.Expression] 3231 3232 if self._match(TokenType.DISTINCT): 3233 this = self.expression( 3234 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3235 ) 3236 else: 3237 this = self._parse_select_or_expression(alias=alias) 3238 3239 if isinstance(this, exp.EQ): 3240 left = this.this 3241 if isinstance(left, exp.Column): 3242 left.replace(exp.Var(this=left.text("this"))) 3243 3244 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3245 3246 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3247 index = self._index 3248 3249 if not self.errors: 3250 try: 3251 if self._parse_select(nested=True): 3252 return this 3253 except ParseError: 3254 pass 3255 finally: 3256 self.errors.clear() 3257 self._retreat(index) 3258 3259 if not self._match(TokenType.L_PAREN): 3260 return this 3261 3262 args = self._parse_csv( 3263 lambda: self._parse_constraint() 3264 or self._parse_column_def(self._parse_field(any_token=True)) 3265 ) 3266 self._match_r_paren() 3267 return self.expression(exp.Schema, this=this, expressions=args) 3268 3269 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3270 # column defs are not really columns, they're identifiers 3271 if isinstance(this, exp.Column): 3272 this = this.this 3273 kind = self._parse_types() 3274 3275 if self._match_text_seq("FOR", "ORDINALITY"): 3276 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3277 3278 constraints = [] 3279 while True: 3280 constraint = self._parse_column_constraint() 3281 if not constraint: 3282 break 3283 constraints.append(constraint) 3284 3285 if not kind and not constraints: 3286 
            # Bare identifier with no type and no constraints: not a real column def.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a parenthesized list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Optional sequence options in any combination.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse INLINE [LENGTH] <value> (Teradata-style inline length constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a NOT-prefixed constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via a leading CONSTRAINT keyword."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint; without CONSTRAINT, fall back to an unnamed one."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint keyword and dispatch to its registered parser.

        Raises a ParseError when the matched keyword has no parser registered.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE [KEY] [(columns)]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE, ...)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(columns)] [options]; `match` controls whether the
        REFERENCES keyword itself is required."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions,
            options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (columns) [REFERENCES ...] [ON DELETE/UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Keyed by event ("delete" / "update"); value is the action string.
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint ([ASC|DESC]) or as a
        table constraint with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse a [...] subscript or {...} struct literal attached to `this`.

        Recurses so chained subscripts (x[0][1]) are handled; returns `this`
        unchanged when no bracket follows.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice: x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect-specific array indexing to the configured base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice (this:expr) when a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in function form IF(cond, t[, f]) or statement form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # -1 to rewind over the already-consumed IF token on failure.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(<expr> AS <type>); `strict` selects exp.Cast vs exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the WITHIN GROUP form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(a, b); `strict` selects Cast vs TryCast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: plain charset decode.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: DECODE treats NULL = NULL as a match, so OR in
                # an explicit both-NULL check.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse a JSON_OBJECT entry: [KEY] <key> [:|VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its optional NULL handling, uniqueness,
        RETURNING, FORMAT JSON and ENCODING clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...), honoring the dialect's argument order and 1-arg default (LN vs LOG)."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH(cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: <name> <type> [path] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated call form;
        `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH.
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first expression was the chars to trim.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IGNORE NULLS / RESPECT NULLS when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that can follow a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias` handles named WINDOW definitions."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame endpoint (UNBOUNDED / CURRENT ROW / expression) plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this`; with `explicit`, the AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token.

        Args:
            any_token: accept any non-reserved token as the identifier.
            tokens: token types to accept instead of the default ID_VAR_TOKENS.
            prefix_tokens: token types whose text is glued onto the front of the name.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal; falls back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal; falls back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier; falls back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any non-reserved token (with `any_token`),
        or one of the given token types; falls back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, or a string literal when no Var matches."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or return None."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, or return None."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token, or return None."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (?, :name, ...); rewinds when the registered parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not
self._match(TokenType.EXCEPT): 4104 return None 4105 if self._match(TokenType.L_PAREN, advance=False): 4106 return self._parse_wrapped_csv(self._parse_column) 4107 return self._parse_csv(self._parse_column) 4108 4109 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4110 if not self._match(TokenType.REPLACE): 4111 return None 4112 if self._match(TokenType.L_PAREN, advance=False): 4113 return self._parse_wrapped_csv(self._parse_expression) 4114 return self._parse_csv(self._parse_expression) 4115 4116 def _parse_csv( 4117 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4118 ) -> t.List[t.Optional[exp.Expression]]: 4119 parse_result = parse_method() 4120 items = [parse_result] if parse_result is not None else [] 4121 4122 while self._match(sep): 4123 self._add_comments(parse_result) 4124 parse_result = parse_method() 4125 if parse_result is not None: 4126 items.append(parse_result) 4127 4128 return items 4129 4130 def _parse_tokens( 4131 self, parse_method: t.Callable, expressions: t.Dict 4132 ) -> t.Optional[exp.Expression]: 4133 this = parse_method() 4134 4135 while self._match_set(expressions): 4136 this = self.expression( 4137 expressions[self._prev.token_type], 4138 this=this, 4139 comments=self._prev_comments, 4140 expression=parse_method(), 4141 ) 4142 4143 return this 4144 4145 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4146 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4147 4148 def _parse_wrapped_csv( 4149 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4150 ) -> t.List[t.Optional[exp.Expression]]: 4151 return self._parse_wrapped( 4152 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4153 ) 4154 4155 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4156 wrapped = self._match(TokenType.L_PAREN) 4157 if not wrapped and not optional: 4158 
self.raise_error("Expecting (") 4159 parse_result = parse_method() 4160 if wrapped: 4161 self._match_r_paren() 4162 return parse_result 4163 4164 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4165 return self._parse_select() or self._parse_set_operations( 4166 self._parse_expression() if alias else self._parse_conjunction() 4167 ) 4168 4169 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4170 return self._parse_query_modifiers( 4171 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4172 ) 4173 4174 def _parse_transaction(self) -> exp.Expression: 4175 this = None 4176 if self._match_texts(self.TRANSACTION_KIND): 4177 this = self._prev.text 4178 4179 self._match_texts({"TRANSACTION", "WORK"}) 4180 4181 modes = [] 4182 while True: 4183 mode = [] 4184 while self._match(TokenType.VAR): 4185 mode.append(self._prev.text) 4186 4187 if mode: 4188 modes.append(" ".join(mode)) 4189 if not self._match(TokenType.COMMA): 4190 break 4191 4192 return self.expression(exp.Transaction, this=this, modes=modes) 4193 4194 def _parse_commit_or_rollback(self) -> exp.Expression: 4195 chain = None 4196 savepoint = None 4197 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4198 4199 self._match_texts({"TRANSACTION", "WORK"}) 4200 4201 if self._match_text_seq("TO"): 4202 self._match_text_seq("SAVEPOINT") 4203 savepoint = self._parse_id_var() 4204 4205 if self._match(TokenType.AND): 4206 chain = not self._match_text_seq("NO") 4207 self._match_text_seq("CHAIN") 4208 4209 if is_rollback: 4210 return self.expression(exp.Rollback, savepoint=savepoint) 4211 return self.expression(exp.Commit, chain=chain) 4212 4213 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4214 if not self._match_text_seq("ADD"): 4215 return None 4216 4217 self._match(TokenType.COLUMN) 4218 exists_column = self._parse_exists(not_=True) 4219 expression = self._parse_column_def(self._parse_field(any_token=True)) 4220 
4221 if expression: 4222 expression.set("exists", exists_column) 4223 4224 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4225 if self._match_texts(("FIRST", "AFTER")): 4226 position = self._prev.text 4227 column_position = self.expression( 4228 exp.ColumnPosition, this=self._parse_column(), position=position 4229 ) 4230 expression.set("position", column_position) 4231 4232 return expression 4233 4234 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4235 drop = self._match(TokenType.DROP) and self._parse_drop() 4236 if drop and not isinstance(drop, exp.Command): 4237 drop.set("kind", drop.args.get("kind", "COLUMN")) 4238 return drop 4239 4240 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4241 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4242 return self.expression( 4243 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4244 ) 4245 4246 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4247 this = None 4248 kind = self._prev.token_type 4249 4250 if kind == TokenType.CONSTRAINT: 4251 this = self._parse_id_var() 4252 4253 if self._match_text_seq("CHECK"): 4254 expression = self._parse_wrapped(self._parse_conjunction) 4255 enforced = self._match_text_seq("ENFORCED") 4256 4257 return self.expression( 4258 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4259 ) 4260 4261 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4262 expression = self._parse_foreign_key() 4263 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4264 expression = self._parse_primary_key() 4265 else: 4266 expression = None 4267 4268 return self.expression(exp.AddConstraint, this=this, expression=expression) 4269 4270 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4271 index = self._index - 1 4272 4273 if 
self._match_set(self.ADD_CONSTRAINT_TOKENS): 4274 return self._parse_csv(self._parse_add_constraint) 4275 4276 self._retreat(index) 4277 return self._parse_csv(self._parse_add_column) 4278 4279 def _parse_alter_table_alter(self) -> exp.Expression: 4280 self._match(TokenType.COLUMN) 4281 column = self._parse_field(any_token=True) 4282 4283 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4284 return self.expression(exp.AlterColumn, this=column, drop=True) 4285 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4286 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4287 4288 self._match_text_seq("SET", "DATA") 4289 return self.expression( 4290 exp.AlterColumn, 4291 this=column, 4292 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4293 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4294 using=self._match(TokenType.USING) and self._parse_conjunction(), 4295 ) 4296 4297 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4298 index = self._index - 1 4299 4300 partition_exists = self._parse_exists() 4301 if self._match(TokenType.PARTITION, advance=False): 4302 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4303 4304 self._retreat(index) 4305 return self._parse_csv(self._parse_drop_column) 4306 4307 def _parse_alter_table_rename(self) -> exp.Expression: 4308 self._match_text_seq("TO") 4309 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4310 4311 def _parse_alter(self) -> t.Optional[exp.Expression]: 4312 start = self._prev 4313 4314 if not self._match(TokenType.TABLE): 4315 return self._parse_as_command(start) 4316 4317 exists = self._parse_exists() 4318 this = self._parse_table(schema=True) 4319 4320 if self._next: 4321 self._advance() 4322 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4323 4324 if parser: 4325 actions = ensure_list(parser(self)) 4326 4327 if not self._curr: 
4328 return self.expression( 4329 exp.AlterTable, 4330 this=this, 4331 exists=exists, 4332 actions=actions, 4333 ) 4334 return self._parse_as_command(start) 4335 4336 def _parse_merge(self) -> exp.Expression: 4337 self._match(TokenType.INTO) 4338 target = self._parse_table() 4339 4340 self._match(TokenType.USING) 4341 using = self._parse_table() 4342 4343 self._match(TokenType.ON) 4344 on = self._parse_conjunction() 4345 4346 whens = [] 4347 while self._match(TokenType.WHEN): 4348 matched = not self._match(TokenType.NOT) 4349 self._match_text_seq("MATCHED") 4350 source = ( 4351 False 4352 if self._match_text_seq("BY", "TARGET") 4353 else self._match_text_seq("BY", "SOURCE") 4354 ) 4355 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4356 4357 self._match(TokenType.THEN) 4358 4359 if self._match(TokenType.INSERT): 4360 _this = self._parse_star() 4361 if _this: 4362 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4363 else: 4364 then = self.expression( 4365 exp.Insert, 4366 this=self._parse_value(), 4367 expression=self._match(TokenType.VALUES) and self._parse_value(), 4368 ) 4369 elif self._match(TokenType.UPDATE): 4370 expressions = self._parse_star() 4371 if expressions: 4372 then = self.expression(exp.Update, expressions=expressions) 4373 else: 4374 then = self.expression( 4375 exp.Update, 4376 expressions=self._match(TokenType.SET) 4377 and self._parse_csv(self._parse_equality), 4378 ) 4379 elif self._match(TokenType.DELETE): 4380 then = self.expression(exp.Var, this=self._prev.text) 4381 else: 4382 then = None 4383 4384 whens.append( 4385 self.expression( 4386 exp.When, 4387 matched=matched, 4388 source=source, 4389 condition=condition, 4390 then=then, 4391 ) 4392 ) 4393 4394 return self.expression( 4395 exp.Merge, 4396 this=target, 4397 using=using, 4398 on=on, 4399 expressions=whens, 4400 ) 4401 4402 def _parse_show(self) -> t.Optional[exp.Expression]: 4403 parser = self._find_parser(self.SHOW_PARSERS, 
self._show_trie) # type: ignore 4404 if parser: 4405 return parser(self) 4406 self._advance() 4407 return self.expression(exp.Show, this=self._prev.text.upper()) 4408 4409 def _parse_set_item_assignment( 4410 self, kind: t.Optional[str] = None 4411 ) -> t.Optional[exp.Expression]: 4412 index = self._index 4413 4414 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4415 return self._parse_set_transaction(global_=kind == "GLOBAL") 4416 4417 left = self._parse_primary() or self._parse_id_var() 4418 4419 if not self._match_texts(("=", "TO")): 4420 self._retreat(index) 4421 return None 4422 4423 right = self._parse_statement() or self._parse_id_var() 4424 this = self.expression( 4425 exp.EQ, 4426 this=left, 4427 expression=right, 4428 ) 4429 4430 return self.expression( 4431 exp.SetItem, 4432 this=this, 4433 kind=kind, 4434 ) 4435 4436 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4437 self._match_text_seq("TRANSACTION") 4438 characteristics = self._parse_csv( 4439 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4440 ) 4441 return self.expression( 4442 exp.SetItem, 4443 expressions=characteristics, 4444 kind="TRANSACTION", 4445 **{"global": global_}, # type: ignore 4446 ) 4447 4448 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4449 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4450 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4451 4452 def _parse_set(self) -> exp.Expression: 4453 index = self._index 4454 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4455 4456 if self._curr: 4457 self._retreat(index) 4458 return self._parse_as_command(self._prev) 4459 4460 return set_ 4461 4462 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4463 for option in options: 4464 if self._match_text_seq(*option.split(" ")): 4465 return exp.Var(this=option) 4466 
return None 4467 4468 def _parse_as_command(self, start: Token) -> exp.Command: 4469 while self._curr: 4470 self._advance() 4471 text = self._find_sql(start, self._prev) 4472 size = len(start.text) 4473 return exp.Command(this=text[:size], expression=text[size:]) 4474 4475 def _find_parser( 4476 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4477 ) -> t.Optional[t.Callable]: 4478 if not self._curr: 4479 return None 4480 4481 index = self._index 4482 this = [] 4483 while True: 4484 # The current token might be multiple words 4485 curr = self._curr.text.upper() 4486 key = curr.split(" ") 4487 this.append(curr) 4488 self._advance() 4489 result, trie = in_trie(trie, key) 4490 if result == 0: 4491 break 4492 if result == 2: 4493 subparser = parsers[" ".join(this)] 4494 return subparser 4495 self._retreat(index) 4496 return None 4497 4498 def _match(self, token_type, advance=True, expression=None): 4499 if not self._curr: 4500 return None 4501 4502 if self._curr.token_type == token_type: 4503 if advance: 4504 self._advance() 4505 self._add_comments(expression) 4506 return True 4507 4508 return None 4509 4510 def _match_set(self, types, advance=True): 4511 if not self._curr: 4512 return None 4513 4514 if self._curr.token_type in types: 4515 if advance: 4516 self._advance() 4517 return True 4518 4519 return None 4520 4521 def _match_pair(self, token_type_a, token_type_b, advance=True): 4522 if not self._curr or not self._next: 4523 return None 4524 4525 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4526 if advance: 4527 self._advance(2) 4528 return True 4529 4530 return None 4531 4532 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4533 if not self._match(TokenType.L_PAREN, expression=expression): 4534 self.raise_error("Expecting (") 4535 4536 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4537 if not self._match(TokenType.R_PAREN, expression=expression): 4538 
self.raise_error("Expecting )") 4539 4540 def _match_texts(self, texts, advance=True): 4541 if self._curr and self._curr.text.upper() in texts: 4542 if advance: 4543 self._advance() 4544 return True 4545 return False 4546 4547 def _match_text_seq(self, *texts, advance=True): 4548 index = self._index 4549 for text in texts: 4550 if self._curr and self._curr.text.upper() == text: 4551 self._advance() 4552 else: 4553 self._retreat(index) 4554 return False 4555 4556 if not advance: 4557 self._retreat(index) 4558 4559 return True 4560 4561 @t.overload 4562 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4563 ... 4564 4565 @t.overload 4566 def _replace_columns_with_dots( 4567 self, this: t.Optional[exp.Expression] 4568 ) -> t.Optional[exp.Expression]: 4569 ... 4570 4571 def _replace_columns_with_dots(self, this): 4572 if isinstance(this, exp.Dot): 4573 exp.replace_children(this, self._replace_columns_with_dots) 4574 elif isinstance(this, exp.Column): 4575 exp.replace_children(this, self._replace_columns_with_dots) 4576 table = this.args.get("table") 4577 this = ( 4578 self.expression(exp.Dot, this=table, expression=this.this) 4579 if table 4580 else self.expression(exp.Var, this=this.name) 4581 ) 4582 elif isinstance(this, exp.Identifier): 4583 this = self.expression(exp.Var, this=this.name) 4584 4585 return this 4586 4587 def _replace_lambda( 4588 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4589 ) -> t.Optional[exp.Expression]: 4590 if not node: 4591 return node 4592 4593 for column in node.find_all(exp.Column): 4594 if column.parts[0].name in lambda_variables: 4595 dot_or_id = column.to_dot() if column.table else column.this 4596 parent = column.parent 4597 4598 while isinstance(parent, exp.Dot): 4599 if not isinstance(parent.parent, exp.Dot): 4600 parent.replace(dot_or_id) 4601 break 4602 parent = parent.parent 4603 else: 4604 if column is node: 4605 node = dot_or_id 4606 else: 4607 column.replace(dot_or_id) 4608 
return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat, alternating key/value argument list.

    A single star argument (e.g. ``MAP(*)``) produces a ``StarMap``; otherwise
    the arguments are consumed pairwise into a ``VarMap`` whose keys and values
    are wrapped in ``Array`` expressions. An odd number of arguments raises
    ``IndexError``, matching the alternating key/value contract.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
57class Parser(metaclass=_Parser): 58 """ 59 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 60 a parsed syntax tree. 61 62 Args: 63 error_level: the desired error level. 64 Default: ErrorLevel.IMMEDIATE 65 error_message_context: determines the amount of context to capture from a 66 query string when displaying the error message (in number of characters). 67 Default: 50. 68 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 69 Default: 0 70 alias_post_tablesample: If the table alias comes after tablesample. 71 Default: False 72 max_errors: Maximum number of error messages to include in a raised ParseError. 73 This is only relevant if error_level is ErrorLevel.RAISE. 74 Default: 3 75 null_ordering: Indicates the default null ordering method to use if not explicitly set. 76 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 77 Default: "nulls_are_small" 78 """ 79 80 FUNCTIONS: t.Dict[str, t.Callable] = { 81 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 82 "DATE_TO_DATE_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 87 "IFNULL": exp.Coalesce.from_arg_list, 88 "LIKE": parse_like, 89 "TIME_TO_TIME_STR": lambda args: exp.Cast( 90 this=seq_get(args, 0), 91 to=exp.DataType(this=exp.DataType.Type.TEXT), 92 ), 93 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 94 this=exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 start=exp.Literal.number(1), 99 length=exp.Literal.number(10), 100 ), 101 "VAR_MAP": parse_var_map, 102 } 103 104 NO_PAREN_FUNCTIONS = { 105 TokenType.CURRENT_DATE: exp.CurrentDate, 106 TokenType.CURRENT_DATETIME: exp.CurrentDate, 107 TokenType.CURRENT_TIME: exp.CurrentTime, 108 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 109 
TokenType.CURRENT_USER: exp.CurrentUser, 110 } 111 112 JOIN_HINTS: t.Set[str] = set() 113 114 NESTED_TYPE_TOKENS = { 115 TokenType.ARRAY, 116 TokenType.MAP, 117 TokenType.NULLABLE, 118 TokenType.STRUCT, 119 } 120 121 TYPE_TOKENS = { 122 TokenType.BIT, 123 TokenType.BOOLEAN, 124 TokenType.TINYINT, 125 TokenType.UTINYINT, 126 TokenType.SMALLINT, 127 TokenType.USMALLINT, 128 TokenType.INT, 129 TokenType.UINT, 130 TokenType.BIGINT, 131 TokenType.UBIGINT, 132 TokenType.INT128, 133 TokenType.UINT128, 134 TokenType.INT256, 135 TokenType.UINT256, 136 TokenType.FLOAT, 137 TokenType.DOUBLE, 138 TokenType.CHAR, 139 TokenType.NCHAR, 140 TokenType.VARCHAR, 141 TokenType.NVARCHAR, 142 TokenType.TEXT, 143 TokenType.MEDIUMTEXT, 144 TokenType.LONGTEXT, 145 TokenType.MEDIUMBLOB, 146 TokenType.LONGBLOB, 147 TokenType.BINARY, 148 TokenType.VARBINARY, 149 TokenType.JSON, 150 TokenType.JSONB, 151 TokenType.INTERVAL, 152 TokenType.TIME, 153 TokenType.TIMESTAMP, 154 TokenType.TIMESTAMPTZ, 155 TokenType.TIMESTAMPLTZ, 156 TokenType.DATETIME, 157 TokenType.DATETIME64, 158 TokenType.DATE, 159 TokenType.DECIMAL, 160 TokenType.BIGDECIMAL, 161 TokenType.UUID, 162 TokenType.GEOGRAPHY, 163 TokenType.GEOMETRY, 164 TokenType.HLLSKETCH, 165 TokenType.HSTORE, 166 TokenType.PSEUDO_TYPE, 167 TokenType.SUPER, 168 TokenType.SERIAL, 169 TokenType.SMALLSERIAL, 170 TokenType.BIGSERIAL, 171 TokenType.XML, 172 TokenType.UNIQUEIDENTIFIER, 173 TokenType.MONEY, 174 TokenType.SMALLMONEY, 175 TokenType.ROWVERSION, 176 TokenType.IMAGE, 177 TokenType.VARIANT, 178 TokenType.OBJECT, 179 TokenType.INET, 180 *NESTED_TYPE_TOKENS, 181 } 182 183 SUBQUERY_PREDICATES = { 184 TokenType.ANY: exp.Any, 185 TokenType.ALL: exp.All, 186 TokenType.EXISTS: exp.Exists, 187 TokenType.SOME: exp.Any, 188 } 189 190 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 191 192 DB_CREATABLES = { 193 TokenType.DATABASE, 194 TokenType.SCHEMA, 195 TokenType.TABLE, 196 TokenType.VIEW, 197 } 198 199 CREATABLES = { 200 
TokenType.COLUMN, 201 TokenType.FUNCTION, 202 TokenType.INDEX, 203 TokenType.PROCEDURE, 204 *DB_CREATABLES, 205 } 206 207 ID_VAR_TOKENS = { 208 TokenType.VAR, 209 TokenType.ANTI, 210 TokenType.APPLY, 211 TokenType.ASC, 212 TokenType.AUTO_INCREMENT, 213 TokenType.BEGIN, 214 TokenType.CACHE, 215 TokenType.COLLATE, 216 TokenType.COMMAND, 217 TokenType.COMMENT, 218 TokenType.COMMIT, 219 TokenType.CONSTRAINT, 220 TokenType.DEFAULT, 221 TokenType.DELETE, 222 TokenType.DESC, 223 TokenType.DESCRIBE, 224 TokenType.DIV, 225 TokenType.END, 226 TokenType.EXECUTE, 227 TokenType.ESCAPE, 228 TokenType.FALSE, 229 TokenType.FIRST, 230 TokenType.FILTER, 231 TokenType.FORMAT, 232 TokenType.FULL, 233 TokenType.IF, 234 TokenType.IS, 235 TokenType.ISNULL, 236 TokenType.INTERVAL, 237 TokenType.KEEP, 238 TokenType.LEFT, 239 TokenType.LOAD, 240 TokenType.MERGE, 241 TokenType.NATURAL, 242 TokenType.NEXT, 243 TokenType.OFFSET, 244 TokenType.ORDINALITY, 245 TokenType.OVERWRITE, 246 TokenType.PARTITION, 247 TokenType.PERCENT, 248 TokenType.PIVOT, 249 TokenType.PRAGMA, 250 TokenType.RANGE, 251 TokenType.REFERENCES, 252 TokenType.RIGHT, 253 TokenType.ROW, 254 TokenType.ROWS, 255 TokenType.SEMI, 256 TokenType.SET, 257 TokenType.SETTINGS, 258 TokenType.SHOW, 259 TokenType.TEMPORARY, 260 TokenType.TOP, 261 TokenType.TRUE, 262 TokenType.UNIQUE, 263 TokenType.UNPIVOT, 264 TokenType.VOLATILE, 265 TokenType.WINDOW, 266 *CREATABLES, 267 *SUBQUERY_PREDICATES, 268 *TYPE_TOKENS, 269 *NO_PAREN_FUNCTIONS, 270 } 271 272 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 273 274 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 275 TokenType.APPLY, 276 TokenType.FULL, 277 TokenType.LEFT, 278 TokenType.LOCK, 279 TokenType.NATURAL, 280 TokenType.OFFSET, 281 TokenType.RIGHT, 282 TokenType.WINDOW, 283 } 284 285 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 286 287 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 288 289 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 290 291 FUNC_TOKENS = { 292 
TokenType.COMMAND, 293 TokenType.CURRENT_DATE, 294 TokenType.CURRENT_DATETIME, 295 TokenType.CURRENT_TIMESTAMP, 296 TokenType.CURRENT_TIME, 297 TokenType.CURRENT_USER, 298 TokenType.FILTER, 299 TokenType.FIRST, 300 TokenType.FORMAT, 301 TokenType.GLOB, 302 TokenType.IDENTIFIER, 303 TokenType.INDEX, 304 TokenType.ISNULL, 305 TokenType.ILIKE, 306 TokenType.LIKE, 307 TokenType.MERGE, 308 TokenType.OFFSET, 309 TokenType.PRIMARY_KEY, 310 TokenType.RANGE, 311 TokenType.REPLACE, 312 TokenType.ROW, 313 TokenType.UNNEST, 314 TokenType.VAR, 315 TokenType.LEFT, 316 TokenType.RIGHT, 317 TokenType.DATE, 318 TokenType.DATETIME, 319 TokenType.TABLE, 320 TokenType.TIMESTAMP, 321 TokenType.TIMESTAMPTZ, 322 TokenType.WINDOW, 323 *TYPE_TOKENS, 324 *SUBQUERY_PREDICATES, 325 } 326 327 CONJUNCTION = { 328 TokenType.AND: exp.And, 329 TokenType.OR: exp.Or, 330 } 331 332 EQUALITY = { 333 TokenType.EQ: exp.EQ, 334 TokenType.NEQ: exp.NEQ, 335 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 336 } 337 338 COMPARISON = { 339 TokenType.GT: exp.GT, 340 TokenType.GTE: exp.GTE, 341 TokenType.LT: exp.LT, 342 TokenType.LTE: exp.LTE, 343 } 344 345 BITWISE = { 346 TokenType.AMP: exp.BitwiseAnd, 347 TokenType.CARET: exp.BitwiseXor, 348 TokenType.PIPE: exp.BitwiseOr, 349 TokenType.DPIPE: exp.DPipe, 350 } 351 352 TERM = { 353 TokenType.DASH: exp.Sub, 354 TokenType.PLUS: exp.Add, 355 TokenType.MOD: exp.Mod, 356 TokenType.COLLATE: exp.Collate, 357 } 358 359 FACTOR = { 360 TokenType.DIV: exp.IntDiv, 361 TokenType.LR_ARROW: exp.Distance, 362 TokenType.SLASH: exp.Div, 363 TokenType.STAR: exp.Mul, 364 } 365 366 TIMESTAMPS = { 367 TokenType.TIME, 368 TokenType.TIMESTAMP, 369 TokenType.TIMESTAMPTZ, 370 TokenType.TIMESTAMPLTZ, 371 } 372 373 SET_OPERATIONS = { 374 TokenType.UNION, 375 TokenType.INTERSECT, 376 TokenType.EXCEPT, 377 } 378 379 JOIN_SIDES = { 380 TokenType.LEFT, 381 TokenType.RIGHT, 382 TokenType.FULL, 383 } 384 385 JOIN_KINDS = { 386 TokenType.INNER, 387 TokenType.OUTER, 388 TokenType.CROSS, 389 
TokenType.SEMI, 390 TokenType.ANTI, 391 } 392 393 LAMBDAS = { 394 TokenType.ARROW: lambda self, expressions: self.expression( 395 exp.Lambda, 396 this=self._replace_lambda( 397 self._parse_conjunction(), 398 {node.name for node in expressions}, 399 ), 400 expressions=expressions, 401 ), 402 TokenType.FARROW: lambda self, expressions: self.expression( 403 exp.Kwarg, 404 this=exp.Var(this=expressions[0].name), 405 expression=self._parse_conjunction(), 406 ), 407 } 408 409 COLUMN_OPERATORS = { 410 TokenType.DOT: None, 411 TokenType.DCOLON: lambda self, this, to: self.expression( 412 exp.Cast if self.STRICT_CAST else exp.TryCast, 413 this=this, 414 to=to, 415 ), 416 TokenType.ARROW: lambda self, this, path: self.expression( 417 exp.JSONExtract, 418 this=this, 419 expression=path, 420 ), 421 TokenType.DARROW: lambda self, this, path: self.expression( 422 exp.JSONExtractScalar, 423 this=this, 424 expression=path, 425 ), 426 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 427 exp.JSONBExtract, 428 this=this, 429 expression=path, 430 ), 431 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 432 exp.JSONBExtractScalar, 433 this=this, 434 expression=path, 435 ), 436 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 437 exp.JSONBContains, 438 this=this, 439 expression=key, 440 ), 441 } 442 443 EXPRESSION_PARSERS = { 444 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 445 exp.Column: lambda self: self._parse_column(), 446 exp.Condition: lambda self: self._parse_conjunction(), 447 exp.DataType: lambda self: self._parse_types(), 448 exp.Expression: lambda self: self._parse_statement(), 449 exp.From: lambda self: self._parse_from(), 450 exp.Group: lambda self: self._parse_group(), 451 exp.Having: lambda self: self._parse_having(), 452 exp.Identifier: lambda self: self._parse_id_var(), 453 exp.Join: lambda self: self._parse_join(), 454 exp.Lambda: lambda self: self._parse_lambda(), 455 exp.Lateral: lambda self: 
self._parse_lateral(), 456 exp.Limit: lambda self: self._parse_limit(), 457 exp.Offset: lambda self: self._parse_offset(), 458 exp.Order: lambda self: self._parse_order(), 459 exp.Ordered: lambda self: self._parse_ordered(), 460 exp.Properties: lambda self: self._parse_properties(), 461 exp.Qualify: lambda self: self._parse_qualify(), 462 exp.Returning: lambda self: self._parse_returning(), 463 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 464 exp.Table: lambda self: self._parse_table_parts(), 465 exp.TableAlias: lambda self: self._parse_table_alias(), 466 exp.Where: lambda self: self._parse_where(), 467 exp.Window: lambda self: self._parse_named_window(), 468 exp.With: lambda self: self._parse_with(), 469 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 470 } 471 472 STATEMENT_PARSERS = { 473 TokenType.ALTER: lambda self: self._parse_alter(), 474 TokenType.BEGIN: lambda self: self._parse_transaction(), 475 TokenType.CACHE: lambda self: self._parse_cache(), 476 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 477 TokenType.COMMENT: lambda self: self._parse_comment(), 478 TokenType.CREATE: lambda self: self._parse_create(), 479 TokenType.DELETE: lambda self: self._parse_delete(), 480 TokenType.DESC: lambda self: self._parse_describe(), 481 TokenType.DESCRIBE: lambda self: self._parse_describe(), 482 TokenType.DROP: lambda self: self._parse_drop(), 483 TokenType.END: lambda self: self._parse_commit_or_rollback(), 484 TokenType.FROM: lambda self: exp.select("*").from_( 485 t.cast(exp.From, self._parse_from(skip_from_token=True)) 486 ), 487 TokenType.INSERT: lambda self: self._parse_insert(), 488 TokenType.LOAD: lambda self: self._parse_load(), 489 TokenType.MERGE: lambda self: self._parse_merge(), 490 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 491 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 492 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 493 TokenType.SET: lambda self: self._parse_set(), 494 TokenType.UNCACHE: lambda self: self._parse_uncache(), 495 TokenType.UPDATE: lambda self: self._parse_update(), 496 TokenType.USE: lambda self: self.expression( 497 exp.Use, 498 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 499 and exp.Var(this=self._prev.text), 500 this=self._parse_table(schema=False), 501 ), 502 } 503 504 UNARY_PARSERS = { 505 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 506 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 507 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 508 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 509 } 510 511 PRIMARY_PARSERS = { 512 TokenType.STRING: lambda self, token: self.expression( 513 exp.Literal, this=token.text, is_string=True 514 ), 515 TokenType.NUMBER: lambda self, token: self.expression( 516 exp.Literal, this=token.text, is_string=False 517 ), 518 TokenType.STAR: lambda self, _: self.expression( 519 exp.Star, 520 **{"except": self._parse_except(), "replace": self._parse_replace()}, 521 ), 522 TokenType.NULL: lambda self, _: self.expression(exp.Null), 523 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 524 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 525 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 526 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 527 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 528 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 529 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 530 exp.National, this=token.text 531 ), 532 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, 
this=token.text), 533 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 534 } 535 536 PLACEHOLDER_PARSERS = { 537 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 538 TokenType.PARAMETER: lambda self: self._parse_parameter(), 539 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 540 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 541 else None, 542 } 543 544 RANGE_PARSERS = { 545 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 546 TokenType.GLOB: binary_range_parser(exp.Glob), 547 TokenType.ILIKE: binary_range_parser(exp.ILike), 548 TokenType.IN: lambda self, this: self._parse_in(this), 549 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 550 TokenType.IS: lambda self, this: self._parse_is(this), 551 TokenType.LIKE: binary_range_parser(exp.Like), 552 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 553 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 554 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 555 } 556 557 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 558 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 559 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 560 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 561 "CHARACTER SET": lambda self: self._parse_character_set(), 562 "CHECKSUM": lambda self: self._parse_checksum(), 563 "CLUSTER": lambda self: self._parse_cluster(), 564 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 565 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 566 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 567 "DEFINER": lambda self: self._parse_definer(), 568 "DETERMINISTIC": lambda self: self.expression( 569 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 570 ), 571 "DISTKEY": lambda self: 
self._parse_distkey(), 572 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 573 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 574 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 575 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 576 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 577 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 578 "FREESPACE": lambda self: self._parse_freespace(), 579 "IMMUTABLE": lambda self: self.expression( 580 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 581 ), 582 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 583 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 584 "LIKE": lambda self: self._parse_create_like(), 585 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 586 "LOCK": lambda self: self._parse_locking(), 587 "LOCKING": lambda self: self._parse_locking(), 588 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 589 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 590 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 591 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 592 "NO": lambda self: self._parse_no_property(), 593 "ON": lambda self: self._parse_on_property(), 594 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 595 "PARTITION BY": lambda self: self._parse_partitioned_by(), 596 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 597 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 598 "PRIMARY KEY": lambda self: self._parse_primary_key(), 599 "RETURNS": lambda self: self._parse_returns(), 600 "ROW": lambda self: self._parse_row(), 601 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 602 "SET": lambda self: 
self.expression(exp.SetProperty, multi=False), 603 "SETTINGS": lambda self: self.expression( 604 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 605 ), 606 "SORTKEY": lambda self: self._parse_sortkey(), 607 "STABLE": lambda self: self.expression( 608 exp.StabilityProperty, this=exp.Literal.string("STABLE") 609 ), 610 "STORED": lambda self: self._parse_stored(), 611 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 612 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 613 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 614 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 615 "TTL": lambda self: self._parse_ttl(), 616 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 617 "VOLATILE": lambda self: self._parse_volatile_property(), 618 "WITH": lambda self: self._parse_with_property(), 619 } 620 621 CONSTRAINT_PARSERS = { 622 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 623 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 624 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 625 "CHARACTER SET": lambda self: self.expression( 626 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 627 ), 628 "CHECK": lambda self: self.expression( 629 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 630 ), 631 "COLLATE": lambda self: self.expression( 632 exp.CollateColumnConstraint, this=self._parse_var() 633 ), 634 "COMMENT": lambda self: self.expression( 635 exp.CommentColumnConstraint, this=self._parse_string() 636 ), 637 "COMPRESS": lambda self: self._parse_compress(), 638 "DEFAULT": lambda self: self.expression( 639 exp.DefaultColumnConstraint, this=self._parse_bitwise() 640 ), 641 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 642 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 643 "FORMAT": lambda 
self: self.expression( 644 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 645 ), 646 "GENERATED": lambda self: self._parse_generated_as_identity(), 647 "IDENTITY": lambda self: self._parse_auto_increment(), 648 "INLINE": lambda self: self._parse_inline(), 649 "LIKE": lambda self: self._parse_create_like(), 650 "NOT": lambda self: self._parse_not_constraint(), 651 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 652 "ON": lambda self: self._match(TokenType.UPDATE) 653 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 654 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 655 "PRIMARY KEY": lambda self: self._parse_primary_key(), 656 "REFERENCES": lambda self: self._parse_references(match=False), 657 "TITLE": lambda self: self.expression( 658 exp.TitleColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 661 "UNIQUE": lambda self: self._parse_unique(), 662 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 663 } 664 665 ALTER_PARSERS = { 666 "ADD": lambda self: self._parse_alter_table_add(), 667 "ALTER": lambda self: self._parse_alter_table_alter(), 668 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 669 "DROP": lambda self: self._parse_alter_table_drop(), 670 "RENAME": lambda self: self._parse_alter_table_rename(), 671 } 672 673 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 674 675 NO_PAREN_FUNCTION_PARSERS = { 676 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 677 TokenType.CASE: lambda self: self._parse_case(), 678 TokenType.IF: lambda self: self._parse_if(), 679 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 680 exp.NextValueFor, 681 this=self._parse_column(), 682 order=self._match(TokenType.OVER) and 
self._parse_wrapped(self._parse_order), 683 ), 684 } 685 686 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 687 688 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 689 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 690 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 691 "DECODE": lambda self: self._parse_decode(), 692 "EXTRACT": lambda self: self._parse_extract(), 693 "JSON_OBJECT": lambda self: self._parse_json_object(), 694 "LOG": lambda self: self._parse_logarithm(), 695 "MATCH": lambda self: self._parse_match_against(), 696 "OPENJSON": lambda self: self._parse_open_json(), 697 "POSITION": lambda self: self._parse_position(), 698 "SAFE_CAST": lambda self: self._parse_cast(False), 699 "STRING_AGG": lambda self: self._parse_string_agg(), 700 "SUBSTRING": lambda self: self._parse_substring(), 701 "TRIM": lambda self: self._parse_trim(), 702 "TRY_CAST": lambda self: self._parse_cast(False), 703 "TRY_CONVERT": lambda self: self._parse_convert(False), 704 } 705 706 QUERY_MODIFIER_PARSERS = { 707 "joins": lambda self: list(iter(self._parse_join, None)), 708 "laterals": lambda self: list(iter(self._parse_lateral, None)), 709 "match": lambda self: self._parse_match_recognize(), 710 "where": lambda self: self._parse_where(), 711 "group": lambda self: self._parse_group(), 712 "having": lambda self: self._parse_having(), 713 "qualify": lambda self: self._parse_qualify(), 714 "windows": lambda self: self._parse_window_clause(), 715 "order": lambda self: self._parse_order(), 716 "limit": lambda self: self._parse_limit(), 717 "offset": lambda self: self._parse_offset(), 718 "locks": lambda self: self._parse_locks(), 719 "sample": lambda self: self._parse_table_sample(as_modifier=True), 720 } 721 722 SET_PARSERS = { 723 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 724 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 725 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 726 "TRANSACTION": lambda self: 
self._parse_set_transaction(), 727 } 728 729 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 730 731 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 732 733 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 734 735 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 736 737 TRANSACTION_CHARACTERISTICS = { 738 "ISOLATION LEVEL REPEATABLE READ", 739 "ISOLATION LEVEL READ COMMITTED", 740 "ISOLATION LEVEL READ UNCOMMITTED", 741 "ISOLATION LEVEL SERIALIZABLE", 742 "READ WRITE", 743 "READ ONLY", 744 } 745 746 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 747 748 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 749 750 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 751 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 752 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 753 754 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 755 756 STRICT_CAST = True 757 758 CONVERT_TYPE_FIRST = False 759 760 PREFIXED_PIVOT_COLUMNS = False 761 IDENTIFY_PIVOT_STRINGS = False 762 763 LOG_BASE_FIRST = True 764 LOG_DEFAULTS_TO_LN = False 765 766 __slots__ = ( 767 "error_level", 768 "error_message_context", 769 "sql", 770 "errors", 771 "index_offset", 772 "unnest_column_only", 773 "alias_post_tablesample", 774 "max_errors", 775 "null_ordering", 776 "_tokens", 777 "_index", 778 "_curr", 779 "_next", 780 "_prev", 781 "_prev_comments", 782 "_show_trie", 783 "_set_trie", 784 ) 785 786 def __init__( 787 self, 788 error_level: t.Optional[ErrorLevel] = None, 789 error_message_context: int = 100, 790 index_offset: int = 0, 791 unnest_column_only: bool = False, 792 alias_post_tablesample: bool = False, 793 max_errors: int = 3, 794 null_ordering: t.Optional[str] = None, 795 ): 796 self.error_level = error_level or ErrorLevel.IMMEDIATE 797 self.error_message_context = error_message_context 798 self.index_offset = index_offset 799 self.unnest_column_only = unnest_column_only 800 self.alias_post_tablesample = 
alias_post_tablesample 801 self.max_errors = max_errors 802 self.null_ordering = null_ordering 803 self.reset() 804 805 def reset(self): 806 self.sql = "" 807 self.errors = [] 808 self._tokens = [] 809 self._index = 0 810 self._curr = None 811 self._next = None 812 self._prev = None 813 self._prev_comments = None 814 815 def parse( 816 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 817 ) -> t.List[t.Optional[exp.Expression]]: 818 """ 819 Parses a list of tokens and returns a list of syntax trees, one tree 820 per parsed SQL statement. 821 822 Args: 823 raw_tokens: the list of tokens. 824 sql: the original SQL string, used to produce helpful debug messages. 825 826 Returns: 827 The list of syntax trees. 828 """ 829 return self._parse( 830 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 831 ) 832 833 def parse_into( 834 self, 835 expression_types: exp.IntoType, 836 raw_tokens: t.List[Token], 837 sql: t.Optional[str] = None, 838 ) -> t.List[t.Optional[exp.Expression]]: 839 """ 840 Parses a list of tokens into a given Expression type. If a collection of Expression 841 types is given instead, this method will try to parse the token list into each one 842 of them, stopping at the first for which the parsing succeeds. 843 844 Args: 845 expression_types: the expression type(s) to try and parse the token list into. 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The target Expression. 
851 """ 852 errors = [] 853 for expression_type in ensure_collection(expression_types): 854 parser = self.EXPRESSION_PARSERS.get(expression_type) 855 if not parser: 856 raise TypeError(f"No parser registered for {expression_type}") 857 try: 858 return self._parse(parser, raw_tokens, sql) 859 except ParseError as e: 860 e.errors[0]["into_expression"] = expression_type 861 errors.append(e) 862 raise ParseError( 863 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 864 errors=merge_errors(errors), 865 ) from errors[-1] 866 867 def _parse( 868 self, 869 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 870 raw_tokens: t.List[Token], 871 sql: t.Optional[str] = None, 872 ) -> t.List[t.Optional[exp.Expression]]: 873 self.reset() 874 self.sql = sql or "" 875 total = len(raw_tokens) 876 chunks: t.List[t.List[Token]] = [[]] 877 878 for i, token in enumerate(raw_tokens): 879 if token.token_type == TokenType.SEMICOLON: 880 if i < total - 1: 881 chunks.append([]) 882 else: 883 chunks[-1].append(token) 884 885 expressions = [] 886 887 for tokens in chunks: 888 self._index = -1 889 self._tokens = tokens 890 self._advance() 891 892 expressions.append(parse_method(self)) 893 894 if self._index < len(self._tokens): 895 self.raise_error("Invalid expression / Unexpected token") 896 897 self.check_errors() 898 899 return expressions 900 901 def check_errors(self) -> None: 902 """ 903 Logs or raises any found errors, depending on the chosen error level setting. 904 """ 905 if self.error_level == ErrorLevel.WARN: 906 for error in self.errors: 907 logger.error(str(error)) 908 elif self.error_level == ErrorLevel.RAISE and self.errors: 909 raise ParseError( 910 concat_messages(self.errors, self.max_errors), 911 errors=merge_errors(self.errors), 912 ) 913 914 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 915 """ 916 Appends an error in the list of recorded errors or raises it, depending on the chosen 917 error level setting. 
918 """ 919 token = token or self._curr or self._prev or Token.string("") 920 start = token.start 921 end = token.end + 1 922 start_context = self.sql[max(start - self.error_message_context, 0) : start] 923 highlight = self.sql[start:end] 924 end_context = self.sql[end : end + self.error_message_context] 925 926 error = ParseError.new( 927 f"{message}. Line {token.line}, Col: {token.col}.\n" 928 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 929 description=message, 930 line=token.line, 931 col=token.col, 932 start_context=start_context, 933 highlight=highlight, 934 end_context=end_context, 935 ) 936 937 if self.error_level == ErrorLevel.IMMEDIATE: 938 raise error 939 940 self.errors.append(error) 941 942 def expression( 943 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 944 ) -> E: 945 """ 946 Creates a new, validated Expression. 947 948 Args: 949 exp_class: the expression class to instantiate. 950 comments: an optional list of comments to attach to the expression. 951 kwargs: the arguments to set for the expression along with their respective values. 952 953 Returns: 954 The target expression. 955 """ 956 instance = exp_class(**kwargs) 957 instance.add_comments(comments) if comments else self._add_comments(instance) 958 self.validate_expression(instance) 959 return instance 960 961 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 962 if expression and self._prev_comments: 963 expression.add_comments(self._prev_comments) 964 self._prev_comments = None 965 966 def validate_expression( 967 self, expression: exp.Expression, args: t.Optional[t.List] = None 968 ) -> None: 969 """ 970 Validates an already instantiated expression, making sure that all its mandatory arguments 971 are set. 972 973 Args: 974 expression: the expression to validate. 975 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
976 """ 977 if self.error_level == ErrorLevel.IGNORE: 978 return 979 980 for error_message in expression.error_messages(args): 981 self.raise_error(error_message) 982 983 def _find_sql(self, start: Token, end: Token) -> str: 984 return self.sql[start.start : end.end + 1] 985 986 def _advance(self, times: int = 1) -> None: 987 self._index += times 988 self._curr = seq_get(self._tokens, self._index) 989 self._next = seq_get(self._tokens, self._index + 1) 990 if self._index > 0: 991 self._prev = self._tokens[self._index - 1] 992 self._prev_comments = self._prev.comments 993 else: 994 self._prev = None 995 self._prev_comments = None 996 997 def _retreat(self, index: int) -> None: 998 if index != self._index: 999 self._advance(index - self._index) 1000 1001 def _parse_command(self) -> exp.Command: 1002 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1003 1004 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1005 start = self._prev 1006 exists = self._parse_exists() if allow_exists else None 1007 1008 self._match(TokenType.ON) 1009 1010 kind = self._match_set(self.CREATABLES) and self._prev 1011 1012 if not kind: 1013 return self._parse_as_command(start) 1014 1015 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1016 this = self._parse_user_defined_function(kind=kind.token_type) 1017 elif kind.token_type == TokenType.TABLE: 1018 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1019 elif kind.token_type == TokenType.COLUMN: 1020 this = self._parse_column() 1021 else: 1022 this = self._parse_id_var() 1023 1024 self._match(TokenType.IS) 1025 1026 return self.expression( 1027 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1028 ) 1029 1030 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1031 def _parse_ttl(self) -> exp.Expression: 1032 def _parse_ttl_action() -> 
t.Optional[exp.Expression]: 1033 this = self._parse_bitwise() 1034 1035 if self._match_text_seq("DELETE"): 1036 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1037 if self._match_text_seq("RECOMPRESS"): 1038 return self.expression( 1039 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1040 ) 1041 if self._match_text_seq("TO", "DISK"): 1042 return self.expression( 1043 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1044 ) 1045 if self._match_text_seq("TO", "VOLUME"): 1046 return self.expression( 1047 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1048 ) 1049 1050 return this 1051 1052 expressions = self._parse_csv(_parse_ttl_action) 1053 where = self._parse_where() 1054 group = self._parse_group() 1055 1056 aggregates = None 1057 if group and self._match(TokenType.SET): 1058 aggregates = self._parse_csv(self._parse_set_item) 1059 1060 return self.expression( 1061 exp.MergeTreeTTL, 1062 expressions=expressions, 1063 where=where, 1064 group=group, 1065 aggregates=aggregates, 1066 ) 1067 1068 def _parse_statement(self) -> t.Optional[exp.Expression]: 1069 if self._curr is None: 1070 return None 1071 1072 if self._match_set(self.STATEMENT_PARSERS): 1073 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1074 1075 if self._match_set(Tokenizer.COMMANDS): 1076 return self._parse_command() 1077 1078 expression = self._parse_expression() 1079 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1080 return self._parse_query_modifiers(expression) 1081 1082 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1083 start = self._prev 1084 temporary = self._match(TokenType.TEMPORARY) 1085 materialized = self._match_text_seq("MATERIALIZED") 1086 kind = self._match_set(self.CREATABLES) and self._prev.text 1087 if not kind: 1088 return self._parse_as_command(start) 1089 1090 return self.expression( 1091 exp.Drop, 1092 exists=self._parse_exists(), 1093 
this=self._parse_table(schema=True), 1094 kind=kind, 1095 temporary=temporary, 1096 materialized=materialized, 1097 cascade=self._match_text_seq("CASCADE"), 1098 constraints=self._match_text_seq("CONSTRAINTS"), 1099 purge=self._match_text_seq("PURGE"), 1100 ) 1101 1102 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1103 return ( 1104 self._match(TokenType.IF) 1105 and (not not_ or self._match(TokenType.NOT)) 1106 and self._match(TokenType.EXISTS) 1107 ) 1108 1109 def _parse_create(self) -> t.Optional[exp.Expression]: 1110 start = self._prev 1111 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1112 TokenType.OR, TokenType.REPLACE 1113 ) 1114 unique = self._match(TokenType.UNIQUE) 1115 1116 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1117 self._match(TokenType.TABLE) 1118 1119 properties = None 1120 create_token = self._match_set(self.CREATABLES) and self._prev 1121 1122 if not create_token: 1123 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1124 create_token = self._match_set(self.CREATABLES) and self._prev 1125 1126 if not properties or not create_token: 1127 return self._parse_as_command(start) 1128 1129 exists = self._parse_exists(not_=True) 1130 this = None 1131 expression = None 1132 indexes = None 1133 no_schema_binding = None 1134 begin = None 1135 clone = None 1136 1137 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1138 this = self._parse_user_defined_function(kind=create_token.token_type) 1139 temp_properties = self._parse_properties() 1140 if properties and temp_properties: 1141 properties.expressions.extend(temp_properties.expressions) 1142 elif temp_properties: 1143 properties = temp_properties 1144 1145 self._match(TokenType.ALIAS) 1146 begin = self._match(TokenType.BEGIN) 1147 return_ = self._match_text_seq("RETURN") 1148 expression = self._parse_statement() 1149 1150 if return_: 1151 expression = self.expression(exp.Return, 
this=expression) 1152 elif create_token.token_type == TokenType.INDEX: 1153 this = self._parse_index(index=self._parse_id_var()) 1154 elif create_token.token_type in self.DB_CREATABLES: 1155 table_parts = self._parse_table_parts(schema=True) 1156 1157 # exp.Properties.Location.POST_NAME 1158 if self._match(TokenType.COMMA): 1159 temp_properties = self._parse_properties(before=True) 1160 if properties and temp_properties: 1161 properties.expressions.extend(temp_properties.expressions) 1162 elif temp_properties: 1163 properties = temp_properties 1164 1165 this = self._parse_schema(this=table_parts) 1166 1167 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1168 temp_properties = self._parse_properties() 1169 if properties and temp_properties: 1170 properties.expressions.extend(temp_properties.expressions) 1171 elif temp_properties: 1172 properties = temp_properties 1173 1174 self._match(TokenType.ALIAS) 1175 1176 # exp.Properties.Location.POST_ALIAS 1177 if not ( 1178 self._match(TokenType.SELECT, advance=False) 1179 or self._match(TokenType.WITH, advance=False) 1180 or self._match(TokenType.L_PAREN, advance=False) 1181 ): 1182 temp_properties = self._parse_properties() 1183 if properties and temp_properties: 1184 properties.expressions.extend(temp_properties.expressions) 1185 elif temp_properties: 1186 properties = temp_properties 1187 1188 expression = self._parse_ddl_select() 1189 1190 if create_token.token_type == TokenType.TABLE: 1191 indexes = [] 1192 while True: 1193 index = self._parse_index() 1194 1195 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1196 temp_properties = self._parse_properties() 1197 if properties and temp_properties: 1198 properties.expressions.extend(temp_properties.expressions) 1199 elif temp_properties: 1200 properties = temp_properties 1201 1202 if not index: 1203 break 1204 else: 1205 self._match(TokenType.COMMA) 1206 indexes.append(index) 1207 elif create_token.token_type == TokenType.VIEW: 1208 if 
self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1209 no_schema_binding = True 1210 1211 if self._match_text_seq("CLONE"): 1212 clone = self._parse_table(schema=True) 1213 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1214 clone_kind = ( 1215 self._match(TokenType.L_PAREN) 1216 and self._match_texts(self.CLONE_KINDS) 1217 and self._prev.text.upper() 1218 ) 1219 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1220 self._match(TokenType.R_PAREN) 1221 clone = self.expression( 1222 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1223 ) 1224 1225 return self.expression( 1226 exp.Create, 1227 this=this, 1228 kind=create_token.text, 1229 replace=replace, 1230 unique=unique, 1231 expression=expression, 1232 exists=exists, 1233 properties=properties, 1234 indexes=indexes, 1235 no_schema_binding=no_schema_binding, 1236 begin=begin, 1237 clone=clone, 1238 ) 1239 1240 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1241 # only used for teradata currently 1242 self._match(TokenType.COMMA) 1243 1244 kwargs = { 1245 "no": self._match_text_seq("NO"), 1246 "dual": self._match_text_seq("DUAL"), 1247 "before": self._match_text_seq("BEFORE"), 1248 "default": self._match_text_seq("DEFAULT"), 1249 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1250 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1251 "after": self._match_text_seq("AFTER"), 1252 "minimum": self._match_texts(("MIN", "MINIMUM")), 1253 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1254 } 1255 1256 if self._match_texts(self.PROPERTY_PARSERS): 1257 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1258 try: 1259 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1260 except TypeError: 1261 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1262 1263 return None 1264 1265 def _parse_property(self) -> t.Optional[exp.Expression]: 1266 if 
self._match_texts(self.PROPERTY_PARSERS): 1267 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1268 1269 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1270 return self._parse_character_set(default=True) 1271 1272 if self._match_text_seq("COMPOUND", "SORTKEY"): 1273 return self._parse_sortkey(compound=True) 1274 1275 if self._match_text_seq("SQL", "SECURITY"): 1276 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1277 1278 assignment = self._match_pair( 1279 TokenType.VAR, TokenType.EQ, advance=False 1280 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1281 1282 if assignment: 1283 key = self._parse_var_or_string() 1284 self._match(TokenType.EQ) 1285 return self.expression(exp.Property, this=key, value=self._parse_column()) 1286 1287 return None 1288 1289 def _parse_stored(self) -> exp.Expression: 1290 self._match(TokenType.ALIAS) 1291 1292 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1293 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1294 1295 return self.expression( 1296 exp.FileFormatProperty, 1297 this=self.expression( 1298 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1299 ) 1300 if input_format or output_format 1301 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1302 ) 1303 1304 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1305 self._match(TokenType.EQ) 1306 self._match(TokenType.ALIAS) 1307 return self.expression(exp_class, this=self._parse_field()) 1308 1309 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1310 properties = [] 1311 1312 while True: 1313 if before: 1314 prop = self._parse_property_before() 1315 else: 1316 prop = self._parse_property() 1317 1318 if not prop: 1319 break 1320 for p in ensure_list(prop): 1321 
properties.append(p) 1322 1323 if properties: 1324 return self.expression(exp.Properties, expressions=properties) 1325 1326 return None 1327 1328 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1329 return self.expression( 1330 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1331 ) 1332 1333 def _parse_volatile_property(self) -> exp.Expression: 1334 if self._index >= 2: 1335 pre_volatile_token = self._tokens[self._index - 2] 1336 else: 1337 pre_volatile_token = None 1338 1339 if pre_volatile_token and pre_volatile_token.token_type in ( 1340 TokenType.CREATE, 1341 TokenType.REPLACE, 1342 TokenType.UNIQUE, 1343 ): 1344 return exp.VolatileProperty() 1345 1346 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1347 1348 def _parse_with_property( 1349 self, 1350 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1351 self._match(TokenType.WITH) 1352 if self._match(TokenType.L_PAREN, advance=False): 1353 return self._parse_wrapped_csv(self._parse_property) 1354 1355 if self._match_text_seq("JOURNAL"): 1356 return self._parse_withjournaltable() 1357 1358 if self._match_text_seq("DATA"): 1359 return self._parse_withdata(no=False) 1360 elif self._match_text_seq("NO", "DATA"): 1361 return self._parse_withdata(no=True) 1362 1363 if not self._next: 1364 return None 1365 1366 return self._parse_withisolatedloading() 1367 1368 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1369 def _parse_definer(self) -> t.Optional[exp.Expression]: 1370 self._match(TokenType.EQ) 1371 1372 user = self._parse_id_var() 1373 self._match(TokenType.PARAMETER) 1374 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1375 1376 if not user or not host: 1377 return None 1378 1379 return exp.DefinerProperty(this=f"{user}@{host}") 1380 1381 def _parse_withjournaltable(self) -> exp.Expression: 1382 self._match(TokenType.TABLE) 1383 self._match(TokenType.EQ) 1384 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1385 1386 def _parse_log(self, no: bool = False) -> exp.Expression: 1387 return self.expression(exp.LogProperty, no=no) 1388 1389 def _parse_journal(self, **kwargs) -> exp.Expression: 1390 return self.expression(exp.JournalProperty, **kwargs) 1391 1392 def _parse_checksum(self) -> exp.Expression: 1393 self._match(TokenType.EQ) 1394 1395 on = None 1396 if self._match(TokenType.ON): 1397 on = True 1398 elif self._match_text_seq("OFF"): 1399 on = False 1400 default = self._match(TokenType.DEFAULT) 1401 1402 return self.expression( 1403 exp.ChecksumProperty, 1404 on=on, 1405 default=default, 1406 ) 1407 1408 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1409 if not self._match_text_seq("BY"): 1410 self._retreat(self._index - 1) 1411 return None 1412 return self.expression( 1413 exp.Cluster, 1414 expressions=self._parse_csv(self._parse_ordered), 1415 ) 1416 1417 def _parse_freespace(self) -> exp.Expression: 1418 self._match(TokenType.EQ) 1419 return self.expression( 1420 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1421 ) 1422 1423 def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1424 if self._match(TokenType.EQ): 1425 return self.expression( 1426 exp.MergeBlockRatioProperty, 1427 this=self._parse_number(), 1428 percent=self._match(TokenType.PERCENT), 1429 ) 1430 return self.expression( 1431 exp.MergeBlockRatioProperty, 1432 no=no, 1433 default=default, 1434 ) 1435 1436 def _parse_datablocksize( 1437 self, 1438 default: t.Optional[bool] = None, 1439 minimum: t.Optional[bool] = None, 1440 maximum: t.Optional[bool] = None, 1441 ) -> exp.Expression: 1442 self._match(TokenType.EQ) 1443 size = self._parse_number() 1444 units = None 1445 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1446 units = self._prev.text 1447 return self.expression( 1448 exp.DataBlocksizeProperty, 1449 size=size, 1450 
units=units, 1451 default=default, 1452 minimum=minimum, 1453 maximum=maximum, 1454 ) 1455 1456 def _parse_blockcompression(self) -> exp.Expression: 1457 self._match(TokenType.EQ) 1458 always = self._match_text_seq("ALWAYS") 1459 manual = self._match_text_seq("MANUAL") 1460 never = self._match_text_seq("NEVER") 1461 default = self._match_text_seq("DEFAULT") 1462 autotemp = None 1463 if self._match_text_seq("AUTOTEMP"): 1464 autotemp = self._parse_schema() 1465 1466 return self.expression( 1467 exp.BlockCompressionProperty, 1468 always=always, 1469 manual=manual, 1470 never=never, 1471 default=default, 1472 autotemp=autotemp, 1473 ) 1474 1475 def _parse_withisolatedloading(self) -> exp.Expression: 1476 no = self._match_text_seq("NO") 1477 concurrent = self._match_text_seq("CONCURRENT") 1478 self._match_text_seq("ISOLATED", "LOADING") 1479 for_all = self._match_text_seq("FOR", "ALL") 1480 for_insert = self._match_text_seq("FOR", "INSERT") 1481 for_none = self._match_text_seq("FOR", "NONE") 1482 return self.expression( 1483 exp.IsolatedLoadingProperty, 1484 no=no, 1485 concurrent=concurrent, 1486 for_all=for_all, 1487 for_insert=for_insert, 1488 for_none=for_none, 1489 ) 1490 1491 def _parse_locking(self) -> exp.Expression: 1492 if self._match(TokenType.TABLE): 1493 kind = "TABLE" 1494 elif self._match(TokenType.VIEW): 1495 kind = "VIEW" 1496 elif self._match(TokenType.ROW): 1497 kind = "ROW" 1498 elif self._match_text_seq("DATABASE"): 1499 kind = "DATABASE" 1500 else: 1501 kind = None 1502 1503 if kind in ("DATABASE", "TABLE", "VIEW"): 1504 this = self._parse_table_parts() 1505 else: 1506 this = None 1507 1508 if self._match(TokenType.FOR): 1509 for_or_in = "FOR" 1510 elif self._match(TokenType.IN): 1511 for_or_in = "IN" 1512 else: 1513 for_or_in = None 1514 1515 if self._match_text_seq("ACCESS"): 1516 lock_type = "ACCESS" 1517 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1518 lock_type = "EXCLUSIVE" 1519 elif self._match_text_seq("SHARE"): 1520 lock_type = "SHARE" 
1521 elif self._match_text_seq("READ"): 1522 lock_type = "READ" 1523 elif self._match_text_seq("WRITE"): 1524 lock_type = "WRITE" 1525 elif self._match_text_seq("CHECKSUM"): 1526 lock_type = "CHECKSUM" 1527 else: 1528 lock_type = None 1529 1530 override = self._match_text_seq("OVERRIDE") 1531 1532 return self.expression( 1533 exp.LockingProperty, 1534 this=this, 1535 kind=kind, 1536 for_or_in=for_or_in, 1537 lock_type=lock_type, 1538 override=override, 1539 ) 1540 1541 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1542 if self._match(TokenType.PARTITION_BY): 1543 return self._parse_csv(self._parse_conjunction) 1544 return [] 1545 1546 def _parse_partitioned_by(self) -> exp.Expression: 1547 self._match(TokenType.EQ) 1548 return self.expression( 1549 exp.PartitionedByProperty, 1550 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1551 ) 1552 1553 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1554 if self._match_text_seq("AND", "STATISTICS"): 1555 statistics = True 1556 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1557 statistics = False 1558 else: 1559 statistics = None 1560 1561 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1562 1563 def _parse_no_property(self) -> t.Optional[exp.Property]: 1564 if self._match_text_seq("PRIMARY", "INDEX"): 1565 return exp.NoPrimaryIndexProperty() 1566 return None 1567 1568 def _parse_on_property(self) -> t.Optional[exp.Property]: 1569 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1570 return exp.OnCommitProperty() 1571 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1572 return exp.OnCommitProperty(delete=True) 1573 return None 1574 1575 def _parse_distkey(self) -> exp.Expression: 1576 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1577 1578 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1579 table = self._parse_table(schema=True) 1580 options = [] 1581 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 1582 this = self._prev.text.upper() 1583 id_var = self._parse_id_var() 1584 1585 if not id_var: 1586 return None 1587 1588 options.append( 1589 self.expression( 1590 exp.Property, 1591 this=this, 1592 value=exp.Var(this=id_var.this.upper()), 1593 ) 1594 ) 1595 return self.expression(exp.LikeProperty, this=table, expressions=options) 1596 1597 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1598 return self.expression( 1599 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1600 ) 1601 1602 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1603 self._match(TokenType.EQ) 1604 return self.expression( 1605 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1606 ) 1607 1608 def _parse_returns(self) -> exp.Expression: 1609 value: t.Optional[exp.Expression] 1610 is_table = self._match(TokenType.TABLE) 1611 1612 if is_table: 1613 if self._match(TokenType.LT): 1614 value = self.expression( 1615 exp.Schema, 1616 this="TABLE", 1617 expressions=self._parse_csv(self._parse_struct_types), 1618 ) 1619 if not self._match(TokenType.GT): 1620 self.raise_error("Expecting >") 1621 else: 1622 value = self._parse_schema(exp.Var(this="TABLE")) 1623 else: 1624 value = self._parse_types() 1625 1626 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1627 1628 def _parse_describe(self) -> exp.Expression: 1629 kind = self._match_set(self.CREATABLES) and self._prev.text 1630 this = self._parse_table() 1631 1632 return self.expression(exp.Describe, this=this, kind=kind) 1633 1634 def _parse_insert(self) -> exp.Expression: 1635 overwrite = self._match(TokenType.OVERWRITE) 1636 local = self._match_text_seq("LOCAL") 1637 alternative = None 1638 1639 if self._match_text_seq("DIRECTORY"): 1640 this: t.Optional[exp.Expression] = self.expression( 1641 exp.Directory, 1642 this=self._parse_var_or_string(), 1643 local=local, 1644 
row_format=self._parse_row_format(match_row=True), 1645 ) 1646 else: 1647 if self._match(TokenType.OR): 1648 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1649 1650 self._match(TokenType.INTO) 1651 self._match(TokenType.TABLE) 1652 this = self._parse_table(schema=True) 1653 1654 return self.expression( 1655 exp.Insert, 1656 this=this, 1657 exists=self._parse_exists(), 1658 partition=self._parse_partition(), 1659 expression=self._parse_ddl_select(), 1660 conflict=self._parse_on_conflict(), 1661 returning=self._parse_returning(), 1662 overwrite=overwrite, 1663 alternative=alternative, 1664 ) 1665 1666 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1667 conflict = self._match_text_seq("ON", "CONFLICT") 1668 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1669 1670 if not (conflict or duplicate): 1671 return None 1672 1673 nothing = None 1674 expressions = None 1675 key = None 1676 constraint = None 1677 1678 if conflict: 1679 if self._match_text_seq("ON", "CONSTRAINT"): 1680 constraint = self._parse_id_var() 1681 else: 1682 key = self._parse_csv(self._parse_value) 1683 1684 self._match_text_seq("DO") 1685 if self._match_text_seq("NOTHING"): 1686 nothing = True 1687 else: 1688 self._match(TokenType.UPDATE) 1689 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1690 1691 return self.expression( 1692 exp.OnConflict, 1693 duplicate=duplicate, 1694 expressions=expressions, 1695 nothing=nothing, 1696 key=key, 1697 constraint=constraint, 1698 ) 1699 1700 def _parse_returning(self) -> t.Optional[exp.Expression]: 1701 if not self._match(TokenType.RETURNING): 1702 return None 1703 1704 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1705 1706 def _parse_row(self) -> t.Optional[exp.Expression]: 1707 if not self._match(TokenType.FORMAT): 1708 return None 1709 return self._parse_row_format() 1710 1711 def _parse_row_format(self, match_row: bool = False) 
-> t.Optional[exp.Expression]: 1712 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1713 return None 1714 1715 if self._match_text_seq("SERDE"): 1716 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1717 1718 self._match_text_seq("DELIMITED") 1719 1720 kwargs = {} 1721 1722 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1723 kwargs["fields"] = self._parse_string() 1724 if self._match_text_seq("ESCAPED", "BY"): 1725 kwargs["escaped"] = self._parse_string() 1726 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1727 kwargs["collection_items"] = self._parse_string() 1728 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1729 kwargs["map_keys"] = self._parse_string() 1730 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1731 kwargs["lines"] = self._parse_string() 1732 if self._match_text_seq("NULL", "DEFINED", "AS"): 1733 kwargs["null"] = self._parse_string() 1734 1735 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1736 1737 def _parse_load(self) -> exp.Expression: 1738 if self._match_text_seq("DATA"): 1739 local = self._match_text_seq("LOCAL") 1740 self._match_text_seq("INPATH") 1741 inpath = self._parse_string() 1742 overwrite = self._match(TokenType.OVERWRITE) 1743 self._match_pair(TokenType.INTO, TokenType.TABLE) 1744 1745 return self.expression( 1746 exp.LoadData, 1747 this=self._parse_table(schema=True), 1748 local=local, 1749 overwrite=overwrite, 1750 inpath=inpath, 1751 partition=self._parse_partition(), 1752 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1753 serde=self._match_text_seq("SERDE") and self._parse_string(), 1754 ) 1755 return self._parse_as_command(self._prev) 1756 1757 def _parse_delete(self) -> exp.Expression: 1758 self._match(TokenType.FROM) 1759 1760 return self.expression( 1761 exp.Delete, 1762 this=self._parse_table(), 1763 using=self._parse_csv(lambda: self._match(TokenType.USING) and 
self._parse_table()), 1764 where=self._parse_where(), 1765 returning=self._parse_returning(), 1766 ) 1767 1768 def _parse_update(self) -> exp.Expression: 1769 return self.expression( 1770 exp.Update, 1771 **{ # type: ignore 1772 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1773 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1774 "from": self._parse_from(modifiers=True), 1775 "where": self._parse_where(), 1776 "returning": self._parse_returning(), 1777 }, 1778 ) 1779 1780 def _parse_uncache(self) -> exp.Expression: 1781 if not self._match(TokenType.TABLE): 1782 self.raise_error("Expecting TABLE after UNCACHE") 1783 1784 return self.expression( 1785 exp.Uncache, 1786 exists=self._parse_exists(), 1787 this=self._parse_table(schema=True), 1788 ) 1789 1790 def _parse_cache(self) -> exp.Expression: 1791 lazy = self._match_text_seq("LAZY") 1792 self._match(TokenType.TABLE) 1793 table = self._parse_table(schema=True) 1794 options = [] 1795 1796 if self._match_text_seq("OPTIONS"): 1797 self._match_l_paren() 1798 k = self._parse_string() 1799 self._match(TokenType.EQ) 1800 v = self._parse_string() 1801 options = [k, v] 1802 self._match_r_paren() 1803 1804 self._match(TokenType.ALIAS) 1805 return self.expression( 1806 exp.Cache, 1807 this=table, 1808 lazy=lazy, 1809 options=options, 1810 expression=self._parse_select(nested=True), 1811 ) 1812 1813 def _parse_partition(self) -> t.Optional[exp.Expression]: 1814 if not self._match(TokenType.PARTITION): 1815 return None 1816 1817 return self.expression( 1818 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1819 ) 1820 1821 def _parse_value(self) -> exp.Expression: 1822 if self._match(TokenType.L_PAREN): 1823 expressions = self._parse_csv(self._parse_conjunction) 1824 self._match_r_paren() 1825 return self.expression(exp.Tuple, expressions=expressions) 1826 1827 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Handles, in order: a WITH-prefixed statement, a plain SELECT, a
        parenthesized subquery (when `nested` or `table` is set), a VALUES
        clause, a simplified PIVOT, and a bare FROM (select-all shorthand).

        Args:
            nested: allow consuming a parenthesized nested select.
            table: inside parentheses, parse a table instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (wrapped in any trailing set operations),
            or None if nothing select-like follows.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # raise_error may be suppressed (non-RAISE error levels), so
                # fall back to returning the bare CTE.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery)
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list (e.g. T-SQL).
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            # Bare FROM shorthand: FROM t  ==  SELECT * FROM t
            this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True)))
        else:
            this = None

        return self._parse_set_operations(this)
self._parse_string_as_identifier() 1955 ) 1956 1957 index = self._index 1958 if self._match(TokenType.L_PAREN): 1959 columns = self._parse_csv(self._parse_function_parameter) 1960 self._match_r_paren() if columns else self._retreat(index) 1961 else: 1962 columns = None 1963 1964 if not alias and not columns: 1965 return None 1966 1967 return self.expression(exp.TableAlias, this=alias, columns=columns) 1968 1969 def _parse_subquery( 1970 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1971 ) -> t.Optional[exp.Expression]: 1972 if not this: 1973 return None 1974 return self.expression( 1975 exp.Subquery, 1976 this=this, 1977 pivots=self._parse_pivots(), 1978 alias=self._parse_table_alias() if parse_alias else None, 1979 ) 1980 1981 def _parse_query_modifiers( 1982 self, this: t.Optional[exp.Expression] 1983 ) -> t.Optional[exp.Expression]: 1984 if isinstance(this, self.MODIFIABLES): 1985 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1986 expression = parser(self) 1987 1988 if expression: 1989 this.set(key, expression) 1990 return this 1991 1992 def _parse_hint(self) -> t.Optional[exp.Expression]: 1993 if self._match(TokenType.HINT): 1994 hints = self._parse_csv(self._parse_function) 1995 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1996 self.raise_error("Expected */ after HINT") 1997 return self.expression(exp.Hint, expressions=hints) 1998 1999 return None 2000 2001 def _parse_into(self) -> t.Optional[exp.Expression]: 2002 if not self._match(TokenType.INTO): 2003 return None 2004 2005 temp = self._match(TokenType.TEMPORARY) 2006 unlogged = self._match_text_seq("UNLOGGED") 2007 self._match(TokenType.TABLE) 2008 2009 return self.expression( 2010 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2011 ) 2012 2013 def _parse_from( 2014 self, modifiers: bool = False, skip_from_token: bool = False 2015 ) -> t.Optional[exp.From]: 2016 if not skip_from_token and not self._match(TokenType.FROM): 2017 return 
None 2018 2019 comments = self._prev_comments 2020 this = self._parse_table() 2021 2022 return self.expression( 2023 exp.From, 2024 comments=comments, 2025 this=self._parse_query_modifiers(this) if modifiers else this, 2026 ) 2027 2028 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2029 if not self._match(TokenType.MATCH_RECOGNIZE): 2030 return None 2031 2032 self._match_l_paren() 2033 2034 partition = self._parse_partition_by() 2035 order = self._parse_order() 2036 measures = ( 2037 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2038 ) 2039 2040 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2041 rows = exp.Var(this="ONE ROW PER MATCH") 2042 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2043 text = "ALL ROWS PER MATCH" 2044 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2045 text += f" SHOW EMPTY MATCHES" 2046 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2047 text += f" OMIT EMPTY MATCHES" 2048 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2049 text += f" WITH UNMATCHED ROWS" 2050 rows = exp.Var(this=text) 2051 else: 2052 rows = None 2053 2054 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2055 text = "AFTER MATCH SKIP" 2056 if self._match_text_seq("PAST", "LAST", "ROW"): 2057 text += f" PAST LAST ROW" 2058 elif self._match_text_seq("TO", "NEXT", "ROW"): 2059 text += f" TO NEXT ROW" 2060 elif self._match_text_seq("TO", "FIRST"): 2061 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2062 elif self._match_text_seq("TO", "LAST"): 2063 text += f" TO LAST {self._advance_any().text}" # type: ignore 2064 after = exp.Var(this=text) 2065 else: 2066 after = None 2067 2068 if self._match_text_seq("PATTERN"): 2069 self._match_l_paren() 2070 2071 if not self._curr: 2072 self.raise_error("Expecting )", self._curr) 2073 2074 paren = 1 2075 start = self._curr 2076 2077 while self._curr and paren > 0: 2078 if self._curr.token_type == TokenType.L_PAREN: 2079 paren 
+= 1 2080 if self._curr.token_type == TokenType.R_PAREN: 2081 paren -= 1 2082 end = self._prev 2083 self._advance() 2084 if paren > 0: 2085 self.raise_error("Expecting )", self._curr) 2086 pattern = exp.Var(this=self._find_sql(start, end)) 2087 else: 2088 pattern = None 2089 2090 define = ( 2091 self._parse_csv( 2092 lambda: self.expression( 2093 exp.Alias, 2094 alias=self._parse_id_var(any_token=True), 2095 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2096 ) 2097 ) 2098 if self._match_text_seq("DEFINE") 2099 else None 2100 ) 2101 2102 self._match_r_paren() 2103 2104 return self.expression( 2105 exp.MatchRecognize, 2106 partition_by=partition, 2107 order=order, 2108 measures=measures, 2109 rows=rows, 2110 after=after, 2111 pattern=pattern, 2112 define=define, 2113 alias=self._parse_table_alias(), 2114 ) 2115 2116 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2117 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2118 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2119 2120 if outer_apply or cross_apply: 2121 this = self._parse_select(table=True) 2122 view = None 2123 outer = not cross_apply 2124 elif self._match(TokenType.LATERAL): 2125 this = self._parse_select(table=True) 2126 view = self._match(TokenType.VIEW) 2127 outer = self._match(TokenType.OUTER) 2128 else: 2129 return None 2130 2131 if not this: 2132 this = self._parse_function() or self._parse_id_var(any_token=False) 2133 while self._match(TokenType.DOT): 2134 this = exp.Dot( 2135 this=this, 2136 expression=self._parse_function() or self._parse_id_var(any_token=False), 2137 ) 2138 2139 table_alias: t.Optional[exp.Expression] 2140 2141 if view: 2142 table = self._parse_id_var(any_token=False) 2143 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2144 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2145 else: 2146 table_alias = self._parse_table_alias() 2147 2148 expression = 
self.expression( 2149 exp.Lateral, 2150 this=this, 2151 view=view, 2152 outer=outer, 2153 alias=table_alias, 2154 ) 2155 2156 return expression 2157 2158 def _parse_join_side_and_kind( 2159 self, 2160 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2161 return ( 2162 self._match(TokenType.NATURAL) and self._prev, 2163 self._match_set(self.JOIN_SIDES) and self._prev, 2164 self._match_set(self.JOIN_KINDS) and self._prev, 2165 ) 2166 2167 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2168 if self._match(TokenType.COMMA): 2169 return self.expression(exp.Join, this=self._parse_table()) 2170 2171 index = self._index 2172 natural, side, kind = self._parse_join_side_and_kind() 2173 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2174 join = self._match(TokenType.JOIN) 2175 2176 if not skip_join_token and not join: 2177 self._retreat(index) 2178 kind = None 2179 natural = None 2180 side = None 2181 2182 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2183 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2184 2185 if not skip_join_token and not join and not outer_apply and not cross_apply: 2186 return None 2187 2188 if outer_apply: 2189 side = Token(TokenType.LEFT, "LEFT") 2190 2191 kwargs: t.Dict[ 2192 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2193 ] = {"this": self._parse_table()} 2194 2195 if natural: 2196 kwargs["natural"] = True 2197 if side: 2198 kwargs["side"] = side.text 2199 if kind: 2200 kwargs["kind"] = kind.text 2201 if hint: 2202 kwargs["hint"] = hint 2203 2204 if self._match(TokenType.ON): 2205 kwargs["on"] = self._parse_conjunction() 2206 elif self._match(TokenType.USING): 2207 kwargs["using"] = self._parse_wrapped_id_vars() 2208 2209 return self.expression(exp.Join, **kwargs) # type: ignore 2210 2211 def _parse_index( 2212 self, 2213 index: t.Optional[exp.Expression] = None, 2214 ) -> 
t.Optional[exp.Expression]: 2215 if index: 2216 unique = None 2217 primary = None 2218 amp = None 2219 2220 self._match(TokenType.ON) 2221 self._match(TokenType.TABLE) # hive 2222 table = self._parse_table_parts(schema=True) 2223 else: 2224 unique = self._match(TokenType.UNIQUE) 2225 primary = self._match_text_seq("PRIMARY") 2226 amp = self._match_text_seq("AMP") 2227 if not self._match(TokenType.INDEX): 2228 return None 2229 index = self._parse_id_var() 2230 table = None 2231 2232 if self._match(TokenType.L_PAREN, advance=False): 2233 columns = self._parse_wrapped_csv(self._parse_ordered) 2234 else: 2235 columns = None 2236 2237 return self.expression( 2238 exp.Index, 2239 this=index, 2240 table=table, 2241 columns=columns, 2242 unique=unique, 2243 primary=primary, 2244 amp=amp, 2245 partition_by=self._parse_partition_by(), 2246 ) 2247 2248 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2249 return ( 2250 (not schema and self._parse_function()) 2251 or self._parse_id_var(any_token=False) 2252 or self._parse_string_as_identifier() 2253 or self._parse_placeholder() 2254 ) 2255 2256 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2257 catalog = None 2258 db = None 2259 table = self._parse_table_part(schema=schema) 2260 2261 while self._match(TokenType.DOT): 2262 if catalog: 2263 # This allows nesting the table in arbitrarily many dot expressions if needed 2264 table = self.expression( 2265 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2266 ) 2267 else: 2268 catalog = db 2269 db = table 2270 table = self._parse_table_part(schema=schema) 2271 2272 if not table: 2273 self.raise_error(f"Expected table name but got {self._curr}") 2274 2275 return self.expression( 2276 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2277 ) 2278 2279 def _parse_table( 2280 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2281 ) -> 
t.Optional[exp.Expression]: 2282 lateral = self._parse_lateral() 2283 if lateral: 2284 return lateral 2285 2286 unnest = self._parse_unnest() 2287 if unnest: 2288 return unnest 2289 2290 values = self._parse_derived_table_values() 2291 if values: 2292 return values 2293 2294 subquery = self._parse_select(table=True) 2295 if subquery: 2296 if not subquery.args.get("pivots"): 2297 subquery.set("pivots", self._parse_pivots()) 2298 return subquery 2299 2300 this: exp.Expression = self._parse_table_parts(schema=schema) 2301 2302 if schema: 2303 return self._parse_schema(this=this) 2304 2305 if self.alias_post_tablesample: 2306 table_sample = self._parse_table_sample() 2307 2308 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2309 if alias: 2310 this.set("alias", alias) 2311 2312 if not this.args.get("pivots"): 2313 this.set("pivots", self._parse_pivots()) 2314 2315 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2316 this.set( 2317 "hints", 2318 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2319 ) 2320 self._match_r_paren() 2321 2322 if not self.alias_post_tablesample: 2323 table_sample = self._parse_table_sample() 2324 2325 if table_sample: 2326 table_sample.set("this", this) 2327 this = table_sample 2328 2329 return this 2330 2331 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2332 if not self._match(TokenType.UNNEST): 2333 return None 2334 2335 expressions = self._parse_wrapped_csv(self._parse_type) 2336 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2337 alias = self._parse_table_alias() 2338 2339 if alias and self.unnest_column_only: 2340 if alias.args.get("columns"): 2341 self.raise_error("Unexpected extra column alias in unnest.") 2342 alias.set("columns", [alias.this]) 2343 alias.set("this", None) 2344 2345 offset = None 2346 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2347 self._match(TokenType.ALIAS) 2348 offset = self._parse_id_var() or 
exp.Identifier(this="offset") 2349 2350 return self.expression( 2351 exp.Unnest, 2352 expressions=expressions, 2353 ordinality=ordinality, 2354 alias=alias, 2355 offset=offset, 2356 ) 2357 2358 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2359 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2360 if not is_derived and not self._match(TokenType.VALUES): 2361 return None 2362 2363 expressions = self._parse_csv(self._parse_value) 2364 2365 if is_derived: 2366 self._match_r_paren() 2367 2368 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2369 2370 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2371 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2372 as_modifier and self._match_text_seq("USING", "SAMPLE") 2373 ): 2374 return None 2375 2376 bucket_numerator = None 2377 bucket_denominator = None 2378 bucket_field = None 2379 percent = None 2380 rows = None 2381 size = None 2382 seed = None 2383 2384 kind = ( 2385 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2386 ) 2387 method = self._parse_var(tokens=(TokenType.ROW,)) 2388 2389 self._match(TokenType.L_PAREN) 2390 2391 num = self._parse_number() 2392 2393 if self._match_text_seq("BUCKET"): 2394 bucket_numerator = self._parse_number() 2395 self._match_text_seq("OUT", "OF") 2396 bucket_denominator = bucket_denominator = self._parse_number() 2397 self._match(TokenType.ON) 2398 bucket_field = self._parse_field() 2399 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2400 percent = num 2401 elif self._match(TokenType.ROWS): 2402 rows = num 2403 else: 2404 size = num 2405 2406 self._match(TokenType.R_PAREN) 2407 2408 if self._match(TokenType.L_PAREN): 2409 method = self._parse_var() 2410 seed = self._match(TokenType.COMMA) and self._parse_number() 2411 self._match_r_paren() 2412 elif self._match_texts(("SEED", "REPEATABLE")): 2413 seed = 
self._parse_wrapped(self._parse_number) 2414 2415 return self.expression( 2416 exp.TableSample, 2417 method=method, 2418 bucket_numerator=bucket_numerator, 2419 bucket_denominator=bucket_denominator, 2420 bucket_field=bucket_field, 2421 percent=percent, 2422 rows=rows, 2423 size=size, 2424 seed=seed, 2425 kind=kind, 2426 ) 2427 2428 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2429 return list(iter(self._parse_pivot, None)) 2430 2431 # https://duckdb.org/docs/sql/statements/pivot 2432 def _parse_simplified_pivot(self) -> exp.Pivot: 2433 def _parse_on() -> t.Optional[exp.Expression]: 2434 this = self._parse_bitwise() 2435 return self._parse_in(this) if self._match(TokenType.IN) else this 2436 2437 this = self._parse_table() 2438 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2439 using = self._match(TokenType.USING) and self._parse_csv( 2440 lambda: self._parse_alias(self._parse_function()) 2441 ) 2442 group = self._parse_group() 2443 return self.expression( 2444 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2445 ) 2446 2447 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2448 index = self._index 2449 2450 if self._match(TokenType.PIVOT): 2451 unpivot = False 2452 elif self._match(TokenType.UNPIVOT): 2453 unpivot = True 2454 else: 2455 return None 2456 2457 expressions = [] 2458 field = None 2459 2460 if not self._match(TokenType.L_PAREN): 2461 self._retreat(index) 2462 return None 2463 2464 if unpivot: 2465 expressions = self._parse_csv(self._parse_column) 2466 else: 2467 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2468 2469 if not expressions: 2470 self.raise_error("Failed to parse PIVOT's aggregation list") 2471 2472 if not self._match(TokenType.FOR): 2473 self.raise_error("Expecting FOR") 2474 2475 value = self._parse_column() 2476 2477 if not self._match(TokenType.IN): 2478 self.raise_error("Expecting IN") 2479 2480 field = self._parse_in(value, 
alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach an alias if the next token doesn't start another PIVOT/UNPIVOT.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names a PIVOT produces: one per
            # (IN-list value x aggregation name) combination.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Names contributed by the aggregations to pivot output columns (their aliases)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; returns None if WHERE isn't next (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP/CUBE and
        WITH TOTALS into one exp.Group. Loops so forms may appear in any order."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            # `WITH ROLLUP` stores True; plain `ROLLUP (...)` stores the column list.
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or one column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; passes `this` through unchanged when no ORDER BY is present."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, exp_class: t.Type[exp.Expression], *texts: str
    ) -> t.Optional[exp.Expression]:
        """Generic parser for ORDER-BY-like clauses (e.g. SORT BY) keyed by `texts`."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST].

        When nulls ordering isn't explicit, infers nulls_first from the dialect's
        null_ordering setting so transpilation can be made explicit downstream.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when top=True), or a FETCH FIRST/NEXT clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; a bare comma is also accepted as the separator."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse trailing row-locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or a WAIT <n> expression.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT; recurses on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE/...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, or a boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of IN: UNNEST(...), a (sub)query/tuple, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE expression when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, normalizing toward INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # The string holds both value and unit, e.g. '5 day'; split them.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL, a `<type> <literal>` cast shorthand, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01': a type followed by a literal is a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type size argument, e.g. the `10` or `10 CHAR` in VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested/struct types, ARRAY suffixes ([]),
        timestamp time-zone modifiers and INTERVAL units.

        With check_func=True, backtracks when the parse is ambiguous with a
        function call (a string argument follows), returning None instead.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized args mean this could still be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Each trailing [] wraps the type in another ARRAY level.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ means this wasn't a type after all (e.g. an index expression).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, 
expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # A string argument means this was a function call, not a type: back out.
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member, e.g. `name: type` or `name type`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference and any trailing operators (dots, casts, brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Apply chained column operators (::cast, dots, bracket access) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: previous column parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, implicit string concat,
        `.N` number shorthand, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (e.g. 'a' 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, 
exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Dispatch order: NO_PAREN_FUNCTION_PARSERS, then paren-less builtins
        (NO_PAREN_FUNCTIONS), then FUNCTION_PARSERS, subquery predicates, and
        finally known FUNCTIONS builders or an exp.Anonymous fallback.
        With anonymous=True, always builds exp.Anonymous from the raw args.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip the function name and its opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> expr, (x, y) -> expr); otherwise backtrack and
        parse DISTINCT args or a plain select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In `arg = value` style arguments the left side is a parameter
                # name, not a column.
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list; first speculatively tries
        a nested SELECT (errors are cleared and the cursor restored)."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, then constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS constraint with either a wrapped list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY (...)| (expr)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            # Without IDENTITY, the parenthesized part is a generation expression.
            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally prefixed with CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint; unnamed ones fall back to the schema set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON <event> <action>, DEFERRABLE, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] plus any constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, 
options=options) 3460 3461 def _parse_foreign_key(self) -> exp.Expression: 3462 expressions = self._parse_wrapped_id_vars() 3463 reference = self._parse_references() 3464 options = {} 3465 3466 while self._match(TokenType.ON): 3467 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3468 self.raise_error("Expected DELETE or UPDATE") 3469 3470 kind = self._prev.text.lower() 3471 3472 if self._match_text_seq("NO", "ACTION"): 3473 action = "NO ACTION" 3474 elif self._match(TokenType.SET): 3475 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3476 action = "SET " + self._prev.text.upper() 3477 else: 3478 self._advance() 3479 action = self._prev.text.upper() 3480 3481 options[kind] = action 3482 3483 return self.expression( 3484 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3485 ) 3486 3487 def _parse_primary_key(self) -> exp.Expression: 3488 desc = ( 3489 self._match_set((TokenType.ASC, TokenType.DESC)) 3490 and self._prev.token_type == TokenType.DESC 3491 ) 3492 3493 if not self._match(TokenType.L_PAREN, advance=False): 3494 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3495 3496 expressions = self._parse_wrapped_csv(self._parse_field) 3497 options = self._parse_key_constraint_options() 3498 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3499 3500 @t.overload 3501 def _parse_bracket(self, this: exp.Expression) -> exp.Expression: 3502 ... 3503 3504 @t.overload 3505 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3506 ... 

    def _parse_bracket(self, this):
        """Parse a bracket/brace suffix on `this`: subscripts, slices, arrays or DuckDB structs."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon => open-ended slice like [:x].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize dialect-specific index base via the offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows (e.g. [a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        # Optional operand for the "simple CASE" form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(...) or statement-style IF c THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind past the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast over exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type-string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including WITHIN GROUP ordering."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(a, b); maps onto a Cast/TryCast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equal, or both operands NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # Odd trailing argument is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one [KEY] k [:|VALUE] v pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: pairs plus NULL handling / uniqueness / RETURNING options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...) arguments, honoring dialect base-order and LN defaults."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL-style MATCH (cols) AGAINST ('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One column in the WITH (...) schema clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls into exp.StrPosition."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN str) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into an exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str): first operand was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: WINDOW name AS (...), ..."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if either modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this` (FILTER, WITHIN GROUP, OVER (...))."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name form (no inline spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias on `this`; `explicit` requires the AS keyword to be present."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, optionally accepting any token or extra token sets."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a bare VAR token (or any/extra tokens when requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewinds one token on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list, parenthesized or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list, parenthesized or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START TRANSACTION with optional comma-separated mode words."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode may span several VAR tokens (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT / ROLLBACK tails (savepoints, AND [NO] CHAIN)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed AND [NO] CHAIN is only attached to COMMIT, not ROLLBACK.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse a DROP clause inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD <constraint> (CHECK, FOREIGN KEY or PRIMARY KEY)."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action(s) of ALTER TABLE: constraints, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action(s) of ALTER TABLE: partitions, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; anything unrecognized falls back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only commit to an AlterTable node if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None when absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW trie; unknown variants become a bare exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment (name = value / name TO value) as a SetItem."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment; rewind so the caller can treat SET as a command.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse SET; if any tokens remain unconsumed, fall back to a raw command."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the (possibly multi-word) `options` and return it as an exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a multi-word parser; rewinds on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True and (optionally) advance if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True and (optionally) advance past two consecutive matching tokens."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a left paren, else raise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a right paren, else raise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
self.raise_error("Expecting )") 4540 4541 def _match_texts(self, texts, advance=True): 4542 if self._curr and self._curr.text.upper() in texts: 4543 if advance: 4544 self._advance() 4545 return True 4546 return False 4547 4548 def _match_text_seq(self, *texts, advance=True): 4549 index = self._index 4550 for text in texts: 4551 if self._curr and self._curr.text.upper() == text: 4552 self._advance() 4553 else: 4554 self._retreat(index) 4555 return False 4556 4557 if not advance: 4558 self._retreat(index) 4559 4560 return True 4561 4562 @t.overload 4563 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4564 ... 4565 4566 @t.overload 4567 def _replace_columns_with_dots( 4568 self, this: t.Optional[exp.Expression] 4569 ) -> t.Optional[exp.Expression]: 4570 ... 4571 4572 def _replace_columns_with_dots(self, this): 4573 if isinstance(this, exp.Dot): 4574 exp.replace_children(this, self._replace_columns_with_dots) 4575 elif isinstance(this, exp.Column): 4576 exp.replace_children(this, self._replace_columns_with_dots) 4577 table = this.args.get("table") 4578 this = ( 4579 self.expression(exp.Dot, this=table, expression=this.this) 4580 if table 4581 else self.expression(exp.Var, this=this.name) 4582 ) 4583 elif isinstance(this, exp.Identifier): 4584 this = self.expression(exp.Var, this=this.name) 4585 4586 return this 4587 4588 def _replace_lambda( 4589 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4590 ) -> t.Optional[exp.Expression]: 4591 if not node: 4592 return node 4593 4594 for column in node.find_all(exp.Column): 4595 if column.parts[0].name in lambda_variables: 4596 dot_or_id = column.to_dot() if column.table else column.this 4597 parent = column.parent 4598 4599 while isinstance(parent, exp.Dot): 4600 if not isinstance(parent.parent, exp.Dot): 4601 parent.replace(dot_or_id) 4602 break 4603 parent = parent.parent 4604 else: 4605 if column is node: 4606 node = dot_or_id 4607 else: 4608 column.replace(dot_or_id) 4609 
return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
786 def __init__( 787 self, 788 error_level: t.Optional[ErrorLevel] = None, 789 error_message_context: int = 100, 790 index_offset: int = 0, 791 unnest_column_only: bool = False, 792 alias_post_tablesample: bool = False, 793 max_errors: int = 3, 794 null_ordering: t.Optional[str] = None, 795 ): 796 self.error_level = error_level or ErrorLevel.IMMEDIATE 797 self.error_message_context = error_message_context 798 self.index_offset = index_offset 799 self.unnest_column_only = unnest_column_only 800 self.alias_post_tablesample = alias_post_tablesample 801 self.max_errors = max_errors 802 self.null_ordering = null_ordering 803 self.reset()
815 def parse( 816 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 817 ) -> t.List[t.Optional[exp.Expression]]: 818 """ 819 Parses a list of tokens and returns a list of syntax trees, one tree 820 per parsed SQL statement. 821 822 Args: 823 raw_tokens: the list of tokens. 824 sql: the original SQL string, used to produce helpful debug messages. 825 826 Returns: 827 The list of syntax trees. 828 """ 829 return self._parse( 830 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 831 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
833 def parse_into( 834 self, 835 expression_types: exp.IntoType, 836 raw_tokens: t.List[Token], 837 sql: t.Optional[str] = None, 838 ) -> t.List[t.Optional[exp.Expression]]: 839 """ 840 Parses a list of tokens into a given Expression type. If a collection of Expression 841 types is given instead, this method will try to parse the token list into each one 842 of them, stopping at the first for which the parsing succeeds. 843 844 Args: 845 expression_types: the expression type(s) to try and parse the token list into. 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The target Expression. 851 """ 852 errors = [] 853 for expression_type in ensure_collection(expression_types): 854 parser = self.EXPRESSION_PARSERS.get(expression_type) 855 if not parser: 856 raise TypeError(f"No parser registered for {expression_type}") 857 try: 858 return self._parse(parser, raw_tokens, sql) 859 except ParseError as e: 860 e.errors[0]["into_expression"] = expression_type 861 errors.append(e) 862 raise ParseError( 863 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 864 errors=merge_errors(errors), 865 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
901 def check_errors(self) -> None: 902 """ 903 Logs or raises any found errors, depending on the chosen error level setting. 904 """ 905 if self.error_level == ErrorLevel.WARN: 906 for error in self.errors: 907 logger.error(str(error)) 908 elif self.error_level == ErrorLevel.RAISE and self.errors: 909 raise ParseError( 910 concat_messages(self.errors, self.max_errors), 911 errors=merge_errors(self.errors), 912 )
Logs or raises any found errors, depending on the chosen error level setting.
914 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 915 """ 916 Appends an error in the list of recorded errors or raises it, depending on the chosen 917 error level setting. 918 """ 919 token = token or self._curr or self._prev or Token.string("") 920 start = token.start 921 end = token.end + 1 922 start_context = self.sql[max(start - self.error_message_context, 0) : start] 923 highlight = self.sql[start:end] 924 end_context = self.sql[end : end + self.error_message_context] 925 926 error = ParseError.new( 927 f"{message}. Line {token.line}, Col: {token.col}.\n" 928 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 929 description=message, 930 line=token.line, 931 col=token.col, 932 start_context=start_context, 933 highlight=highlight, 934 end_context=end_context, 935 ) 936 937 if self.error_level == ErrorLevel.IMMEDIATE: 938 raise error 939 940 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
942 def expression( 943 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 944 ) -> E: 945 """ 946 Creates a new, validated Expression. 947 948 Args: 949 exp_class: the expression class to instantiate. 950 comments: an optional list of comments to attach to the expression. 951 kwargs: the arguments to set for the expression along with their respective values. 952 953 Returns: 954 The target expression. 955 """ 956 instance = exp_class(**kwargs) 957 instance.add_comments(comments) if comments else self._add_comments(instance) 958 self.validate_expression(instance) 959 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
966 def validate_expression( 967 self, expression: exp.Expression, args: t.Optional[t.List] = None 968 ) -> None: 969 """ 970 Validates an already instantiated expression, making sure that all its mandatory arguments 971 are set. 972 973 Args: 974 expression: the expression to validate. 975 args: an optional list of items that was used to instantiate the expression, if it's a Func. 976 """ 977 if self.error_level == ErrorLevel.IGNORE: 978 return 979 980 for error_message in expression.error_messages(args): 981 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.