sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13if t.TYPE_CHECKING: 14 from sqlglot._typing import E 15 16logger = logging.getLogger("sqlglot") 17 18 19def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 20 if len(args) == 1 and args[0].is_star: 21 return exp.StarMap(this=args[0]) 22 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def parse_like(args: t.List) -> exp.Expression: 35 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 36 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 37 38 39def binary_range_parser( 40 expr_type: t.Type[exp.Expression], 41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 42 return lambda self, this: self._parse_escape( 43 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 44 ) 45 46 47class _Parser(type): 48 def __new__(cls, clsname, bases, attrs): 49 klass = super().__new__(cls, clsname, bases, attrs) 50 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 51 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 52 53 return klass 54 55 56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.IMMEDIATE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.INT4RANGE, 159 TokenType.INT4MULTIRANGE, 160 TokenType.INT8RANGE, 161 TokenType.INT8MULTIRANGE, 162 TokenType.NUMRANGE, 163 TokenType.NUMMULTIRANGE, 164 TokenType.TSRANGE, 165 TokenType.TSMULTIRANGE, 166 TokenType.TSTZRANGE, 167 TokenType.TSTZMULTIRANGE, 168 TokenType.DATERANGE, 169 TokenType.DATEMULTIRANGE, 170 TokenType.DECIMAL, 171 TokenType.BIGDECIMAL, 172 TokenType.UUID, 173 TokenType.GEOGRAPHY, 174 TokenType.GEOMETRY, 175 TokenType.HLLSKETCH, 176 TokenType.HSTORE, 177 TokenType.PSEUDO_TYPE, 178 TokenType.SUPER, 179 TokenType.SERIAL, 180 TokenType.SMALLSERIAL, 181 TokenType.BIGSERIAL, 182 TokenType.XML, 183 TokenType.UNIQUEIDENTIFIER, 184 TokenType.MONEY, 185 TokenType.SMALLMONEY, 186 TokenType.ROWVERSION, 187 TokenType.IMAGE, 188 TokenType.VARIANT, 189 TokenType.OBJECT, 190 TokenType.INET, 191 *NESTED_TYPE_TOKENS, 192 } 193 194 SUBQUERY_PREDICATES = { 195 TokenType.ANY: exp.Any, 196 TokenType.ALL: exp.All, 197 TokenType.EXISTS: exp.Exists, 198 TokenType.SOME: exp.Any, 199 } 200 201 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 202 203 DB_CREATABLES = { 204 TokenType.DATABASE, 205 TokenType.SCHEMA, 206 TokenType.TABLE, 207 TokenType.VIEW, 208 TokenType.DICTIONARY, 209 } 210 211 CREATABLES = { 212 TokenType.COLUMN, 213 TokenType.FUNCTION, 214 TokenType.INDEX, 215 TokenType.PROCEDURE, 216 *DB_CREATABLES, 217 } 218 219 ID_VAR_TOKENS = { 220 TokenType.VAR, 221 TokenType.ANTI, 222 TokenType.APPLY, 223 TokenType.ASC, 224 TokenType.AUTO_INCREMENT, 225 TokenType.BEGIN, 226 TokenType.CACHE, 227 TokenType.COLLATE, 228 TokenType.COMMAND, 229 TokenType.COMMENT, 230 TokenType.COMMIT, 231 TokenType.CONSTRAINT, 232 TokenType.DEFAULT, 233 TokenType.DELETE, 234 TokenType.DESC, 235 TokenType.DESCRIBE, 236 TokenType.DICTIONARY, 237 TokenType.DIV, 238 TokenType.END, 239 TokenType.EXECUTE, 240 TokenType.ESCAPE, 241 TokenType.FALSE, 242 TokenType.FIRST, 243 TokenType.FILTER, 244 TokenType.FORMAT, 245 TokenType.FULL, 246 TokenType.IF, 247 TokenType.IS, 248 TokenType.ISNULL, 249 TokenType.INTERVAL, 250 TokenType.KEEP, 251 TokenType.LEFT, 252 TokenType.LOAD, 253 TokenType.MERGE, 254 TokenType.NATURAL, 255 TokenType.NEXT, 256 TokenType.OFFSET, 257 TokenType.ORDINALITY, 258 TokenType.OVERWRITE, 259 TokenType.PARTITION, 260 TokenType.PERCENT, 261 TokenType.PIVOT, 262 TokenType.PRAGMA, 263 TokenType.RANGE, 264 TokenType.REFERENCES, 265 TokenType.RIGHT, 266 TokenType.ROW, 267 TokenType.ROWS, 268 TokenType.SEMI, 269 TokenType.SET, 270 TokenType.SETTINGS, 271 TokenType.SHOW, 272 TokenType.TEMPORARY, 273 TokenType.TOP, 274 TokenType.TRUE, 275 TokenType.UNIQUE, 276 TokenType.UNPIVOT, 277 TokenType.VOLATILE, 278 TokenType.WINDOW, 279 *CREATABLES, 280 *SUBQUERY_PREDICATES, 281 *TYPE_TOKENS, 282 *NO_PAREN_FUNCTIONS, 283 } 284 285 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 286 287 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 288 TokenType.APPLY, 289 TokenType.ASOF, 290 TokenType.FULL, 291 TokenType.LEFT, 292 TokenType.LOCK, 293 TokenType.NATURAL, 294 TokenType.OFFSET, 295 TokenType.RIGHT, 296 TokenType.WINDOW, 297 } 298 299 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 300 301 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 302 303 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 304 305 FUNC_TOKENS = { 306 TokenType.COMMAND, 307 TokenType.CURRENT_DATE, 308 TokenType.CURRENT_DATETIME, 309 TokenType.CURRENT_TIMESTAMP, 310 TokenType.CURRENT_TIME, 311 TokenType.CURRENT_USER, 312 TokenType.FILTER, 313 TokenType.FIRST, 314 TokenType.FORMAT, 315 TokenType.GLOB, 316 TokenType.IDENTIFIER, 317 TokenType.INDEX, 318 TokenType.ISNULL, 319 TokenType.ILIKE, 320 TokenType.LIKE, 321 TokenType.MERGE, 322 TokenType.OFFSET, 323 TokenType.PRIMARY_KEY, 324 TokenType.RANGE, 325 TokenType.REPLACE, 326 TokenType.ROW, 327 TokenType.UNNEST, 328 TokenType.VAR, 329 TokenType.LEFT, 330 TokenType.RIGHT, 331 TokenType.DATE, 332 TokenType.DATETIME, 333 TokenType.TABLE, 334 TokenType.TIMESTAMP, 335 TokenType.TIMESTAMPTZ, 336 TokenType.WINDOW, 337 *TYPE_TOKENS, 338 *SUBQUERY_PREDICATES, 339 } 340 341 CONJUNCTION = { 342 TokenType.AND: exp.And, 343 TokenType.OR: exp.Or, 344 } 345 346 EQUALITY = { 347 TokenType.EQ: exp.EQ, 348 TokenType.NEQ: exp.NEQ, 349 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 350 } 351 352 COMPARISON = { 353 TokenType.GT: exp.GT, 354 TokenType.GTE: exp.GTE, 355 TokenType.LT: exp.LT, 356 TokenType.LTE: exp.LTE, 357 } 358 359 BITWISE = { 360 TokenType.AMP: exp.BitwiseAnd, 361 TokenType.CARET: exp.BitwiseXor, 362 TokenType.PIPE: exp.BitwiseOr, 363 TokenType.DPIPE: exp.DPipe, 364 } 365 366 TERM = { 367 TokenType.DASH: exp.Sub, 368 TokenType.PLUS: exp.Add, 369 TokenType.MOD: exp.Mod, 370 TokenType.COLLATE: exp.Collate, 371 } 372 373 FACTOR = { 374 TokenType.DIV: exp.IntDiv, 375 TokenType.LR_ARROW: exp.Distance, 376 TokenType.SLASH: exp.Div, 377 TokenType.STAR: exp.Mul, 378 } 379 380 TIMESTAMPS = { 381 TokenType.TIME, 382 TokenType.TIMESTAMP, 383 TokenType.TIMESTAMPTZ, 384 TokenType.TIMESTAMPLTZ, 385 } 386 387 SET_OPERATIONS = { 388 TokenType.UNION, 389 TokenType.INTERSECT, 390 TokenType.EXCEPT, 391 } 392 393 JOIN_METHODS = { 394 TokenType.NATURAL, 395 TokenType.ASOF, 396 } 397 398 JOIN_SIDES = { 399 TokenType.LEFT, 400 TokenType.RIGHT, 401 TokenType.FULL, 402 } 403 404 JOIN_KINDS = { 405 TokenType.INNER, 406 TokenType.OUTER, 407 TokenType.CROSS, 408 TokenType.SEMI, 409 TokenType.ANTI, 410 } 411 412 LAMBDAS = { 413 TokenType.ARROW: lambda self, expressions: self.expression( 414 exp.Lambda, 415 this=self._replace_lambda( 416 self._parse_conjunction(), 417 {node.name for node in expressions}, 418 ), 419 expressions=expressions, 420 ), 421 TokenType.FARROW: lambda self, expressions: self.expression( 422 exp.Kwarg, 423 this=exp.Var(this=expressions[0].name), 424 expression=self._parse_conjunction(), 425 ), 426 } 427 428 COLUMN_OPERATORS = { 429 TokenType.DOT: None, 430 TokenType.DCOLON: lambda self, this, to: self.expression( 431 exp.Cast if self.STRICT_CAST else exp.TryCast, 432 this=this, 433 to=to, 434 ), 435 TokenType.ARROW: lambda self, this, path: self.expression( 436 exp.JSONExtract, 437 this=this, 438 expression=path, 439 ), 440 TokenType.DARROW: lambda self, this, path: self.expression( 441 exp.JSONExtractScalar, 442 this=this, 443 expression=path, 444 ), 445 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 446 exp.JSONBExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 451 exp.JSONBExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 456 exp.JSONBContains, 457 this=this, 458 expression=key, 459 ), 460 } 461 462 EXPRESSION_PARSERS = { 463 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 464 exp.Column: lambda self: self._parse_column(), 465 exp.Condition: lambda self: self._parse_conjunction(), 466 exp.DataType: lambda self: self._parse_types(), 467 exp.Expression: lambda self: self._parse_statement(), 468 exp.From: lambda self: self._parse_from(), 469 exp.Group: lambda self: self._parse_group(), 470 exp.Having: lambda self: self._parse_having(), 471 exp.Identifier: lambda self: self._parse_id_var(), 472 exp.Join: lambda self: self._parse_join(), 473 exp.Lambda: lambda self: self._parse_lambda(), 474 exp.Lateral: lambda self: self._parse_lateral(), 475 exp.Limit: lambda self: self._parse_limit(), 476 exp.Offset: lambda self: self._parse_offset(), 477 exp.Order: lambda self: self._parse_order(), 478 exp.Ordered: lambda self: self._parse_ordered(), 479 exp.Properties: lambda self: self._parse_properties(), 480 exp.Qualify: lambda self: self._parse_qualify(), 481 exp.Returning: lambda self: self._parse_returning(), 482 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 483 exp.Table: lambda self: self._parse_table_parts(), 484 exp.TableAlias: lambda self: self._parse_table_alias(), 485 exp.Where: lambda self: self._parse_where(), 486 exp.Window: lambda self: self._parse_named_window(), 487 exp.With: lambda self: self._parse_with(), 488 "JOIN_TYPE": lambda self: self._parse_join_parts(), 489 } 490 491 STATEMENT_PARSERS = { 492 TokenType.ALTER: lambda self: self._parse_alter(), 493 TokenType.BEGIN: lambda self: self._parse_transaction(), 494 TokenType.CACHE: lambda self: self._parse_cache(), 495 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 496 TokenType.COMMENT: lambda self: self._parse_comment(), 497 TokenType.CREATE: lambda self: self._parse_create(), 498 TokenType.DELETE: lambda self: self._parse_delete(), 499 TokenType.DESC: lambda self: self._parse_describe(), 500 TokenType.DESCRIBE: lambda self: self._parse_describe(), 501 TokenType.DROP: lambda self: self._parse_drop(), 502 TokenType.END: lambda self: self._parse_commit_or_rollback(), 503 TokenType.FROM: lambda self: exp.select("*").from_( 504 t.cast(exp.From, self._parse_from(skip_from_token=True)) 505 ), 506 TokenType.INSERT: lambda self: self._parse_insert(), 507 TokenType.LOAD: lambda self: self._parse_load(), 508 TokenType.MERGE: lambda self: self._parse_merge(), 509 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 510 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 511 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 512 TokenType.SET: lambda self: self._parse_set(), 513 TokenType.UNCACHE: lambda self: self._parse_uncache(), 514 TokenType.UPDATE: lambda self: self._parse_update(), 515 TokenType.USE: lambda self: self.expression( 516 exp.Use, 517 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 518 and exp.Var(this=self._prev.text), 519 this=self._parse_table(schema=False), 520 ), 521 } 522 523 UNARY_PARSERS = { 524 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 525 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 526 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 527 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 528 } 529 530 PRIMARY_PARSERS = { 531 TokenType.STRING: lambda self, token: self.expression( 532 exp.Literal, this=token.text, is_string=True 533 ), 534 TokenType.NUMBER: lambda self, token: self.expression( 535 exp.Literal, this=token.text, is_string=False 536 ), 537 TokenType.STAR: lambda self, _: self.expression( 538 exp.Star, 539 **{"except": self._parse_except(), "replace": self._parse_replace()}, 540 ), 541 TokenType.NULL: lambda self, _: self.expression(exp.Null), 542 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 543 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 544 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 545 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 546 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 547 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 548 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 549 exp.National, this=token.text 550 ), 551 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 552 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 553 } 554 555 PLACEHOLDER_PARSERS = { 556 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 557 TokenType.PARAMETER: lambda self: self._parse_parameter(), 558 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 559 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 560 else None, 561 } 562 563 RANGE_PARSERS = { 564 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 565 TokenType.GLOB: binary_range_parser(exp.Glob), 566 TokenType.ILIKE: binary_range_parser(exp.ILike), 567 TokenType.IN: lambda self, this: self._parse_in(this), 568 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 569 TokenType.IS: lambda self, this: self._parse_is(this), 570 TokenType.LIKE: binary_range_parser(exp.Like), 571 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 572 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 573 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 574 } 575 576 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 577 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 578 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 579 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 580 "CHARACTER SET": lambda self: self._parse_character_set(), 581 "CHECKSUM": lambda self: self._parse_checksum(), 582 "CLUSTER": lambda self: self._parse_cluster(), 583 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 584 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 585 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 586 "DEFINER": lambda self: self._parse_definer(), 587 "DETERMINISTIC": lambda self: self.expression( 588 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 589 ), 590 "DISTKEY": lambda self: self._parse_distkey(), 591 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 592 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 593 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 594 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 595 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 596 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 597 "FREESPACE": lambda self: self._parse_freespace(), 598 "IMMUTABLE": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 600 ), 601 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 602 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 603 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 604 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 605 "LIKE": lambda self: self._parse_create_like(), 606 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 607 "LOCK": lambda self: self._parse_locking(), 608 "LOCKING": lambda self: self._parse_locking(), 609 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 610 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 611 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 612 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 613 "NO": lambda self: self._parse_no_property(), 614 "ON": lambda self: self._parse_on_property(), 615 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 616 "PARTITION BY": lambda self: self._parse_partitioned_by(), 617 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 618 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 619 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 620 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 621 "RETURNS": lambda self: self._parse_returns(), 622 "ROW": lambda self: self._parse_row(), 623 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 624 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 625 "SETTINGS": lambda self: self.expression( 626 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 627 ), 628 "SORTKEY": lambda self: self._parse_sortkey(), 629 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 630 "STABLE": lambda self: self.expression( 631 exp.StabilityProperty, this=exp.Literal.string("STABLE") 632 ), 633 "STORED": lambda self: self._parse_stored(), 634 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 635 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 636 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 637 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 638 "TTL": lambda self: self._parse_ttl(), 639 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 640 "VOLATILE": lambda self: self._parse_volatile_property(), 641 "WITH": lambda self: self._parse_with_property(), 642 } 643 644 CONSTRAINT_PARSERS = { 645 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 646 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 647 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 648 "CHARACTER SET": lambda self: self.expression( 649 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 650 ), 651 "CHECK": lambda self: self.expression( 652 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 653 ), 654 "COLLATE": lambda self: self.expression( 655 exp.CollateColumnConstraint, this=self._parse_var() 656 ), 657 "COMMENT": lambda self: self.expression( 658 exp.CommentColumnConstraint, this=self._parse_string() 659 ), 660 "COMPRESS": lambda self: self._parse_compress(), 661 "DEFAULT": lambda self: self.expression( 662 exp.DefaultColumnConstraint, this=self._parse_bitwise() 663 ), 664 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 665 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 666 "FORMAT": lambda self: self.expression( 667 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 668 ), 669 "GENERATED": lambda self: self._parse_generated_as_identity(), 670 "IDENTITY": lambda self: self._parse_auto_increment(), 671 "INLINE": lambda self: self._parse_inline(), 672 "LIKE": lambda self: self._parse_create_like(), 673 "NOT": lambda self: self._parse_not_constraint(), 674 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 675 "ON": lambda self: self._match(TokenType.UPDATE) 676 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 677 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 678 "PRIMARY KEY": lambda self: self._parse_primary_key(), 679 "REFERENCES": lambda self: self._parse_references(match=False), 680 "TITLE": lambda self: self.expression( 681 exp.TitleColumnConstraint, this=self._parse_var_or_string() 682 ), 683 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 684 "UNIQUE": lambda self: self._parse_unique(), 685 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 686 } 687 688 ALTER_PARSERS = { 689 "ADD": lambda self: self._parse_alter_table_add(), 690 "ALTER": lambda self: self._parse_alter_table_alter(), 691 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 692 "DROP": lambda self: self._parse_alter_table_drop(), 693 "RENAME": lambda self: self._parse_alter_table_rename(), 694 } 695 696 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 697 698 NO_PAREN_FUNCTION_PARSERS = { 699 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 700 TokenType.CASE: lambda self: self._parse_case(), 701 TokenType.IF: lambda self: self._parse_if(), 702 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 703 exp.NextValueFor, 704 this=self._parse_column(), 705 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 706 ), 707 } 708 709 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 710 711 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 712 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 713 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 714 "DECODE": lambda self: self._parse_decode(), 715 "EXTRACT": lambda self: self._parse_extract(), 716 "JSON_OBJECT": lambda self: self._parse_json_object(), 717 "LOG": lambda self: self._parse_logarithm(), 718 "MATCH": lambda self: self._parse_match_against(), 719 "OPENJSON": lambda self: self._parse_open_json(), 720 "POSITION": lambda self: self._parse_position(), 721 "SAFE_CAST": lambda self: self._parse_cast(False), 722 "STRING_AGG": lambda self: self._parse_string_agg(), 723 "SUBSTRING": lambda self: self._parse_substring(), 724 "TRIM": lambda self: self._parse_trim(), 725 "TRY_CAST": lambda self: self._parse_cast(False), 726 "TRY_CONVERT": lambda self: self._parse_convert(False), 727 } 728 729 QUERY_MODIFIER_PARSERS = { 730 "joins": lambda self: list(iter(self._parse_join, None)), 731 "laterals": lambda self: list(iter(self._parse_lateral, None)), 732 "match": lambda self: self._parse_match_recognize(), 733 "where": lambda self: self._parse_where(), 734 "group": lambda self: self._parse_group(), 735 "having": lambda self: self._parse_having(), 736 "qualify": lambda self: self._parse_qualify(), 737 "windows": lambda self: self._parse_window_clause(), 738 "order": lambda self: self._parse_order(), 739 "limit": lambda self: self._parse_limit(), 740 "offset": lambda self: self._parse_offset(), 741 "locks": lambda self: self._parse_locks(), 742 "sample": lambda self: self._parse_table_sample(as_modifier=True), 743 } 744 745 SET_PARSERS = { 746 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 747 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 748 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 749 "TRANSACTION": lambda self: self._parse_set_transaction(), 750 } 751 752 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 753 754 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 755 756 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 757 758 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 759 760 TRANSACTION_CHARACTERISTICS = { 761 "ISOLATION LEVEL REPEATABLE READ", 762 "ISOLATION LEVEL READ COMMITTED", 763 "ISOLATION LEVEL READ UNCOMMITTED", 764 "ISOLATION LEVEL SERIALIZABLE", 765 "READ WRITE", 766 "READ ONLY", 767 } 768 769 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 770 771 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 772 773 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 774 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 775 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 776 777 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 778 779 STRICT_CAST = True 780 781 CONVERT_TYPE_FIRST = False 782 783 PREFIXED_PIVOT_COLUMNS = False 784 IDENTIFY_PIVOT_STRINGS = False 785 786 LOG_BASE_FIRST = True 787 LOG_DEFAULTS_TO_LN = False 788 789 __slots__ = ( 790 "error_level", 791 "error_message_context", 792 "sql", 793 "errors", 794 "index_offset", 795 "unnest_column_only", 796 "alias_post_tablesample", 797 "max_errors", 798 "null_ordering", 799 "_tokens", 800 "_index", 801 "_curr", 802 "_next", 803 "_prev", 804 "_prev_comments", 805 "_show_trie", 806 "_set_trie", 807 ) 808 809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset() 827 828 def reset(self): 829 self.sql = "" 830 self.errors = [] 831 self._tokens = [] 832 self._index = 0 833 self._curr = None 834 self._next = None 835 self._prev = None 836 self._prev_comments = None 837 838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 ) 855 856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1] 889 890 def _parse( 891 self, 892 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 893 raw_tokens: t.List[Token], 894 sql: t.Optional[str] = None, 895 ) -> t.List[t.Optional[exp.Expression]]: 896 self.reset() 897 self.sql = sql or "" 898 total = len(raw_tokens) 899 chunks: t.List[t.List[Token]] = [[]] 900 901 for i, token in enumerate(raw_tokens): 902 if token.token_type == TokenType.SEMICOLON: 903 if i < total - 1: 904 chunks.append([]) 905 else: 906 chunks[-1].append(token) 907 908 expressions = [] 909 910 for tokens in chunks: 911 self._index = -1 912 self._tokens = tokens 913 self._advance() 914 915 expressions.append(parse_method(self)) 916 917 if self._index < len(self._tokens): 918 self.raise_error("Invalid expression / Unexpected token") 919 920 self.check_errors() 921 922 return expressions 923 924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 ) 936 937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end + 1 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error) 964 965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance 983 984 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 985 if expression and self._prev_comments: 986 expression.add_comments(self._prev_comments) 987 self._prev_comments = None 988 989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message) 1005 1006 def _find_sql(self, start: Token, end: Token) -> str: 1007 return self.sql[start.start : end.end + 1] 1008 1009 def _advance(self, times: int = 1) -> None: 1010 self._index += times 1011 self._curr = seq_get(self._tokens, self._index) 1012 self._next = seq_get(self._tokens, self._index + 1) 1013 if self._index > 0: 1014 self._prev = self._tokens[self._index - 1] 1015 self._prev_comments = self._prev.comments 1016 else: 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def _retreat(self, index: int) -> None: 1021 if index != self._index: 1022 self._advance(index - self._index) 1023 1024 def _parse_command(self) -> exp.Command: 1025 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1026 1027 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1028 start = self._prev 1029 exists = self._parse_exists() if allow_exists else None 1030 1031 self._match(TokenType.ON) 1032 1033 kind = self._match_set(self.CREATABLES) and self._prev 1034 1035 if not kind: 1036 return self._parse_as_command(start) 1037 1038 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1039 this = self._parse_user_defined_function(kind=kind.token_type) 1040 elif kind.token_type == TokenType.TABLE: 1041 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1042 elif kind.token_type == TokenType.COLUMN: 1043 this = self._parse_column() 1044 else: 1045 this = self._parse_id_var() 1046 1047 self._match(TokenType.IS) 1048 1049 return self.expression( 1050 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1051 ) 1052 1053 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1054 def _parse_ttl(self) -> exp.Expression: 1055 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1056 this = self._parse_bitwise() 1057 1058 if self._match_text_seq("DELETE"): 1059 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1060 if self._match_text_seq("RECOMPRESS"): 1061 return self.expression( 1062 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1063 ) 1064 if self._match_text_seq("TO", "DISK"): 1065 return self.expression( 1066 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1067 ) 1068 if self._match_text_seq("TO", "VOLUME"): 1069 return self.expression( 1070 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1071 ) 1072 1073 return this 1074 1075 expressions = self._parse_csv(_parse_ttl_action) 1076 where = self._parse_where() 1077 group = self._parse_group() 1078 1079 aggregates = None 1080 if group and self._match(TokenType.SET): 1081 aggregates = self._parse_csv(self._parse_set_item) 1082 1083 return self.expression( 1084 exp.MergeTreeTTL, 1085 expressions=expressions, 1086 where=where, 1087 group=group, 1088 aggregates=aggregates, 1089 ) 1090 1091 def _parse_statement(self) -> t.Optional[exp.Expression]: 1092 if self._curr is None: 1093 return None 1094 1095 if self._match_set(self.STATEMENT_PARSERS): 1096 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1097 1098 if self._match_set(Tokenizer.COMMANDS): 1099 return self._parse_command() 1100 1101 expression = self._parse_expression() 1102 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1103 return self._parse_query_modifiers(expression) 1104 1105 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1106 start = self._prev 1107 temporary = self._match(TokenType.TEMPORARY) 1108 materialized = self._match_text_seq("MATERIALIZED") 1109 kind = self._match_set(self.CREATABLES) and self._prev.text 1110 if not kind: 1111 return self._parse_as_command(start) 1112 1113 return self.expression( 1114 exp.Drop, 1115 exists=self._parse_exists(), 1116 this=self._parse_table(schema=True), 1117 kind=kind, 1118 temporary=temporary, 1119 materialized=materialized, 1120 cascade=self._match_text_seq("CASCADE"), 1121 constraints=self._match_text_seq("CONSTRAINTS"), 1122 purge=self._match_text_seq("PURGE"), 1123 ) 1124 1125 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1126 return ( 1127 self._match(TokenType.IF) 1128 and (not not_ or self._match(TokenType.NOT)) 1129 and self._match(TokenType.EXISTS) 1130 ) 1131 1132 def _parse_create(self) -> t.Optional[exp.Expression]: 1133 start = self._prev 1134 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1135 TokenType.OR, TokenType.REPLACE 1136 ) 1137 unique = self._match(TokenType.UNIQUE) 1138 1139 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1140 self._match(TokenType.TABLE) 1141 1142 properties = None 1143 create_token = self._match_set(self.CREATABLES) and self._prev 1144 1145 if not create_token: 1146 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1147 create_token = self._match_set(self.CREATABLES) and self._prev 1148 1149 if not properties or not create_token: 1150 return self._parse_as_command(start) 1151 1152 exists = self._parse_exists(not_=True) 1153 this = None 1154 expression = None 1155 indexes = None 1156 no_schema_binding = None 1157 begin = None 1158 clone = None 1159 1160 def extend_props(temp_props: t.Optional[exp.Expression]) -> None: 1161 nonlocal properties 1162 if properties and temp_props: 1163 properties.expressions.extend(temp_props.expressions) 1164 elif temp_props: 1165 properties = temp_props 1166 1167 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1168 this = self._parse_user_defined_function(kind=create_token.token_type) 1169 extend_props(self._parse_properties()) 1170 1171 self._match(TokenType.ALIAS) 1172 begin = self._match(TokenType.BEGIN) 1173 return_ = self._match_text_seq("RETURN") 1174 expression = self._parse_statement() 1175 1176 if return_: 1177 expression = self.expression(exp.Return, this=expression) 1178 elif create_token.token_type == TokenType.INDEX: 1179 this = self._parse_index(index=self._parse_id_var()) 1180 elif create_token.token_type in self.DB_CREATABLES: 1181 table_parts = self._parse_table_parts(schema=True) 1182 1183 # exp.Properties.Location.POST_NAME 1184 self._match(TokenType.COMMA) 1185 extend_props(self._parse_properties(before=True)) 1186 1187 this = self._parse_schema(this=table_parts) 1188 1189 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1190 extend_props(self._parse_properties()) 1191 1192 self._match(TokenType.ALIAS) 1193 1194 # exp.Properties.Location.POST_ALIAS 1195 if not ( 1196 self._match(TokenType.SELECT, advance=False) 1197 or self._match(TokenType.WITH, advance=False) 1198 or self._match(TokenType.L_PAREN, advance=False) 1199 ): 1200 extend_props(self._parse_properties()) 1201 1202 expression = self._parse_ddl_select() 1203 1204 if create_token.token_type == TokenType.TABLE: 1205 indexes = [] 1206 while True: 1207 index = self._parse_index() 1208 1209 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1210 extend_props(self._parse_properties()) 1211 1212 if not index: 1213 break 1214 else: 1215 self._match(TokenType.COMMA) 1216 indexes.append(index) 1217 elif create_token.token_type == TokenType.VIEW: 1218 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1219 no_schema_binding = True 1220 1221 if self._match_text_seq("CLONE"): 1222 clone = self._parse_table(schema=True) 1223 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1224 clone_kind = ( 1225 self._match(TokenType.L_PAREN) 1226 and self._match_texts(self.CLONE_KINDS) 1227 and self._prev.text.upper() 1228 ) 1229 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1230 self._match(TokenType.R_PAREN) 1231 clone = self.expression( 1232 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1233 ) 1234 1235 return self.expression( 1236 exp.Create, 1237 this=this, 1238 kind=create_token.text, 1239 replace=replace, 1240 unique=unique, 1241 expression=expression, 1242 exists=exists, 1243 properties=properties, 1244 indexes=indexes, 1245 no_schema_binding=no_schema_binding, 1246 begin=begin, 1247 clone=clone, 1248 ) 1249 1250 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1251 # only used for teradata currently 1252 self._match(TokenType.COMMA) 1253 1254 kwargs = { 1255 "no": self._match_text_seq("NO"), 1256 "dual": self._match_text_seq("DUAL"), 1257 "before": self._match_text_seq("BEFORE"), 1258 "default": self._match_text_seq("DEFAULT"), 1259 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1260 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1261 "after": self._match_text_seq("AFTER"), 1262 "minimum": self._match_texts(("MIN", "MINIMUM")), 1263 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1264 } 1265 1266 if self._match_texts(self.PROPERTY_PARSERS): 1267 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1268 try: 1269 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1270 except TypeError: 1271 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1272 1273 return None 1274 1275 def _parse_property(self) -> t.Optional[exp.Expression]: 1276 if self._match_texts(self.PROPERTY_PARSERS): 1277 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1278 1279 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1280 return self._parse_character_set(default=True) 1281 1282 if self._match_text_seq("COMPOUND", "SORTKEY"): 1283 return self._parse_sortkey(compound=True) 1284 1285 if self._match_text_seq("SQL", "SECURITY"): 1286 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1287 1288 assignment = self._match_pair( 1289 TokenType.VAR, TokenType.EQ, advance=False 1290 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1291 1292 if assignment: 1293 key = self._parse_var_or_string() 1294 self._match(TokenType.EQ) 1295 return self.expression(exp.Property, this=key, value=self._parse_column()) 1296 1297 return None 1298 1299 def _parse_stored(self) -> exp.Expression: 1300 self._match(TokenType.ALIAS) 1301 1302 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1303 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1304 1305 return self.expression( 1306 exp.FileFormatProperty, 1307 this=self.expression( 1308 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1309 ) 1310 if input_format or output_format 1311 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1312 ) 1313 1314 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1315 self._match(TokenType.EQ) 1316 self._match(TokenType.ALIAS) 1317 return self.expression(exp_class, this=self._parse_field()) 1318 1319 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1320 properties = [] 1321 1322 while True: 1323 if before: 1324 prop = self._parse_property_before() 1325 else: 1326 prop = self._parse_property() 1327 1328 if not prop: 1329 break 1330 for p in ensure_list(prop): 1331 properties.append(p) 1332 1333 if properties: 1334 return self.expression(exp.Properties, expressions=properties) 1335 1336 return None 1337 1338 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1339 return self.expression( 1340 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1341 ) 1342 1343 def _parse_volatile_property(self) -> exp.Expression: 1344 if self._index >= 2: 1345 pre_volatile_token = self._tokens[self._index - 2] 1346 else: 1347 pre_volatile_token = None 1348 1349 if pre_volatile_token and pre_volatile_token.token_type in ( 1350 TokenType.CREATE, 1351 TokenType.REPLACE, 1352 TokenType.UNIQUE, 1353 ): 1354 return exp.VolatileProperty() 1355 1356 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1357 1358 def _parse_with_property( 1359 self, 1360 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1361 self._match(TokenType.WITH) 1362 if self._match(TokenType.L_PAREN, advance=False): 1363 return self._parse_wrapped_csv(self._parse_property) 1364 1365 if self._match_text_seq("JOURNAL"): 1366 return self._parse_withjournaltable() 1367 1368 if self._match_text_seq("DATA"): 1369 return self._parse_withdata(no=False) 1370 elif self._match_text_seq("NO", "DATA"): 1371 return self._parse_withdata(no=True) 1372 1373 if not self._next: 1374 return None 1375 1376 return self._parse_withisolatedloading() 1377 1378 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1379 def _parse_definer(self) -> t.Optional[exp.Expression]: 1380 self._match(TokenType.EQ) 1381 1382 user = self._parse_id_var() 1383 self._match(TokenType.PARAMETER) 1384 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1385 1386 if not user or not host: 1387 return None 1388 1389 return exp.DefinerProperty(this=f"{user}@{host}") 1390 1391 def _parse_withjournaltable(self) -> exp.Expression: 1392 self._match(TokenType.TABLE) 1393 self._match(TokenType.EQ) 1394 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1395 1396 def _parse_log(self, no: bool = False) -> exp.Expression: 1397 return self.expression(exp.LogProperty, no=no) 1398 1399 def _parse_journal(self, **kwargs) -> exp.Expression: 1400 return self.expression(exp.JournalProperty, **kwargs) 1401 1402 def _parse_checksum(self) -> exp.Expression: 1403 self._match(TokenType.EQ) 1404 1405 on = None 1406 if self._match(TokenType.ON): 1407 on = True 1408 elif self._match_text_seq("OFF"): 1409 on = False 1410 default = self._match(TokenType.DEFAULT) 1411 1412 return self.expression( 1413 exp.ChecksumProperty, 1414 on=on, 1415 default=default, 1416 ) 1417 1418 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1419 if not self._match_text_seq("BY"): 1420 self._retreat(self._index - 1) 1421 return None 1422 return self.expression( 1423 exp.Cluster, 1424 expressions=self._parse_csv(self._parse_ordered), 1425 ) 1426 1427 def _parse_freespace(self) -> exp.Expression: 1428 self._match(TokenType.EQ) 1429 return self.expression( 1430 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1431 ) 1432 1433 def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1434 if self._match(TokenType.EQ): 1435 return self.expression( 1436 exp.MergeBlockRatioProperty, 1437 this=self._parse_number(), 1438 percent=self._match(TokenType.PERCENT), 1439 ) 1440 return self.expression( 1441 exp.MergeBlockRatioProperty, 1442 no=no, 1443 default=default, 1444 ) 1445 1446 def _parse_datablocksize( 1447 self, 1448 default: t.Optional[bool] = None, 1449 minimum: t.Optional[bool] = None, 1450 maximum: t.Optional[bool] = None, 1451 ) -> exp.Expression: 1452 self._match(TokenType.EQ) 1453 size = self._parse_number() 1454 units = None 1455 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1456 units = self._prev.text 1457 return self.expression( 1458 exp.DataBlocksizeProperty, 1459 size=size, 1460 units=units, 1461 default=default, 1462 minimum=minimum, 1463 maximum=maximum, 1464 ) 1465 1466 def _parse_blockcompression(self) -> exp.Expression: 1467 self._match(TokenType.EQ) 1468 always = self._match_text_seq("ALWAYS") 1469 manual = self._match_text_seq("MANUAL") 1470 never = self._match_text_seq("NEVER") 1471 default = self._match_text_seq("DEFAULT") 1472 autotemp = None 1473 if self._match_text_seq("AUTOTEMP"): 1474 autotemp = self._parse_schema() 1475 1476 return self.expression( 1477 exp.BlockCompressionProperty, 1478 always=always, 1479 manual=manual, 1480 never=never, 1481 default=default, 1482 autotemp=autotemp, 1483 ) 1484 1485 def _parse_withisolatedloading(self) -> exp.Expression: 1486 no = self._match_text_seq("NO") 1487 concurrent = self._match_text_seq("CONCURRENT") 1488 self._match_text_seq("ISOLATED", "LOADING") 1489 for_all = self._match_text_seq("FOR", "ALL") 1490 for_insert = self._match_text_seq("FOR", "INSERT") 1491 for_none = self._match_text_seq("FOR", "NONE") 1492 return self.expression( 1493 exp.IsolatedLoadingProperty, 1494 no=no, 1495 concurrent=concurrent, 1496 for_all=for_all, 1497 for_insert=for_insert, 1498 for_none=for_none, 1499 ) 1500 1501 def _parse_locking(self) -> exp.Expression: 1502 if self._match(TokenType.TABLE): 1503 kind = "TABLE" 1504 elif self._match(TokenType.VIEW): 1505 kind = "VIEW" 1506 elif self._match(TokenType.ROW): 1507 kind = "ROW" 1508 elif self._match_text_seq("DATABASE"): 1509 kind = "DATABASE" 1510 else: 1511 kind = None 1512 1513 if kind in ("DATABASE", "TABLE", "VIEW"): 1514 this = self._parse_table_parts() 1515 else: 1516 this = None 1517 1518 if self._match(TokenType.FOR): 1519 for_or_in = "FOR" 1520 elif self._match(TokenType.IN): 1521 for_or_in = "IN" 1522 else: 1523 for_or_in = None 1524 1525 if self._match_text_seq("ACCESS"): 1526 lock_type = "ACCESS" 1527 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1528 lock_type = "EXCLUSIVE" 1529 elif self._match_text_seq("SHARE"): 1530 lock_type = "SHARE" 1531 elif self._match_text_seq("READ"): 1532 lock_type = "READ" 1533 elif self._match_text_seq("WRITE"): 1534 lock_type = "WRITE" 1535 elif self._match_text_seq("CHECKSUM"): 1536 lock_type = "CHECKSUM" 1537 else: 1538 lock_type = None 1539 1540 override = self._match_text_seq("OVERRIDE") 1541 1542 return self.expression( 1543 exp.LockingProperty, 1544 this=this, 1545 kind=kind, 1546 for_or_in=for_or_in, 1547 lock_type=lock_type, 1548 override=override, 1549 ) 1550 1551 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1552 if self._match(TokenType.PARTITION_BY): 1553 return self._parse_csv(self._parse_conjunction) 1554 return [] 1555 1556 def _parse_partitioned_by(self) -> exp.Expression: 1557 self._match(TokenType.EQ) 1558 return self.expression( 1559 exp.PartitionedByProperty, 1560 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1561 ) 1562 1563 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1564 if self._match_text_seq("AND", "STATISTICS"): 1565 statistics = True 1566 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1567 statistics = False 1568 else: 1569 statistics = None 1570 1571 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1572 1573 def _parse_no_property(self) -> t.Optional[exp.Property]: 1574 if self._match_text_seq("PRIMARY", "INDEX"): 1575 return exp.NoPrimaryIndexProperty() 1576 return None 1577 1578 def _parse_on_property(self) -> t.Optional[exp.Property]: 1579 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1580 return exp.OnCommitProperty() 1581 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1582 return exp.OnCommitProperty(delete=True) 1583 return None 1584 1585 def _parse_distkey(self) -> exp.Expression: 1586 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1587 1588 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1589 table = self._parse_table(schema=True) 1590 options = [] 1591 while self._match_texts(("INCLUDING", "EXCLUDING")): 1592 this = self._prev.text.upper() 1593 id_var = self._parse_id_var() 1594 1595 if not id_var: 1596 return None 1597 1598 options.append( 1599 self.expression( 1600 exp.Property, 1601 this=this, 1602 value=exp.Var(this=id_var.this.upper()), 1603 ) 1604 ) 1605 return self.expression(exp.LikeProperty, this=table, expressions=options) 1606 1607 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1608 return self.expression( 1609 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1610 ) 1611 1612 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1613 self._match(TokenType.EQ) 1614 return self.expression( 1615 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1616 ) 1617 1618 def _parse_returns(self) -> exp.Expression: 1619 value: t.Optional[exp.Expression] 1620 is_table = self._match(TokenType.TABLE) 1621 1622 if is_table: 1623 if self._match(TokenType.LT): 1624 value = self.expression( 1625 exp.Schema, 1626 this="TABLE", 1627 expressions=self._parse_csv(self._parse_struct_types), 1628 ) 1629 if not self._match(TokenType.GT): 1630 self.raise_error("Expecting >") 1631 else: 1632 value = self._parse_schema(exp.Var(this="TABLE")) 1633 else: 1634 value = self._parse_types() 1635 1636 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1637 1638 def _parse_describe(self) -> exp.Expression: 1639 kind = self._match_set(self.CREATABLES) and self._prev.text 1640 this = self._parse_table() 1641 1642 return self.expression(exp.Describe, this=this, kind=kind) 1643 1644 def _parse_insert(self) -> exp.Expression: 1645 overwrite = self._match(TokenType.OVERWRITE) 1646 local = self._match_text_seq("LOCAL") 1647 alternative = None 1648 1649 if self._match_text_seq("DIRECTORY"): 1650 this: t.Optional[exp.Expression] = self.expression( 1651 exp.Directory, 1652 this=self._parse_var_or_string(), 1653 local=local, 1654 row_format=self._parse_row_format(match_row=True), 1655 ) 1656 else: 1657 if self._match(TokenType.OR): 1658 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1659 1660 self._match(TokenType.INTO) 1661 self._match(TokenType.TABLE) 1662 this = self._parse_table(schema=True) 1663 1664 return self.expression( 1665 exp.Insert, 1666 this=this, 1667 exists=self._parse_exists(), 1668 partition=self._parse_partition(), 1669 expression=self._parse_ddl_select(), 1670 conflict=self._parse_on_conflict(), 1671 returning=self._parse_returning(), 1672 overwrite=overwrite, 1673 alternative=alternative, 1674 ) 1675 1676 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1677 conflict = self._match_text_seq("ON", "CONFLICT") 1678 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1679 1680 if not (conflict or duplicate): 1681 return None 1682 1683 nothing = None 1684 expressions = None 1685 key = None 1686 constraint = None 1687 1688 if conflict: 1689 if self._match_text_seq("ON", "CONSTRAINT"): 1690 constraint = self._parse_id_var() 1691 else: 1692 key = self._parse_csv(self._parse_value) 1693 1694 self._match_text_seq("DO") 1695 if self._match_text_seq("NOTHING"): 1696 nothing = True 1697 else: 1698 self._match(TokenType.UPDATE) 1699 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1700 1701 return self.expression( 1702 exp.OnConflict, 1703 duplicate=duplicate, 1704 expressions=expressions, 1705 nothing=nothing, 1706 key=key, 1707 constraint=constraint, 1708 ) 1709 1710 def _parse_returning(self) -> t.Optional[exp.Expression]: 1711 if not self._match(TokenType.RETURNING): 1712 return None 1713 1714 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1715 1716 def _parse_row(self) -> t.Optional[exp.Expression]: 1717 if not self._match(TokenType.FORMAT): 1718 return None 1719 return self._parse_row_format() 1720 1721 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1722 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1723 return None 1724 1725 if self._match_text_seq("SERDE"): 1726 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1727 1728 self._match_text_seq("DELIMITED") 1729 1730 kwargs = {} 1731 1732 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1733 kwargs["fields"] = self._parse_string() 1734 if self._match_text_seq("ESCAPED", "BY"): 1735 kwargs["escaped"] = self._parse_string() 1736 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1737 kwargs["collection_items"] = self._parse_string() 1738 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1739 kwargs["map_keys"] = self._parse_string() 1740 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1741 kwargs["lines"] = self._parse_string() 1742 if self._match_text_seq("NULL", "DEFINED", "AS"): 1743 kwargs["null"] = self._parse_string() 1744 1745 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1746 1747 def _parse_load(self) -> exp.Expression: 1748 if self._match_text_seq("DATA"): 1749 local = self._match_text_seq("LOCAL") 1750 self._match_text_seq("INPATH") 1751 inpath = self._parse_string() 1752 overwrite = self._match(TokenType.OVERWRITE) 1753 self._match_pair(TokenType.INTO, TokenType.TABLE) 1754 1755 return self.expression( 1756 exp.LoadData, 1757 this=self._parse_table(schema=True), 1758 local=local, 1759 overwrite=overwrite, 1760 inpath=inpath, 1761 partition=self._parse_partition(), 1762 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1763 serde=self._match_text_seq("SERDE") and self._parse_string(), 1764 ) 1765 return self._parse_as_command(self._prev) 1766 1767 def _parse_delete(self) -> exp.Expression: 1768 self._match(TokenType.FROM) 1769 1770 return self.expression( 1771 exp.Delete, 1772 this=self._parse_table(), 1773 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1774 where=self._parse_where(), 1775 returning=self._parse_returning(), 1776 ) 1777 1778 def _parse_update(self) -> exp.Expression: 1779 return self.expression( 1780 exp.Update, 1781 **{ # type: ignore 1782 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1783 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1784 "from": self._parse_from(modifiers=True), 1785 "where": self._parse_where(), 1786 "returning": self._parse_returning(), 1787 }, 1788 ) 1789 1790 def _parse_uncache(self) -> exp.Expression: 1791 if not self._match(TokenType.TABLE): 1792 self.raise_error("Expecting TABLE after UNCACHE") 1793 1794 return self.expression( 1795 exp.Uncache, 1796 exists=self._parse_exists(), 1797 this=self._parse_table(schema=True), 1798 ) 1799 1800 def _parse_cache(self) -> exp.Expression: 1801 lazy = self._match_text_seq("LAZY") 1802 self._match(TokenType.TABLE) 1803 table = self._parse_table(schema=True) 1804 options = [] 1805 1806 if self._match_text_seq("OPTIONS"): 1807 self._match_l_paren() 1808 k = self._parse_string() 1809 self._match(TokenType.EQ) 1810 v = self._parse_string() 1811 options = [k, v] 1812 self._match_r_paren() 1813 1814 self._match(TokenType.ALIAS) 1815 return self.expression( 1816 exp.Cache, 1817 this=table, 1818 lazy=lazy, 1819 options=options, 1820 expression=self._parse_select(nested=True), 1821 ) 1822 1823 def _parse_partition(self) -> t.Optional[exp.Expression]: 1824 if not self._match(TokenType.PARTITION): 1825 return None 1826 1827 return self.expression( 1828 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1829 ) 1830 1831 def _parse_value(self) -> exp.Expression: 1832 if self._match(TokenType.L_PAREN): 1833 expressions = self._parse_csv(self._parse_conjunction) 1834 self._match_r_paren() 1835 return self.expression(exp.Tuple, expressions=expressions) 1836 1837 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1838 # Source: https://prestodb.io/docs/current/sql/values.html 1839 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1840 1841 def _parse_select( 1842 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1843 ) -> t.Optional[exp.Expression]: 1844 cte = self._parse_with() 1845 if cte: 1846 this = self._parse_statement() 1847 1848 if not this: 1849 self.raise_error("Failed to parse any statement following CTE") 1850 return cte 1851 1852 if "with" in this.arg_types: 1853 this.set("with", cte) 1854 else: 1855 self.raise_error(f"{this.key} does not support CTE") 1856 this = cte 1857 elif self._match(TokenType.SELECT): 1858 comments = self._prev_comments 1859 1860 hint = self._parse_hint() 1861 all_ = self._match(TokenType.ALL) 1862 distinct = self._match(TokenType.DISTINCT) 1863 1864 kind = ( 1865 self._match(TokenType.ALIAS) 1866 and self._match_texts(("STRUCT", "VALUE")) 1867 and self._prev.text 1868 ) 1869 1870 if distinct: 1871 distinct = self.expression( 1872 exp.Distinct, 1873 on=self._parse_value() if self._match(TokenType.ON) else None, 1874 ) 1875 1876 if all_ and distinct: 1877 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1878 1879 limit = self._parse_limit(top=True) 1880 expressions = self._parse_csv(self._parse_expression) 1881 1882 this = self.expression( 1883 exp.Select, 1884 kind=kind, 1885 hint=hint, 1886 distinct=distinct, 1887 expressions=expressions, 1888 limit=limit, 1889 ) 1890 this.comments = comments 1891 1892 into = self._parse_into() 1893 if into: 1894 this.set("into", into) 1895 1896 from_ = self._parse_from() 1897 if from_: 1898 this.set("from", from_) 1899 1900 this = self._parse_query_modifiers(this) 1901 elif (table or nested) and self._match(TokenType.L_PAREN): 1902 if self._match(TokenType.PIVOT): 1903 this = self._parse_simplified_pivot() 1904 elif self._match(TokenType.FROM): 1905 this = exp.select("*").from_( 1906 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1907 ) 1908 else: 1909 this = self._parse_table() if table else self._parse_select(nested=True) 1910 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1911 1912 self._match_r_paren() 1913 1914 # early return so that subquery unions aren't parsed again 1915 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1916 # Union ALL should be a property of the top select node, not the subquery 1917 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1918 elif self._match(TokenType.VALUES): 1919 this = self.expression( 1920 exp.Values, 1921 expressions=self._parse_csv(self._parse_value), 1922 alias=self._parse_table_alias(), 1923 ) 1924 else: 1925 this = None 1926 1927 return self._parse_set_operations(this) 1928 1929 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1930 if not skip_with_token and not self._match(TokenType.WITH): 1931 return None 1932 1933 comments = self._prev_comments 1934 recursive = self._match(TokenType.RECURSIVE) 1935 1936 expressions = [] 1937 while True: 1938 expressions.append(self._parse_cte()) 1939 1940 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1941 break 1942 else: 1943 self._match(TokenType.WITH) 1944 1945 return self.expression( 1946 exp.With, comments=comments, expressions=expressions, recursive=recursive 1947 ) 1948 1949 def _parse_cte(self) -> exp.Expression: 1950 alias = self._parse_table_alias() 1951 if not alias or not alias.this: 1952 self.raise_error("Expected CTE to have alias") 1953 1954 self._match(TokenType.ALIAS) 1955 1956 return self.expression( 1957 exp.CTE, 1958 this=self._parse_wrapped(self._parse_statement), 1959 alias=alias, 1960 ) 1961 1962 def _parse_table_alias( 1963 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1964 ) -> t.Optional[exp.Expression]: 1965 any_token = self._match(TokenType.ALIAS) 1966 alias = ( 1967 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1968 or self._parse_string_as_identifier() 1969 ) 1970 1971 index = self._index 1972 if self._match(TokenType.L_PAREN): 1973 columns = self._parse_csv(self._parse_function_parameter) 1974 self._match_r_paren() if columns else self._retreat(index) 1975 else: 1976 columns = None 1977 1978 if not alias and not columns: 1979 return None 1980 1981 return self.expression(exp.TableAlias, this=alias, columns=columns) 1982 1983 def _parse_subquery( 1984 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1985 ) -> t.Optional[exp.Expression]: 1986 if not this: 1987 return None 1988 return self.expression( 1989 exp.Subquery, 1990 this=this, 1991 pivots=self._parse_pivots(), 1992 alias=self._parse_table_alias() if parse_alias else None, 1993 ) 1994 1995 def _parse_query_modifiers( 1996 self, this: t.Optional[exp.Expression] 1997 ) -> t.Optional[exp.Expression]: 1998 if isinstance(this, self.MODIFIABLES): 1999 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2000 expression = parser(self) 2001 2002 if expression: 2003 this.set(key, expression) 2004 return this 2005 2006 def _parse_hint(self) -> t.Optional[exp.Expression]: 2007 if self._match(TokenType.HINT): 2008 hints = self._parse_csv(self._parse_function) 2009 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2010 self.raise_error("Expected */ after HINT") 2011 return self.expression(exp.Hint, expressions=hints) 2012 2013 return None 2014 2015 def _parse_into(self) -> t.Optional[exp.Expression]: 2016 if not self._match(TokenType.INTO): 2017 return None 2018 2019 temp = self._match(TokenType.TEMPORARY) 2020 unlogged = self._match_text_seq("UNLOGGED") 2021 self._match(TokenType.TABLE) 2022 2023 return self.expression( 2024 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2025 ) 2026 2027 def _parse_from( 2028 self, modifiers: bool = False, skip_from_token: bool = False 2029 ) -> t.Optional[exp.From]: 2030 if not skip_from_token and not self._match(TokenType.FROM): 2031 return None 2032 2033 comments = self._prev_comments 2034 this = self._parse_table() 2035 2036 return self.expression( 2037 exp.From, 2038 comments=comments, 2039 this=self._parse_query_modifiers(this) if modifiers else this, 2040 ) 2041 2042 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2043 if not self._match(TokenType.MATCH_RECOGNIZE): 2044 return None 2045 2046 self._match_l_paren() 2047 2048 partition = self._parse_partition_by() 2049 order = self._parse_order() 2050 measures = ( 2051 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2052 ) 2053 2054 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2055 rows = exp.Var(this="ONE ROW PER MATCH") 2056 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2057 text = "ALL ROWS PER MATCH" 2058 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2059 text += f" SHOW EMPTY MATCHES" 2060 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2061 text += f" OMIT EMPTY MATCHES" 2062 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2063 text += f" WITH UNMATCHED ROWS" 2064 rows = exp.Var(this=text) 2065 else: 2066 rows = None 2067 2068 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2069 text = "AFTER MATCH SKIP" 2070 if self._match_text_seq("PAST", "LAST", "ROW"): 2071 text += f" PAST LAST ROW" 2072 elif self._match_text_seq("TO", "NEXT", "ROW"): 2073 text += f" TO NEXT ROW" 2074 elif self._match_text_seq("TO", "FIRST"): 2075 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2076 elif self._match_text_seq("TO", "LAST"): 2077 text += f" TO LAST {self._advance_any().text}" # type: ignore 2078 after = exp.Var(this=text) 2079 else: 2080 after = None 2081 2082 if self._match_text_seq("PATTERN"): 2083 self._match_l_paren() 2084 2085 if not self._curr: 2086 self.raise_error("Expecting )", self._curr) 2087 2088 paren = 1 2089 start = self._curr 2090 2091 while self._curr and paren > 0: 2092 if self._curr.token_type == TokenType.L_PAREN: 2093 paren += 1 2094 if self._curr.token_type == TokenType.R_PAREN: 2095 paren -= 1 2096 end = self._prev 2097 self._advance() 2098 if paren > 0: 2099 self.raise_error("Expecting )", self._curr) 2100 pattern = exp.Var(this=self._find_sql(start, end)) 2101 else: 2102 pattern = None 2103 2104 define = ( 2105 self._parse_csv( 2106 lambda: self.expression( 2107 exp.Alias, 2108 alias=self._parse_id_var(any_token=True), 2109 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2110 ) 2111 ) 2112 if self._match_text_seq("DEFINE") 2113 else None 2114 ) 2115 2116 self._match_r_paren() 2117 2118 return self.expression( 2119 exp.MatchRecognize, 2120 partition_by=partition, 2121 order=order, 2122 measures=measures, 2123 rows=rows, 2124 after=after, 2125 pattern=pattern, 2126 define=define, 2127 alias=self._parse_table_alias(), 2128 ) 2129 2130 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2131 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2132 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2133 2134 if outer_apply or cross_apply: 2135 this = self._parse_select(table=True) 2136 view = None 2137 outer = not cross_apply 2138 elif self._match(TokenType.LATERAL): 2139 this = self._parse_select(table=True) 2140 view = self._match(TokenType.VIEW) 2141 outer = self._match(TokenType.OUTER) 2142 else: 2143 return None 2144 2145 if not this: 2146 this = self._parse_function() or self._parse_id_var(any_token=False) 2147 while self._match(TokenType.DOT): 2148 this = exp.Dot( 2149 this=this, 2150 expression=self._parse_function() or self._parse_id_var(any_token=False), 2151 ) 2152 2153 table_alias: t.Optional[exp.Expression] 2154 2155 if view: 2156 table = self._parse_id_var(any_token=False) 2157 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2158 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2159 else: 2160 table_alias = self._parse_table_alias() 2161 2162 expression = self.expression( 2163 exp.Lateral, 2164 this=this, 2165 view=view, 2166 outer=outer, 2167 alias=table_alias, 2168 ) 2169 2170 return expression 2171 2172 def _parse_join_parts( 2173 self, 2174 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2175 return ( 2176 self._match_set(self.JOIN_METHODS) and self._prev, 2177 self._match_set(self.JOIN_SIDES) and self._prev, 2178 self._match_set(self.JOIN_KINDS) and self._prev, 2179 ) 2180 2181 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2182 if self._match(TokenType.COMMA): 2183 return self.expression(exp.Join, this=self._parse_table()) 2184 2185 index = self._index 2186 method, side, kind = self._parse_join_parts() 2187 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2188 join = self._match(TokenType.JOIN) 2189 2190 if not skip_join_token and not join: 2191 self._retreat(index) 2192 kind = None 2193 method = None 2194 side = None 2195 2196 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2197 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2198 2199 if not skip_join_token and not join and not outer_apply and not cross_apply: 2200 return None 2201 2202 if outer_apply: 2203 side = Token(TokenType.LEFT, "LEFT") 2204 2205 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2206 2207 if method: 2208 kwargs["method"] = method.text 2209 if side: 2210 kwargs["side"] = side.text 2211 if kind: 2212 kwargs["kind"] = kind.text 2213 if hint: 2214 kwargs["hint"] = hint 2215 2216 if self._match(TokenType.ON): 2217 kwargs["on"] = self._parse_conjunction() 2218 elif self._match(TokenType.USING): 2219 kwargs["using"] = self._parse_wrapped_id_vars() 2220 2221 return self.expression(exp.Join, **kwargs) 2222 2223 def _parse_index( 2224 self, 2225 index: t.Optional[exp.Expression] = None, 2226 ) -> t.Optional[exp.Expression]: 2227 if index: 2228 unique = None 2229 primary = None 2230 amp = None 2231 2232 self._match(TokenType.ON) 2233 self._match(TokenType.TABLE) # hive 2234 table = self._parse_table_parts(schema=True) 2235 else: 2236 unique = self._match(TokenType.UNIQUE) 2237 primary = self._match_text_seq("PRIMARY") 2238 amp = self._match_text_seq("AMP") 2239 if not self._match(TokenType.INDEX): 2240 return None 2241 index = self._parse_id_var() 2242 table = None 2243 2244 if self._match(TokenType.L_PAREN, advance=False): 2245 columns = self._parse_wrapped_csv(self._parse_ordered) 2246 else: 2247 columns = None 2248 2249 return self.expression( 2250 exp.Index, 2251 this=index, 2252 table=table, 2253 columns=columns, 2254 unique=unique, 2255 primary=primary, 2256 amp=amp, 2257 partition_by=self._parse_partition_by(), 2258 ) 2259 2260 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2261 return ( 2262 (not schema and self._parse_function()) 2263 or self._parse_id_var(any_token=False) 2264 or self._parse_string_as_identifier() 2265 or self._parse_placeholder() 2266 ) 2267 2268 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2269 catalog = None 2270 db = None 2271 table = self._parse_table_part(schema=schema) 2272 2273 while self._match(TokenType.DOT): 2274 if catalog: 2275 # This allows nesting the table in arbitrarily many dot expressions if needed 2276 table = self.expression( 2277 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2278 ) 2279 else: 2280 catalog = db 2281 db = table 2282 table = self._parse_table_part(schema=schema) 2283 2284 if not table: 2285 self.raise_error(f"Expected table name but got {self._curr}") 2286 2287 return self.expression( 2288 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2289 ) 2290 2291 def _parse_table( 2292 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2293 ) -> t.Optional[exp.Expression]: 2294 lateral = self._parse_lateral() 2295 if lateral: 2296 return lateral 2297 2298 unnest = self._parse_unnest() 2299 if unnest: 2300 return unnest 2301 2302 values = self._parse_derived_table_values() 2303 if values: 2304 return values 2305 2306 subquery = self._parse_select(table=True) 2307 if subquery: 2308 if not subquery.args.get("pivots"): 2309 subquery.set("pivots", self._parse_pivots()) 2310 return subquery 2311 2312 this: exp.Expression = self._parse_table_parts(schema=schema) 2313 2314 if schema: 2315 return self._parse_schema(this=this) 2316 2317 if self.alias_post_tablesample: 2318 table_sample = self._parse_table_sample() 2319 2320 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2321 if alias: 2322 this.set("alias", alias) 2323 2324 if not this.args.get("pivots"): 2325 this.set("pivots", self._parse_pivots()) 2326 2327 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2328 this.set( 2329 "hints", 2330 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2331 ) 2332 self._match_r_paren() 2333 2334 if not self.alias_post_tablesample: 2335 table_sample = self._parse_table_sample() 2336 2337 if table_sample: 2338 table_sample.set("this", this) 2339 this = table_sample 2340 2341 return this 2342 2343 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2344 if not self._match(TokenType.UNNEST): 2345 return None 2346 2347 expressions = self._parse_wrapped_csv(self._parse_type) 2348 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2349 alias = self._parse_table_alias() 2350 2351 if alias and self.unnest_column_only: 2352 if alias.args.get("columns"): 2353 self.raise_error("Unexpected extra column alias in unnest.") 2354 alias.set("columns", [alias.this]) 2355 alias.set("this", None) 2356 2357 offset = None 2358 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2359 self._match(TokenType.ALIAS) 2360 offset = self._parse_id_var() or exp.Identifier(this="offset") 2361 2362 return self.expression( 2363 exp.Unnest, 2364 expressions=expressions, 2365 ordinality=ordinality, 2366 alias=alias, 2367 offset=offset, 2368 ) 2369 2370 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2371 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2372 if not is_derived and not self._match(TokenType.VALUES): 2373 return None 2374 2375 expressions = self._parse_csv(self._parse_value) 2376 2377 if is_derived: 2378 self._match_r_paren() 2379 2380 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2381 2382 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2383 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2384 as_modifier and self._match_text_seq("USING", "SAMPLE") 2385 ): 2386 return None 2387 2388 bucket_numerator = None 2389 bucket_denominator = None 2390 bucket_field = None 2391 percent = None 2392 rows = None 2393 size = None 2394 seed = None 2395 2396 kind = ( 2397 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2398 ) 2399 method = self._parse_var(tokens=(TokenType.ROW,)) 2400 2401 self._match(TokenType.L_PAREN) 2402 2403 num = self._parse_number() 2404 2405 if self._match_text_seq("BUCKET"): 2406 bucket_numerator = self._parse_number() 2407 self._match_text_seq("OUT", "OF") 2408 bucket_denominator = bucket_denominator = self._parse_number() 2409 self._match(TokenType.ON) 2410 bucket_field = self._parse_field() 2411 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2412 percent = num 2413 elif self._match(TokenType.ROWS): 2414 rows = num 2415 else: 2416 size = num 2417 2418 self._match(TokenType.R_PAREN) 2419 2420 if self._match(TokenType.L_PAREN): 2421 method = self._parse_var() 2422 seed = self._match(TokenType.COMMA) and self._parse_number() 2423 self._match_r_paren() 2424 elif self._match_texts(("SEED", "REPEATABLE")): 2425 seed = self._parse_wrapped(self._parse_number) 2426 2427 return self.expression( 2428 exp.TableSample, 2429 method=method, 2430 bucket_numerator=bucket_numerator, 2431 bucket_denominator=bucket_denominator, 2432 bucket_field=bucket_field, 2433 percent=percent, 2434 rows=rows, 2435 size=size, 2436 seed=seed, 2437 kind=kind, 2438 ) 2439 2440 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2441 return list(iter(self._parse_pivot, None)) 2442 2443 # https://duckdb.org/docs/sql/statements/pivot 2444 def _parse_simplified_pivot(self) -> exp.Pivot: 2445 def _parse_on() -> t.Optional[exp.Expression]: 2446 this = self._parse_bitwise() 2447 return self._parse_in(this) if self._match(TokenType.IN) else this 2448 2449 this = self._parse_table() 2450 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2451 using = self._match(TokenType.USING) and self._parse_csv( 2452 lambda: self._parse_alias(self._parse_function()) 2453 ) 2454 group = self._parse_group() 2455 return self.expression( 2456 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2457 ) 2458 2459 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2460 index = self._index 2461 2462 if self._match(TokenType.PIVOT): 2463 unpivot = False 2464 elif self._match(TokenType.UNPIVOT): 2465 unpivot = True 2466 else: 2467 return None 2468 2469 expressions = [] 2470 field = None 2471 2472 if not self._match(TokenType.L_PAREN): 2473 self._retreat(index) 2474 return None 2475 2476 if unpivot: 2477 expressions = self._parse_csv(self._parse_column) 2478 else: 2479 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2480 2481 if not expressions: 2482 self.raise_error("Failed to parse PIVOT's aggregation list") 2483 2484 if not self._match(TokenType.FOR): 2485 self.raise_error("Expecting FOR") 2486 2487 value = self._parse_column() 2488 2489 if not self._match(TokenType.IN): 2490 self.raise_error("Expecting IN") 2491 2492 field = self._parse_in(value, alias=True) 2493 2494 self._match_r_paren() 2495 2496 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2497 2498 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2499 pivot.set("alias", self._parse_table_alias()) 2500 2501 if not unpivot: 2502 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2503 2504 columns: t.List[exp.Expression] = [] 2505 for fld in pivot.args["field"].expressions: 2506 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2507 for name in names: 2508 if self.PREFIXED_PIVOT_COLUMNS: 2509 name = f"{name}_{field_name}" if name else field_name 2510 else: 2511 name = f"{field_name}_{name}" if name else field_name 2512 2513 columns.append(exp.to_identifier(name)) 2514 2515 pivot.set("columns", columns) 2516 2517 return pivot 2518 2519 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2520 return [agg.alias for agg in aggregations] 2521 2522 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2523 if not skip_where_token and not self._match(TokenType.WHERE): 2524 return None 2525 2526 return self.expression( 2527 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2528 ) 2529 2530 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2531 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2532 return None 2533 2534 elements = defaultdict(list) 2535 2536 while True: 2537 expressions = self._parse_csv(self._parse_conjunction) 2538 if expressions: 2539 elements["expressions"].extend(expressions) 2540 2541 grouping_sets = self._parse_grouping_sets() 2542 if grouping_sets: 2543 elements["grouping_sets"].extend(grouping_sets) 2544 2545 rollup = None 2546 cube = None 2547 totals = None 2548 2549 with_ = self._match(TokenType.WITH) 2550 if self._match(TokenType.ROLLUP): 2551 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2552 elements["rollup"].extend(ensure_list(rollup)) 2553 2554 if self._match(TokenType.CUBE): 2555 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2556 elements["cube"].extend(ensure_list(cube)) 2557 2558 if self._match_text_seq("TOTALS"): 2559 totals = True 2560 elements["totals"] = True # type: ignore 2561 2562 if not (grouping_sets or rollup or cube or totals): 2563 break 2564 2565 return self.expression(exp.Group, **elements) # type: ignore 2566 2567 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2568 if not self._match(TokenType.GROUPING_SETS): 2569 return None 2570 2571 return self._parse_wrapped_csv(self._parse_grouping_set) 2572 2573 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2574 if self._match(TokenType.L_PAREN): 2575 grouping_set = self._parse_csv(self._parse_column) 2576 self._match_r_paren() 2577 return self.expression(exp.Tuple, expressions=grouping_set) 2578 2579 return self._parse_column() 2580 2581 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2582 if not skip_having_token and not self._match(TokenType.HAVING): 2583 return None 2584 return self.expression(exp.Having, this=self._parse_conjunction()) 2585 2586 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2587 if not self._match(TokenType.QUALIFY): 2588 return None 2589 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2590 2591 def _parse_order( 2592 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2593 ) -> t.Optional[exp.Expression]: 2594 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2595 return this 2596 2597 return self.expression( 2598 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2599 ) 2600 2601 def _parse_sort( 2602 self, exp_class: t.Type[exp.Expression], *texts: str 2603 ) -> t.Optional[exp.Expression]: 2604 if not self._match_text_seq(*texts): 2605 return None 2606 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2607 2608 def _parse_ordered(self) -> exp.Expression: 2609 this = self._parse_conjunction() 2610 self._match(TokenType.ASC) 2611 is_desc = self._match(TokenType.DESC) 2612 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2613 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2614 desc = is_desc or False 2615 asc = not desc 2616 nulls_first = is_nulls_first or False 2617 explicitly_null_ordered = is_nulls_first or is_nulls_last 2618 if ( 2619 not explicitly_null_ordered 2620 and ( 2621 (asc and self.null_ordering == "nulls_are_small") 2622 or (desc and self.null_ordering != "nulls_are_small") 2623 ) 2624 and self.null_ordering != "nulls_are_last" 2625 ): 2626 nulls_first = True 2627 2628 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2629 2630 def _parse_limit( 2631 self, this: t.Optional[exp.Expression] = None, top: bool = False 2632 ) -> t.Optional[exp.Expression]: 2633 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2634 limit_paren = self._match(TokenType.L_PAREN) 2635 limit_exp = self.expression( 2636 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2637 ) 2638 2639 if limit_paren: 2640 self._match_r_paren() 2641 2642 return limit_exp 2643 2644 if self._match(TokenType.FETCH): 2645 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2646 direction = self._prev.text if direction else "FIRST" 2647 2648 count = self._parse_number() 2649 percent = self._match(TokenType.PERCENT) 2650 2651 self._match_set((TokenType.ROW, TokenType.ROWS)) 2652 2653 only = self._match_text_seq("ONLY") 2654 with_ties = self._match_text_seq("WITH", "TIES") 2655 2656 if only and with_ties: 2657 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2658 2659 return self.expression( 2660 exp.Fetch, 2661 direction=direction, 2662 count=count, 2663 percent=percent, 2664 with_ties=with_ties, 2665 ) 2666 2667 return this 2668 2669 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2670 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2671 return this 2672 2673 count = self._parse_number() 2674 self._match_set((TokenType.ROW, TokenType.ROWS)) 2675 return self.expression(exp.Offset, this=this, expression=count) 2676 2677 def _parse_locks(self) -> t.List[exp.Expression]: 2678 # Lists are invariant, so we need to use a type hint here 2679 locks: t.List[exp.Expression] = [] 2680 2681 while True: 2682 if self._match_text_seq("FOR", "UPDATE"): 2683 update = True 2684 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2685 "LOCK", "IN", "SHARE", "MODE" 2686 ): 2687 update = False 2688 else: 2689 break 2690 2691 expressions = None 2692 if self._match_text_seq("OF"): 2693 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2694 2695 wait: t.Optional[bool | exp.Expression] = None 2696 if self._match_text_seq("NOWAIT"): 2697 wait = True 2698 elif self._match_text_seq("WAIT"): 2699 wait = self._parse_primary() 2700 elif self._match_text_seq("SKIP", "LOCKED"): 2701 wait = False 2702 2703 locks.append( 2704 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2705 ) 2706 2707 return locks 2708 2709 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2710 if not self._match_set(self.SET_OPERATIONS): 2711 return this 2712 2713 token_type = self._prev.token_type 2714 2715 if token_type == TokenType.UNION: 2716 expression = exp.Union 2717 elif token_type == TokenType.EXCEPT: 2718 expression = exp.Except 2719 else: 2720 expression = exp.Intersect 2721 2722 return self.expression( 2723 expression, 2724 this=this, 2725 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2726 expression=self._parse_set_operations(self._parse_select(nested=True)), 2727 ) 2728 2729 def _parse_expression(self) -> t.Optional[exp.Expression]: 2730 return self._parse_alias(self._parse_conjunction()) 2731 2732 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2733 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2734 2735 def _parse_equality(self) -> t.Optional[exp.Expression]: 2736 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2737 2738 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2739 return self._parse_tokens(self._parse_range, self.COMPARISON) 2740 2741 def _parse_range(self) -> t.Optional[exp.Expression]: 2742 this = self._parse_bitwise() 2743 negate = self._match(TokenType.NOT) 2744 2745 if self._match_set(self.RANGE_PARSERS): 2746 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2747 if not expression: 2748 return this 2749 2750 this = expression 2751 elif self._match(TokenType.ISNULL): 2752 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2753 2754 # Postgres supports ISNULL and NOTNULL for conditions. 2755 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2756 if self._match(TokenType.NOTNULL): 2757 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2758 this = self.expression(exp.Not, this=this) 2759 2760 if negate: 2761 this = self.expression(exp.Not, this=this) 2762 2763 if self._match(TokenType.IS): 2764 this = self._parse_is(this) 2765 2766 return this 2767 2768 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2769 index = self._index - 1 2770 negate = self._match(TokenType.NOT) 2771 if self._match_text_seq("DISTINCT", "FROM"): 2772 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2773 return self.expression(klass, this=this, expression=self._parse_expression()) 2774 2775 expression = self._parse_null() or self._parse_boolean() 2776 if not expression: 2777 self._retreat(index) 2778 return None 2779 2780 this = self.expression(exp.Is, this=this, expression=expression) 2781 return self.expression(exp.Not, this=this) if negate else this 2782 2783 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2784 unnest = self._parse_unnest() 2785 if unnest: 2786 this = self.expression(exp.In, this=this, unnest=unnest) 2787 elif self._match(TokenType.L_PAREN): 2788 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2789 2790 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2791 this = self.expression(exp.In, this=this, query=expressions[0]) 2792 else: 2793 this = self.expression(exp.In, this=this, expressions=expressions) 2794 2795 self._match_r_paren(this) 2796 else: 2797 this = self.expression(exp.In, this=this, field=self._parse_field()) 2798 2799 return this 2800 2801 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2802 low = self._parse_bitwise() 2803 self._match(TokenType.AND) 2804 high = self._parse_bitwise() 2805 return self.expression(exp.Between, this=this, low=low, high=high) 2806 2807 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2808 if not self._match(TokenType.ESCAPE): 2809 return this 2810 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2811 2812 def _parse_interval(self) -> t.Optional[exp.Expression]: 2813 if not self._match(TokenType.INTERVAL): 2814 return None 2815 2816 this = self._parse_primary() or self._parse_term() 2817 unit = self._parse_function() or self._parse_var() 2818 2819 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2820 # each INTERVAL expression into this canonical form so it's easy to transpile 2821 if this and this.is_number: 2822 this = exp.Literal.string(this.name) 2823 elif this and this.is_string: 2824 parts = this.name.split() 2825 2826 if len(parts) == 2: 2827 if unit: 2828 # this is not actually a unit, it's something else 2829 unit = None 2830 self._retreat(self._index - 1) 2831 else: 2832 this = exp.Literal.string(parts[0]) 2833 unit = self.expression(exp.Var, this=parts[1]) 2834 2835 return self.expression(exp.Interval, this=this, unit=unit) 2836 2837 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2838 this = self._parse_term() 2839 2840 while True: 2841 if self._match_set(self.BITWISE): 2842 this = self.expression( 2843 self.BITWISE[self._prev.token_type], 2844 this=this, 2845 expression=self._parse_term(), 2846 ) 2847 elif self._match_pair(TokenType.LT, TokenType.LT): 2848 this = self.expression( 2849 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2850 ) 2851 elif self._match_pair(TokenType.GT, TokenType.GT): 2852 this = self.expression( 2853 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2854 ) 2855 else: 2856 break 2857 2858 return this 2859 2860 def _parse_term(self) -> t.Optional[exp.Expression]: 2861 return self._parse_tokens(self._parse_factor, self.TERM) 2862 2863 def _parse_factor(self) -> t.Optional[exp.Expression]: 2864 return self._parse_tokens(self._parse_unary, self.FACTOR) 2865 2866 def _parse_unary(self) -> t.Optional[exp.Expression]: 2867 if self._match_set(self.UNARY_PARSERS): 2868 return self.UNARY_PARSERS[self._prev.token_type](self) 2869 return self._parse_at_time_zone(self._parse_type()) 2870 2871 def _parse_type(self) -> t.Optional[exp.Expression]: 2872 interval = self._parse_interval() 2873 if interval: 2874 return interval 2875 2876 index = self._index 2877 data_type = self._parse_types(check_func=True) 2878 this = self._parse_column() 2879 2880 if data_type: 2881 if isinstance(this, exp.Literal): 2882 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2883 if parser: 2884 return parser(self, this, data_type) 2885 return self.expression(exp.Cast, this=this, to=data_type) 2886 if not data_type.expressions: 2887 self._retreat(index) 2888 return self._parse_column() 2889 return self._parse_column_ops(data_type) 2890 2891 return this 2892 2893 def _parse_type_size(self) -> t.Optional[exp.Expression]: 2894 this = self._parse_type() 2895 if not this: 2896 return None 2897 2898 return self.expression( 2899 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2900 ) 2901 2902 def _parse_types( 2903 self, check_func: bool = False, schema: bool = False 2904 ) -> t.Optional[exp.Expression]: 2905 index = self._index 2906 2907 prefix = self._match_text_seq("SYSUDTLIB", ".") 2908 2909 if not self._match_set(self.TYPE_TOKENS): 2910 return None 2911 2912 type_token = self._prev.token_type 2913 2914 if type_token == TokenType.PSEUDO_TYPE: 2915 return self.expression(exp.PseudoType, this=self._prev.text) 2916 2917 nested = type_token in self.NESTED_TYPE_TOKENS 2918 is_struct = type_token == TokenType.STRUCT 2919 expressions = None 2920 maybe_func = False 2921 2922 if self._match(TokenType.L_PAREN): 2923 if is_struct: 2924 expressions = self._parse_csv(self._parse_struct_types) 2925 elif nested: 2926 expressions = self._parse_csv( 2927 lambda: self._parse_types(check_func=check_func, schema=schema) 2928 ) 2929 else: 2930 expressions = self._parse_csv(self._parse_type_size) 2931 2932 if not expressions or not self._match(TokenType.R_PAREN): 2933 self._retreat(index) 2934 return None 2935 2936 maybe_func = True 2937 2938 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2939 this = exp.DataType( 2940 this=exp.DataType.Type.ARRAY, 2941 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2942 nested=True, 2943 ) 2944 2945 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2946 this = exp.DataType( 2947 this=exp.DataType.Type.ARRAY, 2948 expressions=[this], 2949 nested=True, 2950 ) 2951 2952 return this 2953 2954 if self._match(TokenType.L_BRACKET): 2955 self._retreat(index) 2956 return None 2957 2958 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2959 if nested and self._match(TokenType.LT): 2960 if is_struct: 2961 expressions = self._parse_csv(self._parse_struct_types) 2962 else: 2963 expressions = self._parse_csv( 2964 lambda: self._parse_types(check_func=check_func, schema=schema) 2965 ) 2966 2967 if not self._match(TokenType.GT): 2968 self.raise_error("Expecting >") 2969 2970 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2971 values = self._parse_csv(self._parse_conjunction) 2972 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2973 2974 value: t.Optional[exp.Expression] = None 2975 if type_token in self.TIMESTAMPS: 2976 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2977 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2978 elif ( 2979 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2980 or type_token == TokenType.TIMESTAMPLTZ 2981 ): 2982 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2983 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2984 if type_token == TokenType.TIME: 2985 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2986 else: 2987 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2988 2989 maybe_func = maybe_func and value is None 2990 2991 if value is None: 2992 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2993 elif type_token == TokenType.INTERVAL: 2994 unit = self._parse_var() 2995 2996 if not unit: 2997 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2998 else: 2999 value = self.expression(exp.Interval, unit=unit) 3000 3001 if maybe_func and check_func: 3002 index2 = self._index 3003 peek = self._parse_string() 3004 3005 if not peek: 3006 self._retreat(index) 3007 return None 3008 3009 self._retreat(index2) 3010 3011 if value: 3012 return value 3013 3014 return exp.DataType( 3015 this=exp.DataType.Type[type_token.value.upper()], 3016 expressions=expressions, 3017 nested=nested, 3018 values=values, 3019 prefix=prefix, 3020 ) 3021 3022 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3023 this = self._parse_type() or self._parse_id_var() 3024 self._match(TokenType.COLON) 3025 return self._parse_column_def(this) 3026 3027 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3028 if not self._match_text_seq("AT", "TIME", "ZONE"): 3029 return this 3030 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3031 3032 def _parse_column(self) -> t.Optional[exp.Expression]: 3033 this = self._parse_field() 3034 if isinstance(this, exp.Identifier): 3035 this = self.expression(exp.Column, this=this) 3036 elif not this: 3037 return self._parse_bracket(this) 3038 return self._parse_column_ops(this) 3039 3040 def _parse_column_ops(self, this: exp.Expression) -> exp.Expression: 3041 this = self._parse_bracket(this) 3042 3043 while self._match_set(self.COLUMN_OPERATORS): 3044 op_token = self._prev.token_type 3045 op = self.COLUMN_OPERATORS.get(op_token) 3046 3047 if op_token == TokenType.DCOLON: 3048 field = self._parse_types() 3049 if not field: 3050 self.raise_error("Expected type") 3051 elif op and self._curr: 3052 self._advance() 3053 value = self._prev.text 3054 field = ( 3055 exp.Literal.number(value) 3056 if self._prev.token_type == TokenType.NUMBER 3057 else exp.Literal.string(value) 3058 ) 3059 else: 3060 field = self._parse_field(anonymous_func=True) 3061 3062 if isinstance(field, exp.Func): 3063 # bigquery allows function calls like x.y.count(...) 3064 # SAFE.SUBSTR(...) 3065 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3066 this = self._replace_columns_with_dots(this) 3067 3068 if op: 3069 this = op(self, this, field) 3070 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3071 this = self.expression( 3072 exp.Column, 3073 this=field, 3074 table=this.this, 3075 db=this.args.get("table"), 3076 catalog=this.args.get("db"), 3077 ) 3078 else: 3079 this = self.expression(exp.Dot, this=this, expression=field) 3080 this = self._parse_bracket(this) 3081 return this 3082 3083 def _parse_primary(self) -> t.Optional[exp.Expression]: 3084 if self._match_set(self.PRIMARY_PARSERS): 3085 token_type = self._prev.token_type 3086 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3087 3088 if token_type == TokenType.STRING: 3089 expressions = [primary] 3090 while self._match(TokenType.STRING): 3091 expressions.append(exp.Literal.string(self._prev.text)) 3092 if len(expressions) > 1: 3093 return self.expression(exp.Concat, expressions=expressions) 3094 return primary 3095 3096 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3097 return exp.Literal.number(f"0.{self._prev.text}") 3098 3099 if self._match(TokenType.L_PAREN): 3100 comments = self._prev_comments 3101 query = self._parse_select() 3102 3103 if query: 3104 expressions = [query] 3105 else: 3106 expressions = self._parse_csv(self._parse_expression) 3107 3108 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3109 3110 if isinstance(this, exp.Subqueryable): 3111 this = self._parse_set_operations( 3112 self._parse_subquery(this=this, parse_alias=False) 3113 ) 3114 elif len(expressions) > 1: 3115 this = self.expression(exp.Tuple, expressions=expressions) 3116 else: 3117 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3118 3119 if this: 3120 this.add_comments(comments) 3121 self._match_r_paren(expression=this) 3122 3123 return this 3124 3125 return None 3126 3127 def _parse_field( 3128 self, 3129 any_token: bool = False, 3130 tokens: t.Optional[t.Collection[TokenType]] = None, 3131 anonymous_func: bool = False, 3132 ) -> t.Optional[exp.Expression]: 3133 return ( 3134 self._parse_primary() 3135 or self._parse_function(anonymous=anonymous_func) 3136 or self._parse_id_var(any_token=any_token, tokens=tokens) 3137 ) 3138 3139 def _parse_function( 3140 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3141 ) -> t.Optional[exp.Expression]: 3142 if not self._curr: 3143 return None 3144 3145 token_type = self._curr.token_type 3146 3147 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3148 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3149 3150 if not self._next or self._next.token_type != TokenType.L_PAREN: 3151 if token_type in self.NO_PAREN_FUNCTIONS: 3152 self._advance() 3153 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3154 3155 return None 3156 3157 if token_type not in self.FUNC_TOKENS: 3158 return None 3159 3160 this = self._curr.text 3161 upper = this.upper() 3162 self._advance(2) 3163 3164 parser = self.FUNCTION_PARSERS.get(upper) 3165 3166 if parser and not anonymous: 3167 this = parser(self) 3168 else: 3169 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3170 3171 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3172 this = self.expression(subquery_predicate, this=self._parse_select()) 3173 self._match_r_paren() 3174 return this 3175 3176 if functions is None: 3177 functions = self.FUNCTIONS 3178 3179 function = functions.get(upper) 3180 3181 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3182 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3183 3184 if function and not anonymous: 3185 this = function(args) 3186 self.validate_expression(this, args) 3187 else: 3188 this = self.expression(exp.Anonymous, this=this, expressions=args) 3189 3190 self._match_r_paren(this) 3191 return self._parse_window(this) 3192 3193 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3194 return self._parse_column_def(self._parse_id_var()) 3195 3196 def _parse_user_defined_function( 3197 self, kind: t.Optional[TokenType] = None 3198 ) -> t.Optional[exp.Expression]: 3199 this = self._parse_id_var() 3200 3201 while self._match(TokenType.DOT): 3202 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3203 3204 if not self._match(TokenType.L_PAREN): 3205 return this 3206 3207 expressions = self._parse_csv(self._parse_function_parameter) 3208 self._match_r_paren() 3209 return self.expression( 3210 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3211 ) 3212 3213 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3214 literal = self._parse_primary() 3215 if literal: 3216 return self.expression(exp.Introducer, this=token.text, expression=literal) 3217 3218 return self.expression(exp.Identifier, this=token.text) 3219 3220 def _parse_session_parameter(self) -> exp.Expression: 3221 kind = None 3222 this = self._parse_id_var() or self._parse_primary() 3223 3224 if this and self._match(TokenType.DOT): 3225 kind = this.name 3226 this = self._parse_var() or self._parse_primary() 3227 3228 return self.expression(exp.SessionParameter, this=this, kind=kind) 3229 3230 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3231 index = self._index 3232 3233 if self._match(TokenType.L_PAREN): 3234 expressions = self._parse_csv(self._parse_id_var) 3235 3236 if not self._match(TokenType.R_PAREN): 3237 self._retreat(index) 3238 else: 3239 expressions = [self._parse_id_var()] 3240 3241 if self._match_set(self.LAMBDAS): 3242 return self.LAMBDAS[self._prev.token_type](self, expressions) 3243 3244 self._retreat(index) 3245 3246 this: t.Optional[exp.Expression] 3247 3248 if self._match(TokenType.DISTINCT): 3249 this = self.expression( 3250 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3251 ) 3252 else: 3253 this = self._parse_select_or_expression(alias=alias) 3254 3255 if isinstance(this, exp.EQ): 3256 left = this.this 3257 if isinstance(left, exp.Column): 3258 left.replace(exp.Var(this=left.text("this"))) 3259 3260 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3261 3262 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3263 index = self._index 3264 3265 if not self.errors: 3266 try: 3267 if self._parse_select(nested=True): 3268 return this 3269 except ParseError: 3270 pass 3271 finally: 3272 self.errors.clear() 3273 self._retreat(index) 3274 3275 if not self._match(TokenType.L_PAREN): 3276 return this 3277 3278 args = self._parse_csv( 3279 lambda: self._parse_constraint() 3280 or self._parse_column_def(self._parse_field(any_token=True)) 3281 ) 3282 self._match_r_paren() 3283 return self.expression(exp.Schema, this=this, expressions=args) 3284 3285 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3286 # column defs are not really columns, they're identifiers 3287 if isinstance(this, exp.Column): 3288 this = this.this 3289 kind = self._parse_types(schema=True) 3290 3291 if self._match_text_seq("FOR", "ORDINALITY"): 3292 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3293 3294 constraints = [] 3295 while True: 3296 constraint = self._parse_column_constraint() 3297 if not constraint: 3298 break 3299 constraints.append(constraint) 3300 3301 if not kind and not constraints: 3302 return this 3303 3304 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3305 3306 def _parse_auto_increment(self) -> exp.Expression: 3307 start = None 3308 increment = None 3309 3310 if self._match(TokenType.L_PAREN, advance=False): 3311 args = self._parse_wrapped_csv(self._parse_bitwise) 3312 start = seq_get(args, 0) 3313 increment = seq_get(args, 1) 3314 elif self._match_text_seq("START"): 3315 start = self._parse_bitwise() 3316 self._match_text_seq("INCREMENT") 3317 increment = self._parse_bitwise() 3318 3319 if start and increment: 3320 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3321 3322 return exp.AutoIncrementColumnConstraint() 3323 3324 def _parse_compress(self) -> exp.Expression: 3325 if self._match(TokenType.L_PAREN, advance=False): 3326 return self.expression( 3327 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3328 ) 3329 3330 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3331 3332 def _parse_generated_as_identity(self) -> exp.Expression: 3333 if self._match_text_seq("BY", "DEFAULT"): 3334 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3335 this = self.expression( 3336 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3337 ) 3338 else: 3339 self._match_text_seq("ALWAYS") 3340 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3341 3342 self._match(TokenType.ALIAS) 3343 identity = self._match_text_seq("IDENTITY") 3344 3345 if self._match(TokenType.L_PAREN): 3346 if self._match_text_seq("START", "WITH"): 3347 this.set("start", self._parse_bitwise()) 3348 if self._match_text_seq("INCREMENT", "BY"): 3349 this.set("increment", self._parse_bitwise()) 3350 if self._match_text_seq("MINVALUE"): 3351 this.set("minvalue", self._parse_bitwise()) 3352 if self._match_text_seq("MAXVALUE"): 3353 this.set("maxvalue", self._parse_bitwise()) 3354 3355 if self._match_text_seq("CYCLE"): 3356 this.set("cycle", True) 3357 elif self._match_text_seq("NO", "CYCLE"): 3358 this.set("cycle", False) 3359 3360 if not identity: 3361 this.set("expression", self._parse_bitwise()) 3362 3363 self._match_r_paren() 3364 3365 return this 3366 3367 def _parse_inline(self) -> t.Optional[exp.Expression]: 3368 self._match_text_seq("LENGTH") 3369 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3370 3371 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3372 if self._match_text_seq("NULL"): 3373 return self.expression(exp.NotNullColumnConstraint) 3374 if self._match_text_seq("CASESPECIFIC"): 3375 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3376 return None 3377 3378 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3379 if self._match(TokenType.CONSTRAINT): 3380 this = self._parse_id_var() 3381 else: 3382 this = None 3383 3384 if self._match_texts(self.CONSTRAINT_PARSERS): 3385 return self.expression( 3386 exp.ColumnConstraint, 3387 this=this, 3388 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3389 ) 3390 3391 return this 3392 3393 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3394 if not self._match(TokenType.CONSTRAINT): 3395 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3396 3397 this = self._parse_id_var() 3398 expressions = [] 3399 3400 while True: 3401 constraint = self._parse_unnamed_constraint() or self._parse_function() 3402 if not constraint: 3403 break 3404 expressions.append(constraint) 3405 3406 return self.expression(exp.Constraint, this=this, expressions=expressions) 3407 3408 def _parse_unnamed_constraint( 3409 self, constraints: t.Optional[t.Collection[str]] = None 3410 ) -> t.Optional[exp.Expression]: 3411 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3412 return None 3413 3414 constraint = self._prev.text.upper() 3415 if constraint not in self.CONSTRAINT_PARSERS: 3416 self.raise_error(f"No parser found for schema constraint {constraint}.") 3417 3418 return self.CONSTRAINT_PARSERS[constraint](self) 3419 3420 def _parse_unique(self) -> exp.Expression: 3421 self._match_text_seq("KEY") 3422 return self.expression( 3423 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3424 ) 3425 3426 def _parse_key_constraint_options(self) -> t.List[str]: 3427 options = [] 3428 while True: 3429 if not self._curr: 3430 break 3431 3432 if self._match(TokenType.ON): 3433 action = None 3434 on = self._advance_any() and self._prev.text 3435 3436 if self._match_text_seq("NO", "ACTION"): 3437 action = "NO ACTION" 3438 elif self._match_text_seq("CASCADE"): 3439 action = "CASCADE" 3440 elif self._match_pair(TokenType.SET, TokenType.NULL): 3441 action = "SET NULL" 3442 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3443 action = "SET DEFAULT" 3444 else: 3445 self.raise_error("Invalid key constraint") 3446 3447 options.append(f"ON {on} {action}") 3448 elif self._match_text_seq("NOT", "ENFORCED"): 3449 options.append("NOT ENFORCED") 3450 elif self._match_text_seq("DEFERRABLE"): 3451 options.append("DEFERRABLE") 3452 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3453 options.append("INITIALLY DEFERRED") 3454 elif self._match_text_seq("NORELY"): 3455 options.append("NORELY") 3456 elif self._match_text_seq("MATCH", "FULL"): 3457 options.append("MATCH FULL") 3458 else: 3459 break 3460 3461 return options 3462 3463 def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]: 3464 if match and not self._match(TokenType.REFERENCES): 3465 return None 3466 3467 expressions = None 3468 this = self._parse_id_var() 3469 3470 if self._match(TokenType.L_PAREN, advance=False): 3471 expressions = self._parse_wrapped_id_vars() 3472 3473 options = self._parse_key_constraint_options() 3474 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3475 3476 def _parse_foreign_key(self) -> exp.Expression: 3477 expressions = self._parse_wrapped_id_vars() 3478 reference = self._parse_references() 3479 options = {} 3480 3481 while self._match(TokenType.ON): 3482 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3483 self.raise_error("Expected DELETE or UPDATE") 3484 3485 kind = self._prev.text.lower() 3486 3487 if self._match_text_seq("NO", "ACTION"): 3488 action = "NO ACTION" 3489 elif self._match(TokenType.SET): 3490 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3491 action = "SET " + self._prev.text.upper() 3492 else: 3493 self._advance() 3494 action = self._prev.text.upper() 3495 3496 options[kind] = action 3497 3498 return self.expression( 3499 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3500 ) 3501 3502 def _parse_primary_key( 3503 self, wrapped_optional: bool = False, in_props: bool = False 3504 ) -> exp.Expression: 3505 desc = ( 3506 self._match_set((TokenType.ASC, TokenType.DESC)) 3507 and self._prev.token_type == TokenType.DESC 3508 ) 3509 3510 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3511 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3512 3513 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3514 options = self._parse_key_constraint_options() 3515 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3516 3517 @t.overload 3518 def _parse_bracket(self, this: exp.Expression) -> exp.Expression: 3519 ... 3520 3521 @t.overload 3522 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3523 ... 3524 3525 def _parse_bracket(self, this): 3526 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3527 return this 3528 3529 bracket_kind = self._prev.token_type 3530 3531 if self._match(TokenType.COLON): 3532 expressions: t.List[t.Optional[exp.Expression]] = [ 3533 self.expression(exp.Slice, expression=self._parse_conjunction()) 3534 ] 3535 else: 3536 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3537 3538 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3539 if bracket_kind == TokenType.L_BRACE: 3540 this = self.expression(exp.Struct, expressions=expressions) 3541 elif not this or this.name.upper() == "ARRAY": 3542 this = self.expression(exp.Array, expressions=expressions) 3543 else: 3544 expressions = apply_index_offset(this, expressions, -self.index_offset) 3545 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3546 3547 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3548 self.raise_error("Expected ]") 3549 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3550 self.raise_error("Expected }") 3551 3552 self._add_comments(this) 3553 return self._parse_bracket(this) 3554 3555 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3556 if self._match(TokenType.COLON): 3557 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3558 return this 3559 3560 def _parse_case(self) -> t.Optional[exp.Expression]: 3561 ifs = [] 3562 default = None 3563 3564 expression = self._parse_conjunction() 3565 3566 while self._match(TokenType.WHEN): 3567 this = self._parse_conjunction() 3568 self._match(TokenType.THEN) 3569 then = self._parse_conjunction() 3570 ifs.append(self.expression(exp.If, this=this, true=then)) 3571 3572 if self._match(TokenType.ELSE): 3573 default = self._parse_conjunction() 3574 3575 if not self._match(TokenType.END): 3576 self.raise_error("Expected END after CASE", self._prev) 3577 3578 return self._parse_window( 3579 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3580 ) 3581 3582 def _parse_if(self) -> t.Optional[exp.Expression]: 3583 if self._match(TokenType.L_PAREN): 3584 args = self._parse_csv(self._parse_conjunction) 3585 this = exp.If.from_arg_list(args) 3586 self.validate_expression(this, args) 3587 self._match_r_paren() 3588 else: 3589 index = self._index - 1 3590 condition = self._parse_conjunction() 3591 3592 if not condition: 3593 self._retreat(index) 3594 return None 3595 3596 self._match(TokenType.THEN) 3597 true = self._parse_conjunction() 3598 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3599 self._match(TokenType.END) 3600 this = self.expression(exp.If, this=condition, true=true, false=false) 3601 3602 return self._parse_window(this) 3603 3604 def _parse_extract(self) -> exp.Expression: 3605 this = self._parse_function() or self._parse_var() or self._parse_type() 3606 3607 if self._match(TokenType.FROM): 3608 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3609 3610 if not self._match(TokenType.COMMA): 3611 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3612 3613 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3614 3615 def _parse_cast(self, strict: bool) -> exp.Expression: 3616 this = self._parse_conjunction() 3617 3618 if not self._match(TokenType.ALIAS): 3619 if self._match(TokenType.COMMA): 3620 return self.expression( 3621 exp.CastToStrType, this=this, expression=self._parse_string() 3622 ) 3623 else: 3624 self.raise_error("Expected AS after CAST") 3625 3626 to = self._parse_types() 3627 3628 if not to: 3629 self.raise_error("Expected TYPE after CAST") 3630 elif to.this == exp.DataType.Type.CHAR: 3631 if self._match(TokenType.CHARACTER_SET): 3632 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3633 3634 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3635 3636 def _parse_string_agg(self) -> exp.Expression: 3637 expression: t.Optional[exp.Expression] 3638 3639 if self._match(TokenType.DISTINCT): 3640 args = self._parse_csv(self._parse_conjunction) 3641 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3642 else: 3643 args = self._parse_csv(self._parse_conjunction) 3644 expression = seq_get(args, 0) 3645 3646 index = self._index 3647 if not self._match(TokenType.R_PAREN): 3648 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3649 order = self._parse_order(this=expression) 3650 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3651 3652 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3653 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3654 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3655 if not self._match_text_seq("WITHIN", "GROUP"): 3656 self._retreat(index) 3657 this = exp.GroupConcat.from_arg_list(args) 3658 self.validate_expression(this, args) 3659 return this 3660 3661 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3662 order = self._parse_order(this=expression) 3663 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3664 3665 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3666 to: t.Optional[exp.Expression] 3667 this = self._parse_bitwise() 3668 3669 if self._match(TokenType.USING): 3670 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3671 elif self._match(TokenType.COMMA): 3672 to = self._parse_bitwise() 3673 else: 3674 to = None 3675 3676 # Swap the argument order if needed to produce the correct AST 3677 if self.CONVERT_TYPE_FIRST: 3678 this, to = to, this 3679 3680 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3681 3682 def _parse_decode(self) -> t.Optional[exp.Expression]: 3683 """ 3684 There are generally two variants of the DECODE function: 3685 3686 - DECODE(bin, charset) 3687 - DECODE(expression, search, result [, search, result] ... [, default]) 3688 3689 The second variant will always be parsed into a CASE expression. Note that NULL 3690 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3691 instead of relying on pattern matching. 3692 """ 3693 args = self._parse_csv(self._parse_conjunction) 3694 3695 if len(args) < 3: 3696 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3697 3698 expression, *expressions = args 3699 if not expression: 3700 return None 3701 3702 ifs = [] 3703 for search, result in zip(expressions[::2], expressions[1::2]): 3704 if not search or not result: 3705 return None 3706 3707 if isinstance(search, exp.Literal): 3708 ifs.append( 3709 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3710 ) 3711 elif isinstance(search, exp.Null): 3712 ifs.append( 3713 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3714 ) 3715 else: 3716 cond = exp.or_( 3717 exp.EQ(this=expression.copy(), expression=search), 3718 exp.and_( 3719 exp.Is(this=expression.copy(), expression=exp.Null()), 3720 exp.Is(this=search.copy(), expression=exp.Null()), 3721 copy=False, 3722 ), 3723 copy=False, 3724 ) 3725 ifs.append(exp.If(this=cond, true=result)) 3726 3727 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3728 3729 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3730 self._match_text_seq("KEY") 3731 key = self._parse_field() 3732 self._match(TokenType.COLON) 3733 self._match_text_seq("VALUE") 3734 value = self._parse_field() 3735 if not key and not value: 3736 return None 3737 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3738 3739 def _parse_json_object(self) -> exp.Expression: 3740 expressions = self._parse_csv(self._parse_json_key_value) 3741 3742 null_handling = None 3743 if self._match_text_seq("NULL", "ON", "NULL"): 3744 null_handling = "NULL ON NULL" 3745 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3746 null_handling = "ABSENT ON NULL" 3747 3748 unique_keys = None 3749 if self._match_text_seq("WITH", "UNIQUE"): 3750 unique_keys = True 3751 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3752 unique_keys = False 3753 3754 self._match_text_seq("KEYS") 3755 3756 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3757 format_json = self._match_text_seq("FORMAT", "JSON") 3758 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3759 3760 return self.expression( 3761 exp.JSONObject, 3762 expressions=expressions, 3763 null_handling=null_handling, 3764 unique_keys=unique_keys, 3765 return_type=return_type, 3766 format_json=format_json, 3767 encoding=encoding, 3768 ) 3769 3770 def _parse_logarithm(self) -> exp.Expression: 3771 # Default argument order is base, expression 3772 args = self._parse_csv(self._parse_range) 3773 3774 if len(args) > 1: 3775 if not self.LOG_BASE_FIRST: 3776 args.reverse() 3777 return exp.Log.from_arg_list(args) 3778 3779 return self.expression( 3780 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3781 ) 3782 3783 def _parse_match_against(self) -> exp.Expression: 3784 expressions = self._parse_csv(self._parse_column) 3785 3786 self._match_text_seq(")", "AGAINST", "(") 3787 3788 this = self._parse_string() 3789 3790 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3791 modifier = "IN NATURAL LANGUAGE MODE" 3792 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3793 modifier = f"{modifier} WITH QUERY EXPANSION" 3794 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3795 modifier = "IN BOOLEAN MODE" 3796 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3797 modifier = "WITH QUERY EXPANSION" 3798 else: 3799 modifier = None 3800 3801 return self.expression( 3802 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3803 ) 3804 3805 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3806 def _parse_open_json(self) -> exp.Expression: 3807 this = self._parse_bitwise() 3808 path = self._match(TokenType.COMMA) and self._parse_string() 3809 3810 def _parse_open_json_column_def() -> exp.Expression: 3811 this = self._parse_field(any_token=True) 3812 kind = self._parse_types() 3813 path = self._parse_string() 3814 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3815 return self.expression( 3816 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3817 ) 3818 3819 expressions = None 3820 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3821 self._match_l_paren() 3822 expressions = self._parse_csv(_parse_open_json_column_def) 3823 3824 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3825 3826 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3827 args = self._parse_csv(self._parse_bitwise) 3828 3829 if self._match(TokenType.IN): 3830 return self.expression( 3831 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3832 ) 3833 3834 if haystack_first: 3835 haystack = seq_get(args, 0) 3836 needle = seq_get(args, 1) 3837 else: 3838 needle = seq_get(args, 0) 3839 haystack = seq_get(args, 1) 3840 3841 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3842 3843 self.validate_expression(this, args) 3844 3845 return this 3846 3847 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3848 args = self._parse_csv(self._parse_table) 3849 return exp.JoinHint(this=func_name.upper(), expressions=args) 3850 3851 def _parse_substring(self) -> exp.Expression: 3852 # Postgres supports the form: substring(string [from int] [for int]) 3853 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3854 3855 args = self._parse_csv(self._parse_bitwise) 3856 3857 if self._match(TokenType.FROM): 3858 args.append(self._parse_bitwise()) 3859 if self._match(TokenType.FOR): 3860 args.append(self._parse_bitwise()) 3861 3862 this = exp.Substring.from_arg_list(args) 3863 self.validate_expression(this, args) 3864 3865 return this 3866 3867 def _parse_trim(self) -> exp.Expression: 3868 # https://www.w3resource.com/sql/character-functions/trim.php 3869 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3870 3871 position = None 3872 collation = None 3873 3874 if self._match_texts(self.TRIM_TYPES): 3875 position = self._prev.text.upper() 3876 3877 expression = self._parse_bitwise() 3878 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3879 this = self._parse_bitwise() 3880 else: 3881 this = expression 3882 expression = None 3883 3884 if self._match(TokenType.COLLATE): 3885 collation = self._parse_bitwise() 3886 3887 return self.expression( 3888 exp.Trim, 3889 this=this, 3890 position=position, 3891 expression=expression, 3892 collation=collation, 3893 ) 3894 3895 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3896 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3897 3898 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3899 return self._parse_window(self._parse_id_var(), alias=True) 3900 3901 def _parse_respect_or_ignore_nulls( 3902 self, this: t.Optional[exp.Expression] 3903 ) -> t.Optional[exp.Expression]: 3904 if self._match_text_seq("IGNORE", "NULLS"): 3905 return self.expression(exp.IgnoreNulls, this=this) 3906 if self._match_text_seq("RESPECT", "NULLS"): 3907 return self.expression(exp.RespectNulls, this=this) 3908 return this 3909 3910 def _parse_window( 3911 self, this: t.Optional[exp.Expression], alias: bool = False 3912 ) -> t.Optional[exp.Expression]: 3913 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3914 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3915 self._match_r_paren() 3916 3917 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3918 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3919 if self._match_text_seq("WITHIN", "GROUP"): 3920 order = self._parse_wrapped(self._parse_order) 3921 this = self.expression(exp.WithinGroup, this=this, expression=order) 3922 3923 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3924 # Some dialects choose to implement and some do not. 3925 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3926 3927 # There is some code above in _parse_lambda that handles 3928 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3929 3930 # The below changes handle 3931 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3932 3933 # Oracle allows both formats 3934 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3935 # and Snowflake chose to do the same for familiarity 3936 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3937 this = self._parse_respect_or_ignore_nulls(this) 3938 3939 # bigquery select from window x AS (partition by ...) 3940 if alias: 3941 over = None 3942 self._match(TokenType.ALIAS) 3943 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3944 return this 3945 else: 3946 over = self._prev.text.upper() 3947 3948 if not self._match(TokenType.L_PAREN): 3949 return self.expression( 3950 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3951 ) 3952 3953 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3954 3955 first = self._match(TokenType.FIRST) 3956 if self._match_text_seq("LAST"): 3957 first = False 3958 3959 partition = self._parse_partition_by() 3960 order = self._parse_order() 3961 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3962 3963 if kind: 3964 self._match(TokenType.BETWEEN) 3965 start = self._parse_window_spec() 3966 self._match(TokenType.AND) 3967 end = self._parse_window_spec() 3968 3969 spec = self.expression( 3970 exp.WindowSpec, 3971 kind=kind, 3972 start=start["value"], 3973 start_side=start["side"], 3974 end=end["value"], 3975 end_side=end["side"], 3976 ) 3977 else: 3978 spec = None 3979 3980 self._match_r_paren() 3981 3982 return self.expression( 3983 exp.Window, 3984 this=this, 3985 partition_by=partition, 3986 order=order, 3987 spec=spec, 3988 alias=window_alias, 3989 over=over, 3990 first=first, 3991 ) 3992 3993 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3994 self._match(TokenType.BETWEEN) 3995 3996 return { 3997 "value": ( 3998 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 3999 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4000 or self._parse_bitwise() 4001 ), 4002 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4003 } 4004 4005 def _parse_alias( 4006 self, this: t.Optional[exp.Expression], explicit: bool = False 4007 ) -> t.Optional[exp.Expression]: 4008 any_token = self._match(TokenType.ALIAS) 4009 4010 if explicit and not any_token: 4011 return this 4012 4013 if self._match(TokenType.L_PAREN): 4014 aliases = self.expression( 4015 exp.Aliases, 4016 this=this, 4017 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4018 ) 4019 self._match_r_paren(aliases) 4020 return aliases 4021 4022 alias = self._parse_id_var(any_token) 4023 4024 if alias: 4025 return self.expression(exp.Alias, this=this, alias=alias) 4026 4027 return this 4028 4029 def _parse_id_var( 4030 self, 4031 any_token: bool = True, 4032 tokens: t.Optional[t.Collection[TokenType]] = None, 4033 ) -> t.Optional[exp.Expression]: 4034 identifier = self._parse_identifier() 4035 4036 if identifier: 4037 return identifier 4038 4039 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4040 quoted = self._prev.token_type == TokenType.STRING 4041 return exp.Identifier(this=self._prev.text, quoted=quoted) 4042 4043 return None 4044 4045 def _parse_string(self) -> t.Optional[exp.Expression]: 4046 if self._match(TokenType.STRING): 4047 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4048 return self._parse_placeholder() 4049 4050 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 4051 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4052 4053 def _parse_number(self) -> t.Optional[exp.Expression]: 4054 if self._match(TokenType.NUMBER): 4055 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4056 return self._parse_placeholder() 4057 4058 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4059 if self._match(TokenType.IDENTIFIER): 4060 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4061 return self._parse_placeholder() 4062 4063 def _parse_var( 4064 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4065 ) -> t.Optional[exp.Expression]: 4066 if ( 4067 (any_token and self._advance_any()) 4068 or self._match(TokenType.VAR) 4069 or (self._match_set(tokens) if tokens else False) 4070 ): 4071 return self.expression(exp.Var, this=self._prev.text) 4072 return self._parse_placeholder() 4073 4074 def _advance_any(self) -> t.Optional[Token]: 4075 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4076 self._advance() 4077 return self._prev 4078 return None 4079 4080 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4081 return self._parse_var() or self._parse_string() 4082 4083 def _parse_null(self) -> t.Optional[exp.Expression]: 4084 if self._match(TokenType.NULL): 4085 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4086 return None 4087 4088 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4089 if self._match(TokenType.TRUE): 4090 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4091 if self._match(TokenType.FALSE): 4092 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4093 return None 4094 4095 def _parse_star(self) -> t.Optional[exp.Expression]: 4096 if self._match(TokenType.STAR): 4097 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4098 return None 4099 4100 def _parse_parameter(self) -> exp.Expression: 4101 wrapped = self._match(TokenType.L_BRACE) 4102 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4103 self._match(TokenType.R_BRACE) 4104 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4105 4106 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4107 if self._match_set(self.PLACEHOLDER_PARSERS): 4108 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4109 if placeholder: 4110 return placeholder 4111 self._advance(-1) 4112 return None 4113 4114 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4115 if not self._match(TokenType.EXCEPT): 4116 return None 4117 if self._match(TokenType.L_PAREN, advance=False): 4118 return self._parse_wrapped_csv(self._parse_column) 4119 return self._parse_csv(self._parse_column) 4120 4121 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4122 if not self._match(TokenType.REPLACE): 4123 return None 4124 if self._match(TokenType.L_PAREN, advance=False): 4125 return self._parse_wrapped_csv(self._parse_expression) 4126 return self._parse_csv(self._parse_expression) 4127 4128 def _parse_csv( 4129 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4130 ) -> t.List[t.Optional[exp.Expression]]: 4131 parse_result = parse_method() 4132 items = [parse_result] if parse_result is not None else [] 4133 4134 while self._match(sep): 4135 self._add_comments(parse_result) 4136 parse_result = parse_method() 4137 if parse_result is not None: 4138 items.append(parse_result) 4139 4140 return items 4141 4142 def _parse_tokens( 4143 self, parse_method: t.Callable, expressions: t.Dict 4144 ) -> t.Optional[exp.Expression]: 4145 this = parse_method() 4146 4147 while self._match_set(expressions): 4148 this = self.expression( 4149 expressions[self._prev.token_type], 4150 this=this, 4151 comments=self._prev_comments, 4152 expression=parse_method(), 4153 ) 4154 4155 return this 4156 4157 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4158 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4159 4160 def _parse_wrapped_csv( 4161 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4162 ) -> t.List[t.Optional[exp.Expression]]: 4163 return self._parse_wrapped( 4164 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4165 ) 4166 4167 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4168 wrapped = self._match(TokenType.L_PAREN) 4169 if not wrapped and not optional: 4170 self.raise_error("Expecting (") 4171 parse_result = parse_method() 4172 if wrapped: 4173 self._match_r_paren() 4174 return parse_result 4175 4176 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4177 return self._parse_select() or self._parse_set_operations( 4178 self._parse_expression() if alias else self._parse_conjunction() 4179 ) 4180 4181 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4182 return self._parse_query_modifiers( 4183 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4184 ) 4185 4186 def _parse_transaction(self) -> exp.Expression: 4187 this = None 4188 if self._match_texts(self.TRANSACTION_KIND): 4189 this = self._prev.text 4190 4191 self._match_texts({"TRANSACTION", "WORK"}) 4192 4193 modes = [] 4194 while True: 4195 mode = [] 4196 while self._match(TokenType.VAR): 4197 mode.append(self._prev.text) 4198 4199 if mode: 4200 modes.append(" ".join(mode)) 4201 if not self._match(TokenType.COMMA): 4202 break 4203 4204 return self.expression(exp.Transaction, this=this, modes=modes) 4205 4206 def _parse_commit_or_rollback(self) -> exp.Expression: 4207 chain = None 4208 savepoint = None 4209 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4210 4211 self._match_texts({"TRANSACTION", "WORK"}) 4212 4213 if self._match_text_seq("TO"): 4214 self._match_text_seq("SAVEPOINT") 4215 savepoint = self._parse_id_var() 4216 4217 if self._match(TokenType.AND): 4218 chain = not self._match_text_seq("NO") 4219 self._match_text_seq("CHAIN") 4220 4221 if is_rollback: 4222 return self.expression(exp.Rollback, savepoint=savepoint) 4223 return self.expression(exp.Commit, chain=chain) 4224 4225 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4226 if not self._match_text_seq("ADD"): 4227 return None 4228 4229 self._match(TokenType.COLUMN) 4230 exists_column = self._parse_exists(not_=True) 4231 expression = self._parse_column_def(self._parse_field(any_token=True)) 4232 4233 if expression: 4234 expression.set("exists", exists_column) 4235 4236 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4237 if self._match_texts(("FIRST", "AFTER")): 4238 position = self._prev.text 4239 column_position = self.expression( 4240 exp.ColumnPosition, this=self._parse_column(), position=position 4241 ) 4242 expression.set("position", column_position) 4243 4244 return expression 4245 4246 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4247 drop = self._match(TokenType.DROP) and self._parse_drop() 4248 if drop and not isinstance(drop, exp.Command): 4249 drop.set("kind", drop.args.get("kind", "COLUMN")) 4250 return drop 4251 4252 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4253 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4254 return self.expression( 4255 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4256 ) 4257 4258 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4259 this = None 4260 kind = self._prev.token_type 4261 4262 if kind == TokenType.CONSTRAINT: 4263 this = self._parse_id_var() 4264 4265 if self._match_text_seq("CHECK"): 4266 expression = self._parse_wrapped(self._parse_conjunction) 4267 enforced = self._match_text_seq("ENFORCED") 4268 4269 return self.expression( 4270 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4271 ) 4272 4273 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4274 expression = self._parse_foreign_key() 4275 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4276 expression = self._parse_primary_key() 4277 else: 4278 expression = None 4279 4280 return self.expression(exp.AddConstraint, this=this, expression=expression) 4281 4282 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4283 index = self._index - 1 4284 4285 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4286 return self._parse_csv(self._parse_add_constraint) 4287 4288 self._retreat(index) 4289 return self._parse_csv(self._parse_add_column) 4290 4291 def _parse_alter_table_alter(self) -> exp.Expression: 4292 self._match(TokenType.COLUMN) 4293 column = self._parse_field(any_token=True) 4294 4295 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4296 return self.expression(exp.AlterColumn, this=column, drop=True) 4297 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4298 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4299 4300 self._match_text_seq("SET", "DATA") 4301 return self.expression( 4302 exp.AlterColumn, 4303 this=column, 4304 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4305 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4306 using=self._match(TokenType.USING) and self._parse_conjunction(), 4307 ) 4308 4309 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4310 index = self._index - 1 4311 4312 partition_exists = self._parse_exists() 4313 if self._match(TokenType.PARTITION, advance=False): 4314 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4315 4316 self._retreat(index) 4317 return self._parse_csv(self._parse_drop_column) 4318 4319 def _parse_alter_table_rename(self) -> exp.Expression: 4320 self._match_text_seq("TO") 4321 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4322 4323 def _parse_alter(self) -> t.Optional[exp.Expression]: 4324 start = self._prev 4325 4326 if not self._match(TokenType.TABLE): 4327 return self._parse_as_command(start) 4328 4329 exists = self._parse_exists() 4330 this = self._parse_table(schema=True) 4331 4332 if self._next: 4333 self._advance() 4334 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4335 4336 if parser: 4337 actions = ensure_list(parser(self)) 4338 4339 if not self._curr: 4340 return self.expression( 4341 exp.AlterTable, 4342 this=this, 4343 exists=exists, 4344 actions=actions, 4345 ) 4346 return self._parse_as_command(start) 4347 4348 def _parse_merge(self) -> exp.Expression: 4349 self._match(TokenType.INTO) 4350 target = self._parse_table() 4351 4352 self._match(TokenType.USING) 4353 using = self._parse_table() 4354 4355 self._match(TokenType.ON) 4356 on = self._parse_conjunction() 4357 4358 whens = [] 4359 while self._match(TokenType.WHEN): 4360 matched = not self._match(TokenType.NOT) 4361 self._match_text_seq("MATCHED") 4362 source = ( 4363 False 4364 if self._match_text_seq("BY", "TARGET") 4365 else self._match_text_seq("BY", "SOURCE") 4366 ) 4367 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4368 4369 self._match(TokenType.THEN) 4370 4371 if self._match(TokenType.INSERT): 4372 _this = self._parse_star() 4373 if _this: 4374 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4375 else: 4376 then = self.expression( 4377 exp.Insert, 4378 this=self._parse_value(), 4379 expression=self._match(TokenType.VALUES) and self._parse_value(), 4380 ) 4381 elif self._match(TokenType.UPDATE): 4382 expressions = self._parse_star() 4383 if expressions: 4384 then = self.expression(exp.Update, expressions=expressions) 4385 else: 4386 then = self.expression( 4387 exp.Update, 4388 expressions=self._match(TokenType.SET) 4389 and self._parse_csv(self._parse_equality), 4390 ) 4391 elif self._match(TokenType.DELETE): 4392 then = self.expression(exp.Var, this=self._prev.text) 4393 else: 4394 then = None 4395 4396 whens.append( 4397 self.expression( 4398 exp.When, 4399 matched=matched, 4400 source=source, 4401 condition=condition, 4402 then=then, 4403 ) 4404 ) 4405 4406 return self.expression( 4407 exp.Merge, 4408 this=target, 4409 using=using, 4410 on=on, 4411 expressions=whens, 4412 ) 4413 4414 def _parse_show(self) -> t.Optional[exp.Expression]: 4415 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4416 if parser: 4417 return parser(self) 4418 self._advance() 4419 return self.expression(exp.Show, this=self._prev.text.upper()) 4420 4421 def _parse_set_item_assignment( 4422 self, kind: t.Optional[str] = None 4423 ) -> t.Optional[exp.Expression]: 4424 index = self._index 4425 4426 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4427 return self._parse_set_transaction(global_=kind == "GLOBAL") 4428 4429 left = self._parse_primary() or self._parse_id_var() 4430 4431 if not self._match_texts(("=", "TO")): 4432 self._retreat(index) 4433 return None 4434 4435 right = self._parse_statement() or self._parse_id_var() 4436 this = self.expression( 4437 exp.EQ, 4438 this=left, 4439 expression=right, 4440 ) 4441 4442 return self.expression( 4443 exp.SetItem, 4444 this=this, 4445 kind=kind, 4446 ) 4447 4448 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4449 self._match_text_seq("TRANSACTION") 4450 characteristics = self._parse_csv( 4451 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4452 ) 4453 return self.expression( 4454 exp.SetItem, 4455 expressions=characteristics, 4456 kind="TRANSACTION", 4457 **{"global": global_}, # type: ignore 4458 ) 4459 4460 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4461 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4462 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4463 4464 def _parse_set(self) -> exp.Expression: 4465 index = self._index 4466 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4467 4468 if self._curr: 4469 self._retreat(index) 4470 return self._parse_as_command(self._prev) 4471 4472 return set_ 4473 4474 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4475 for option in options: 4476 if self._match_text_seq(*option.split(" ")): 4477 return exp.Var(this=option) 4478 return None 4479 4480 def _parse_as_command(self, start: Token) -> exp.Command: 4481 while self._curr: 4482 self._advance() 4483 text = self._find_sql(start, self._prev) 4484 size = len(start.text) 4485 return exp.Command(this=text[:size], expression=text[size:]) 4486 4487 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4488 settings = [] 4489 4490 self._match_l_paren() 4491 kind = self._parse_id_var() 4492 4493 if self._match(TokenType.L_PAREN): 4494 while True: 4495 key = self._parse_id_var() 4496 value = self._parse_primary() 4497 4498 if not key and value is None: 4499 break 4500 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4501 self._match(TokenType.R_PAREN) 4502 4503 self._match_r_paren() 4504 4505 return self.expression( 4506 exp.DictProperty, 4507 this=this, 4508 kind=kind.this if kind else None, 4509 settings=settings, 4510 ) 4511 4512 def _parse_dict_range(self, this: str) -> exp.DictRange: 4513 self._match_l_paren() 4514 has_min = self._match_text_seq("MIN") 4515 if has_min: 4516 min = self._parse_var() or self._parse_primary() 4517 self._match_text_seq("MAX") 4518 max = self._parse_var() or self._parse_primary() 4519 else: 4520 max = self._parse_var() or self._parse_primary() 4521 min = exp.Literal.number(0) 4522 self._match_r_paren() 4523 return self.expression(exp.DictRange, this=this, min=min, max=max) 4524 4525 def _find_parser( 4526 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4527 ) -> t.Optional[t.Callable]: 4528 if not self._curr: 4529 return None 4530 4531 index = self._index 4532 this = [] 4533 while True: 4534 # The current token might be multiple words 4535 curr = self._curr.text.upper() 4536 key = curr.split(" ") 4537 this.append(curr) 4538 self._advance() 4539 result, trie = in_trie(trie, key) 4540 if result == 0: 4541 break 4542 if result == 2: 4543 subparser = parsers[" ".join(this)] 4544 return subparser 4545 self._retreat(index) 4546 return None 4547 4548 def _match(self, token_type, advance=True, expression=None): 4549 if not self._curr: 4550 return None 4551 4552 if self._curr.token_type == token_type: 4553 if advance: 4554 self._advance() 4555 self._add_comments(expression) 4556 return True 4557 4558 return None 4559 4560 def _match_set(self, types, advance=True): 4561 if not self._curr: 4562 return None 4563 4564 if self._curr.token_type in types: 4565 if advance: 4566 self._advance() 4567 return True 4568 4569 return None 4570 4571 def _match_pair(self, token_type_a, token_type_b, advance=True): 4572 if not self._curr or not self._next: 4573 return None 4574 4575 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4576 if advance: 4577 self._advance(2) 4578 return True 4579 4580 return None 4581 4582 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4583 if not self._match(TokenType.L_PAREN, expression=expression): 4584 self.raise_error("Expecting (") 4585 4586 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4587 if not self._match(TokenType.R_PAREN, expression=expression): 4588 self.raise_error("Expecting )") 4589 4590 def _match_texts(self, texts, advance=True): 4591 if self._curr and self._curr.text.upper() in texts: 4592 if advance: 4593 self._advance() 4594 return True 4595 return False 4596 4597 def _match_text_seq(self, *texts, advance=True): 4598 index = self._index 4599 for text in texts: 4600 if self._curr and self._curr.text.upper() == text: 4601 self._advance() 4602 else: 4603 self._retreat(index) 4604 return False 4605 4606 if not advance: 4607 self._retreat(index) 4608 4609 return True 4610 4611 @t.overload 4612 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4613 ... 4614 4615 @t.overload 4616 def _replace_columns_with_dots( 4617 self, this: t.Optional[exp.Expression] 4618 ) -> t.Optional[exp.Expression]: 4619 ... 4620 4621 def _replace_columns_with_dots(self, this): 4622 if isinstance(this, exp.Dot): 4623 exp.replace_children(this, self._replace_columns_with_dots) 4624 elif isinstance(this, exp.Column): 4625 exp.replace_children(this, self._replace_columns_with_dots) 4626 table = this.args.get("table") 4627 this = ( 4628 self.expression(exp.Dot, this=table, expression=this.this) 4629 if table 4630 else self.expression(exp.Var, this=this.name) 4631 ) 4632 elif isinstance(this, exp.Identifier): 4633 this = self.expression(exp.Var, this=this.name) 4634 4635 return this 4636 4637 def _replace_lambda( 4638 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4639 ) -> t.Optional[exp.Expression]: 4640 if not node: 4641 return node 4642 4643 for column in node.find_all(exp.Column): 4644 if column.parts[0].name in lambda_variables: 4645 dot_or_id = column.to_dot() if column.table else column.this 4646 parent = column.parent 4647 4648 while isinstance(parent, exp.Dot): 4649 if not isinstance(parent.parent, exp.Dot): 4650 parent.replace(dot_or_id) 4651 break 4652 parent = parent.parent 4653 else: 4654 if column is node: 4655 node = dot_or_id 4656 else: 4657 column.replace(dot_or_id) 4658 return node
20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 return exp.VarMap( 30 keys=exp.Array(expressions=keys), 31 values=exp.Array(expressions=values), 32 )
57class Parser(metaclass=_Parser): 58 """ 59 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 60 a parsed syntax tree. 61 62 Args: 63 error_level: the desired error level. 64 Default: ErrorLevel.IMMEDIATE 65 error_message_context: determines the amount of context to capture from a 66 query string when displaying the error message (in number of characters). 67 Default: 50. 68 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 69 Default: 0 70 alias_post_tablesample: If the table alias comes after tablesample. 71 Default: False 72 max_errors: Maximum number of error messages to include in a raised ParseError. 73 This is only relevant if error_level is ErrorLevel.RAISE. 74 Default: 3 75 null_ordering: Indicates the default null ordering method to use if not explicitly set. 76 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 77 Default: "nulls_are_small" 78 """ 79 80 FUNCTIONS: t.Dict[str, t.Callable] = { 81 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 82 "DATE_TO_DATE_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 87 "IFNULL": exp.Coalesce.from_arg_list, 88 "LIKE": parse_like, 89 "TIME_TO_TIME_STR": lambda args: exp.Cast( 90 this=seq_get(args, 0), 91 to=exp.DataType(this=exp.DataType.Type.TEXT), 92 ), 93 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 94 this=exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 start=exp.Literal.number(1), 99 length=exp.Literal.number(10), 100 ), 101 "VAR_MAP": parse_var_map, 102 } 103 104 NO_PAREN_FUNCTIONS = { 105 TokenType.CURRENT_DATE: exp.CurrentDate, 106 TokenType.CURRENT_DATETIME: exp.CurrentDate, 107 TokenType.CURRENT_TIME: exp.CurrentTime, 108 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 109 TokenType.CURRENT_USER: exp.CurrentUser, 110 } 111 112 JOIN_HINTS: t.Set[str] = set() 113 114 NESTED_TYPE_TOKENS = { 115 TokenType.ARRAY, 116 TokenType.MAP, 117 TokenType.NULLABLE, 118 TokenType.STRUCT, 119 } 120 121 TYPE_TOKENS = { 122 TokenType.BIT, 123 TokenType.BOOLEAN, 124 TokenType.TINYINT, 125 TokenType.UTINYINT, 126 TokenType.SMALLINT, 127 TokenType.USMALLINT, 128 TokenType.INT, 129 TokenType.UINT, 130 TokenType.BIGINT, 131 TokenType.UBIGINT, 132 TokenType.INT128, 133 TokenType.UINT128, 134 TokenType.INT256, 135 TokenType.UINT256, 136 TokenType.FLOAT, 137 TokenType.DOUBLE, 138 TokenType.CHAR, 139 TokenType.NCHAR, 140 TokenType.VARCHAR, 141 TokenType.NVARCHAR, 142 TokenType.TEXT, 143 TokenType.MEDIUMTEXT, 144 TokenType.LONGTEXT, 145 TokenType.MEDIUMBLOB, 146 TokenType.LONGBLOB, 147 TokenType.BINARY, 148 TokenType.VARBINARY, 149 TokenType.JSON, 150 TokenType.JSONB, 151 TokenType.INTERVAL, 152 TokenType.TIME, 153 TokenType.TIMESTAMP, 154 TokenType.TIMESTAMPTZ, 155 TokenType.TIMESTAMPLTZ, 156 TokenType.DATETIME, 157 TokenType.DATETIME64, 158 TokenType.DATE, 159 TokenType.INT4RANGE, 160 TokenType.INT4MULTIRANGE, 161 TokenType.INT8RANGE, 162 TokenType.INT8MULTIRANGE, 163 TokenType.NUMRANGE, 164 TokenType.NUMMULTIRANGE, 165 TokenType.TSRANGE, 166 TokenType.TSMULTIRANGE, 167 TokenType.TSTZRANGE, 168 TokenType.TSTZMULTIRANGE, 169 TokenType.DATERANGE, 170 TokenType.DATEMULTIRANGE, 171 TokenType.DECIMAL, 172 TokenType.BIGDECIMAL, 173 TokenType.UUID, 174 TokenType.GEOGRAPHY, 175 TokenType.GEOMETRY, 176 TokenType.HLLSKETCH, 177 TokenType.HSTORE, 178 TokenType.PSEUDO_TYPE, 179 TokenType.SUPER, 180 TokenType.SERIAL, 181 TokenType.SMALLSERIAL, 182 TokenType.BIGSERIAL, 183 TokenType.XML, 184 TokenType.UNIQUEIDENTIFIER, 185 TokenType.MONEY, 186 TokenType.SMALLMONEY, 187 TokenType.ROWVERSION, 188 TokenType.IMAGE, 189 TokenType.VARIANT, 190 TokenType.OBJECT, 191 TokenType.INET, 192 *NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 ID_VAR_TOKENS = { 221 TokenType.VAR, 222 TokenType.ANTI, 223 TokenType.APPLY, 224 TokenType.ASC, 225 TokenType.AUTO_INCREMENT, 226 TokenType.BEGIN, 227 TokenType.CACHE, 228 TokenType.COLLATE, 229 TokenType.COMMAND, 230 TokenType.COMMENT, 231 TokenType.COMMIT, 232 TokenType.CONSTRAINT, 233 TokenType.DEFAULT, 234 TokenType.DELETE, 235 TokenType.DESC, 236 TokenType.DESCRIBE, 237 TokenType.DICTIONARY, 238 TokenType.DIV, 239 TokenType.END, 240 TokenType.EXECUTE, 241 TokenType.ESCAPE, 242 TokenType.FALSE, 243 TokenType.FIRST, 244 TokenType.FILTER, 245 TokenType.FORMAT, 246 TokenType.FULL, 247 TokenType.IF, 248 TokenType.IS, 249 TokenType.ISNULL, 250 TokenType.INTERVAL, 251 TokenType.KEEP, 252 TokenType.LEFT, 253 TokenType.LOAD, 254 TokenType.MERGE, 255 TokenType.NATURAL, 256 TokenType.NEXT, 257 TokenType.OFFSET, 258 TokenType.ORDINALITY, 259 TokenType.OVERWRITE, 260 TokenType.PARTITION, 261 TokenType.PERCENT, 262 TokenType.PIVOT, 263 TokenType.PRAGMA, 264 TokenType.RANGE, 265 TokenType.REFERENCES, 266 TokenType.RIGHT, 267 TokenType.ROW, 268 TokenType.ROWS, 269 TokenType.SEMI, 270 TokenType.SET, 271 TokenType.SETTINGS, 272 TokenType.SHOW, 273 TokenType.TEMPORARY, 274 TokenType.TOP, 275 TokenType.TRUE, 276 TokenType.UNIQUE, 277 TokenType.UNPIVOT, 278 TokenType.VOLATILE, 279 TokenType.WINDOW, 280 *CREATABLES, 281 *SUBQUERY_PREDICATES, 282 *TYPE_TOKENS, 283 *NO_PAREN_FUNCTIONS, 284 } 285 286 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 287 288 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 289 TokenType.APPLY, 290 TokenType.ASOF, 291 TokenType.FULL, 292 TokenType.LEFT, 293 TokenType.LOCK, 294 TokenType.NATURAL, 295 TokenType.OFFSET, 296 TokenType.RIGHT, 297 TokenType.WINDOW, 298 } 299 300 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 301 302 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 303 304 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 305 306 FUNC_TOKENS = { 307 TokenType.COMMAND, 308 TokenType.CURRENT_DATE, 309 TokenType.CURRENT_DATETIME, 310 TokenType.CURRENT_TIMESTAMP, 311 TokenType.CURRENT_TIME, 312 TokenType.CURRENT_USER, 313 TokenType.FILTER, 314 TokenType.FIRST, 315 TokenType.FORMAT, 316 TokenType.GLOB, 317 TokenType.IDENTIFIER, 318 TokenType.INDEX, 319 TokenType.ISNULL, 320 TokenType.ILIKE, 321 TokenType.LIKE, 322 TokenType.MERGE, 323 TokenType.OFFSET, 324 TokenType.PRIMARY_KEY, 325 TokenType.RANGE, 326 TokenType.REPLACE, 327 TokenType.ROW, 328 TokenType.UNNEST, 329 TokenType.VAR, 330 TokenType.LEFT, 331 TokenType.RIGHT, 332 TokenType.DATE, 333 TokenType.DATETIME, 334 TokenType.TABLE, 335 TokenType.TIMESTAMP, 336 TokenType.TIMESTAMPTZ, 337 TokenType.WINDOW, 338 *TYPE_TOKENS, 339 *SUBQUERY_PREDICATES, 340 } 341 342 CONJUNCTION = { 343 TokenType.AND: exp.And, 344 TokenType.OR: exp.Or, 345 } 346 347 EQUALITY = { 348 TokenType.EQ: exp.EQ, 349 TokenType.NEQ: exp.NEQ, 350 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 351 } 352 353 COMPARISON = { 354 TokenType.GT: exp.GT, 355 TokenType.GTE: exp.GTE, 356 TokenType.LT: exp.LT, 357 TokenType.LTE: exp.LTE, 358 } 359 360 BITWISE = { 361 TokenType.AMP: exp.BitwiseAnd, 362 TokenType.CARET: exp.BitwiseXor, 363 TokenType.PIPE: exp.BitwiseOr, 364 TokenType.DPIPE: exp.DPipe, 365 } 366 367 TERM = { 368 TokenType.DASH: exp.Sub, 369 TokenType.PLUS: exp.Add, 370 TokenType.MOD: exp.Mod, 371 TokenType.COLLATE: exp.Collate, 372 } 373 374 FACTOR = { 375 TokenType.DIV: exp.IntDiv, 376 TokenType.LR_ARROW: exp.Distance, 377 TokenType.SLASH: exp.Div, 378 TokenType.STAR: exp.Mul, 379 } 380 381 TIMESTAMPS = { 382 TokenType.TIME, 383 TokenType.TIMESTAMP, 384 TokenType.TIMESTAMPTZ, 385 TokenType.TIMESTAMPLTZ, 386 } 387 388 SET_OPERATIONS = { 389 TokenType.UNION, 390 TokenType.INTERSECT, 391 TokenType.EXCEPT, 392 } 393 394 JOIN_METHODS = { 395 TokenType.NATURAL, 396 TokenType.ASOF, 397 } 398 399 JOIN_SIDES = { 400 TokenType.LEFT, 401 TokenType.RIGHT, 402 TokenType.FULL, 403 } 404 405 JOIN_KINDS = { 406 TokenType.INNER, 407 TokenType.OUTER, 408 TokenType.CROSS, 409 TokenType.SEMI, 410 TokenType.ANTI, 411 } 412 413 LAMBDAS = { 414 TokenType.ARROW: lambda self, expressions: self.expression( 415 exp.Lambda, 416 this=self._replace_lambda( 417 self._parse_conjunction(), 418 {node.name for node in expressions}, 419 ), 420 expressions=expressions, 421 ), 422 TokenType.FARROW: lambda self, expressions: self.expression( 423 exp.Kwarg, 424 this=exp.Var(this=expressions[0].name), 425 expression=self._parse_conjunction(), 426 ), 427 } 428 429 COLUMN_OPERATORS = { 430 TokenType.DOT: None, 431 TokenType.DCOLON: lambda self, this, to: self.expression( 432 exp.Cast if self.STRICT_CAST else exp.TryCast, 433 this=this, 434 to=to, 435 ), 436 TokenType.ARROW: lambda self, this, path: self.expression( 437 exp.JSONExtract, 438 this=this, 439 expression=path, 440 ), 441 TokenType.DARROW: lambda self, this, path: self.expression( 442 exp.JSONExtractScalar, 443 this=this, 444 expression=path, 445 ), 446 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 447 exp.JSONBExtract, 448 this=this, 449 expression=path, 450 ), 451 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtractScalar, 453 this=this, 454 expression=path, 455 ), 456 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 457 exp.JSONBContains, 458 this=this, 459 expression=key, 460 ), 461 } 462 463 EXPRESSION_PARSERS = { 464 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 465 exp.Column: lambda self: self._parse_column(), 466 exp.Condition: lambda self: self._parse_conjunction(), 467 exp.DataType: lambda self: self._parse_types(), 468 exp.Expression: lambda self: self._parse_statement(), 469 exp.From: lambda self: self._parse_from(), 470 exp.Group: lambda self: self._parse_group(), 471 exp.Having: lambda self: self._parse_having(), 472 exp.Identifier: lambda self: self._parse_id_var(), 473 exp.Join: lambda self: self._parse_join(), 474 exp.Lambda: lambda self: self._parse_lambda(), 475 exp.Lateral: lambda self: self._parse_lateral(), 476 exp.Limit: lambda self: self._parse_limit(), 477 exp.Offset: lambda self: self._parse_offset(), 478 exp.Order: lambda self: self._parse_order(), 479 exp.Ordered: lambda self: self._parse_ordered(), 480 exp.Properties: lambda self: self._parse_properties(), 481 exp.Qualify: lambda self: self._parse_qualify(), 482 exp.Returning: lambda self: self._parse_returning(), 483 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 484 exp.Table: lambda self: self._parse_table_parts(), 485 exp.TableAlias: lambda self: self._parse_table_alias(), 486 exp.Where: lambda self: self._parse_where(), 487 exp.Window: lambda self: self._parse_named_window(), 488 exp.With: lambda self: self._parse_with(), 489 "JOIN_TYPE": lambda self: self._parse_join_parts(), 490 } 491 492 STATEMENT_PARSERS = { 493 TokenType.ALTER: lambda self: self._parse_alter(), 494 TokenType.BEGIN: lambda self: self._parse_transaction(), 495 TokenType.CACHE: lambda self: self._parse_cache(), 496 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 497 TokenType.COMMENT: lambda self: self._parse_comment(), 498 TokenType.CREATE: lambda self: self._parse_create(), 499 TokenType.DELETE: lambda self: self._parse_delete(), 500 TokenType.DESC: lambda self: self._parse_describe(), 501 TokenType.DESCRIBE: lambda self: self._parse_describe(), 502 TokenType.DROP: lambda self: self._parse_drop(), 503 TokenType.END: lambda self: self._parse_commit_or_rollback(), 504 TokenType.FROM: lambda self: exp.select("*").from_( 505 t.cast(exp.From, self._parse_from(skip_from_token=True)) 506 ), 507 TokenType.INSERT: lambda self: self._parse_insert(), 508 TokenType.LOAD: lambda self: self._parse_load(), 509 TokenType.MERGE: lambda self: self._parse_merge(), 510 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 511 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 512 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 513 TokenType.SET: lambda self: self._parse_set(), 514 TokenType.UNCACHE: lambda self: self._parse_uncache(), 515 TokenType.UPDATE: lambda self: self._parse_update(), 516 TokenType.USE: lambda self: self.expression( 517 exp.Use, 518 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 519 and exp.Var(this=self._prev.text), 520 this=self._parse_table(schema=False), 521 ), 522 } 523 524 UNARY_PARSERS = { 525 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 526 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 527 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 528 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 529 } 530 531 PRIMARY_PARSERS = { 532 TokenType.STRING: lambda self, token: self.expression( 533 exp.Literal, this=token.text, is_string=True 534 ), 535 TokenType.NUMBER: lambda self, token: self.expression( 536 exp.Literal, this=token.text, is_string=False 537 ), 538 TokenType.STAR: lambda self, _: self.expression( 539 exp.Star, 540 **{"except": self._parse_except(), "replace": self._parse_replace()}, 541 ), 542 TokenType.NULL: lambda self, _: self.expression(exp.Null), 543 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 544 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 545 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 546 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 547 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 548 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 549 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 550 exp.National, this=token.text 551 ), 552 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 553 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 554 } 555 556 PLACEHOLDER_PARSERS = { 557 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 558 TokenType.PARAMETER: lambda self: self._parse_parameter(), 559 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 560 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 561 else None, 562 } 563 564 RANGE_PARSERS = { 565 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 566 TokenType.GLOB: binary_range_parser(exp.Glob), 567 TokenType.ILIKE: binary_range_parser(exp.ILike), 568 TokenType.IN: lambda self, this: self._parse_in(this), 569 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 570 TokenType.IS: lambda self, this: self._parse_is(this), 571 TokenType.LIKE: binary_range_parser(exp.Like), 572 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 573 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 574 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 575 } 576 577 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 578 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 579 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 580 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 581 "CHARACTER SET": lambda self: self._parse_character_set(), 582 "CHECKSUM": lambda self: self._parse_checksum(), 583 "CLUSTER": lambda self: self._parse_cluster(), 584 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 585 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 586 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 587 "DEFINER": lambda self: self._parse_definer(), 588 "DETERMINISTIC": lambda self: self.expression( 589 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 590 ), 591 "DISTKEY": lambda self: self._parse_distkey(), 592 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 593 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 594 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 595 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 596 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 597 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 598 "FREESPACE": lambda self: self._parse_freespace(), 599 "IMMUTABLE": lambda self: self.expression( 600 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 601 ), 602 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 603 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 604 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 605 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 606 "LIKE": lambda self: self._parse_create_like(), 607 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 608 "LOCK": lambda self: self._parse_locking(), 609 "LOCKING": lambda self: self._parse_locking(), 610 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 611 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 612 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 613 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 614 "NO": lambda self: self._parse_no_property(), 615 "ON": lambda self: self._parse_on_property(), 616 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 617 "PARTITION BY": lambda self: self._parse_partitioned_by(), 618 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 619 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 620 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 621 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 622 "RETURNS": lambda self: self._parse_returns(), 623 "ROW": lambda self: self._parse_row(), 624 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 625 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 626 "SETTINGS": lambda self: self.expression( 627 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 628 ), 629 "SORTKEY": lambda self: self._parse_sortkey(), 630 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 631 "STABLE": lambda self: self.expression( 632 exp.StabilityProperty, this=exp.Literal.string("STABLE") 633 ), 634 "STORED": lambda self: self._parse_stored(), 635 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 636 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 637 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 638 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 639 "TTL": lambda self: self._parse_ttl(), 640 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 641 "VOLATILE": lambda self: self._parse_volatile_property(), 642 "WITH": lambda self: self._parse_with_property(), 643 } 644 645 CONSTRAINT_PARSERS = { 646 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 647 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 648 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 649 "CHARACTER SET": lambda self: self.expression( 650 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 651 ), 652 "CHECK": lambda self: self.expression( 653 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 654 ), 655 "COLLATE": lambda self: self.expression( 656 exp.CollateColumnConstraint, this=self._parse_var() 657 ), 658 "COMMENT": lambda self: self.expression( 659 exp.CommentColumnConstraint, this=self._parse_string() 660 ), 661 "COMPRESS": lambda self: self._parse_compress(), 662 "DEFAULT": lambda self: self.expression( 663 exp.DefaultColumnConstraint, this=self._parse_bitwise() 664 ), 665 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 666 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 667 "FORMAT": lambda self: self.expression( 668 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 669 ), 670 "GENERATED": lambda self: self._parse_generated_as_identity(), 671 "IDENTITY": lambda self: self._parse_auto_increment(), 672 "INLINE": lambda self: self._parse_inline(), 673 "LIKE": lambda self: self._parse_create_like(), 674 "NOT": lambda self: self._parse_not_constraint(), 675 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 676 "ON": lambda self: self._match(TokenType.UPDATE) 677 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 678 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 679 "PRIMARY KEY": lambda self: self._parse_primary_key(), 680 "REFERENCES": lambda self: self._parse_references(match=False), 681 "TITLE": lambda self: self.expression( 682 exp.TitleColumnConstraint, this=self._parse_var_or_string() 683 ), 684 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 685 "UNIQUE": lambda self: self._parse_unique(), 686 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 687 } 688 689 ALTER_PARSERS = { 690 "ADD": lambda self: self._parse_alter_table_add(), 691 "ALTER": lambda self: self._parse_alter_table_alter(), 692 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 693 "DROP": lambda self: self._parse_alter_table_drop(), 694 "RENAME": lambda self: self._parse_alter_table_rename(), 695 } 696 697 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 698 699 NO_PAREN_FUNCTION_PARSERS = { 700 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 701 TokenType.CASE: lambda self: self._parse_case(), 702 TokenType.IF: lambda self: self._parse_if(), 703 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 704 exp.NextValueFor, 705 this=self._parse_column(), 706 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 707 ), 708 } 709 710 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 711 712 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 713 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 714 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 715 "DECODE": lambda self: self._parse_decode(), 716 "EXTRACT": lambda self: self._parse_extract(), 717 "JSON_OBJECT": lambda self: self._parse_json_object(), 718 "LOG": lambda self: self._parse_logarithm(), 719 "MATCH": lambda self: self._parse_match_against(), 720 "OPENJSON": lambda self: self._parse_open_json(), 721 "POSITION": lambda self: self._parse_position(), 722 "SAFE_CAST": lambda self: self._parse_cast(False), 723 "STRING_AGG": lambda self: self._parse_string_agg(), 724 "SUBSTRING": lambda self: self._parse_substring(), 725 "TRIM": lambda self: self._parse_trim(), 726 "TRY_CAST": lambda self: self._parse_cast(False), 727 "TRY_CONVERT": lambda self: self._parse_convert(False), 728 } 729 730 QUERY_MODIFIER_PARSERS = { 731 "joins": lambda self: list(iter(self._parse_join, None)), 732 "laterals": lambda self: list(iter(self._parse_lateral, None)), 733 "match": lambda self: self._parse_match_recognize(), 734 "where": lambda self: self._parse_where(), 735 "group": lambda self: self._parse_group(), 736 "having": lambda self: self._parse_having(), 737 "qualify": lambda self: self._parse_qualify(), 738 "windows": lambda self: self._parse_window_clause(), 739 "order": lambda self: self._parse_order(), 740 "limit": lambda self: self._parse_limit(), 741 "offset": lambda self: self._parse_offset(), 742 "locks": lambda self: self._parse_locks(), 743 "sample": lambda self: self._parse_table_sample(as_modifier=True), 744 } 745 746 SET_PARSERS = { 747 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 748 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 749 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 750 "TRANSACTION": lambda self: self._parse_set_transaction(), 751 } 752 753 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 754 755 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 756 757 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 758 759 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 760 761 TRANSACTION_CHARACTERISTICS = { 762 "ISOLATION LEVEL REPEATABLE READ", 763 "ISOLATION LEVEL READ COMMITTED", 764 "ISOLATION LEVEL READ UNCOMMITTED", 765 "ISOLATION LEVEL SERIALIZABLE", 766 "READ WRITE", 767 "READ ONLY", 768 } 769 770 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 771 772 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 773 774 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 775 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 776 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 777 778 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 779 780 STRICT_CAST = True 781 782 CONVERT_TYPE_FIRST = False 783 784 PREFIXED_PIVOT_COLUMNS = False 785 IDENTIFY_PIVOT_STRINGS = False 786 787 LOG_BASE_FIRST = True 788 LOG_DEFAULTS_TO_LN = False 789 790 __slots__ = ( 791 "error_level", 792 "error_message_context", 793 "sql", 794 "errors", 795 "index_offset", 796 "unnest_column_only", 797 "alias_post_tablesample", 798 "max_errors", 799 "null_ordering", 800 "_tokens", 801 "_index", 802 "_curr", 803 "_next", 804 "_prev", 805 "_prev_comments", 806 "_show_trie", 807 "_set_trie", 808 ) 809 810 def __init__( 811 self, 812 error_level: t.Optional[ErrorLevel] = None, 813 error_message_context: int = 100, 814 index_offset: int = 0, 815 unnest_column_only: bool = False, 816 alias_post_tablesample: bool = False, 817 max_errors: int = 3, 818 null_ordering: t.Optional[str] = None, 819 ): 820 self.error_level = error_level or ErrorLevel.IMMEDIATE 821 self.error_message_context = error_message_context 822 self.index_offset = index_offset 823 self.unnest_column_only = unnest_column_only 824 self.alias_post_tablesample = alias_post_tablesample 825 self.max_errors = max_errors 826 self.null_ordering = null_ordering 827 self.reset() 828 829 def reset(self): 830 self.sql = "" 831 self.errors = [] 832 self._tokens = [] 833 self._index = 0 834 self._curr = None 835 self._next = None 836 self._prev = None 837 self._prev_comments = None 838 839 def parse( 840 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 841 ) -> t.List[t.Optional[exp.Expression]]: 842 """ 843 Parses a list of tokens and returns a list of syntax trees, one tree 844 per parsed SQL statement. 845 846 Args: 847 raw_tokens: the list of tokens. 848 sql: the original SQL string, used to produce helpful debug messages. 849 850 Returns: 851 The list of syntax trees. 852 """ 853 return self._parse( 854 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 855 ) 856 857 def parse_into( 858 self, 859 expression_types: exp.IntoType, 860 raw_tokens: t.List[Token], 861 sql: t.Optional[str] = None, 862 ) -> t.List[t.Optional[exp.Expression]]: 863 """ 864 Parses a list of tokens into a given Expression type. If a collection of Expression 865 types is given instead, this method will try to parse the token list into each one 866 of them, stopping at the first for which the parsing succeeds. 867 868 Args: 869 expression_types: the expression type(s) to try and parse the token list into. 870 raw_tokens: the list of tokens. 871 sql: the original SQL string, used to produce helpful debug messages. 872 873 Returns: 874 The target Expression. 875 """ 876 errors = [] 877 for expression_type in ensure_collection(expression_types): 878 parser = self.EXPRESSION_PARSERS.get(expression_type) 879 if not parser: 880 raise TypeError(f"No parser registered for {expression_type}") 881 try: 882 return self._parse(parser, raw_tokens, sql) 883 except ParseError as e: 884 e.errors[0]["into_expression"] = expression_type 885 errors.append(e) 886 raise ParseError( 887 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 888 errors=merge_errors(errors), 889 ) from errors[-1] 890 891 def _parse( 892 self, 893 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 894 raw_tokens: t.List[Token], 895 sql: t.Optional[str] = None, 896 ) -> t.List[t.Optional[exp.Expression]]: 897 self.reset() 898 self.sql = sql or "" 899 total = len(raw_tokens) 900 chunks: t.List[t.List[Token]] = [[]] 901 902 for i, token in enumerate(raw_tokens): 903 if token.token_type == TokenType.SEMICOLON: 904 if i < total - 1: 905 chunks.append([]) 906 else: 907 chunks[-1].append(token) 908 909 expressions = [] 910 911 for tokens in chunks: 912 self._index = -1 913 self._tokens = tokens 914 self._advance() 915 916 expressions.append(parse_method(self)) 917 918 if self._index < len(self._tokens): 919 self.raise_error("Invalid expression / Unexpected token") 920 921 self.check_errors() 922 923 return expressions 924 925 def check_errors(self) -> None: 926 """ 927 Logs or raises any found errors, depending on the chosen error level setting. 928 """ 929 if self.error_level == ErrorLevel.WARN: 930 for error in self.errors: 931 logger.error(str(error)) 932 elif self.error_level == ErrorLevel.RAISE and self.errors: 933 raise ParseError( 934 concat_messages(self.errors, self.max_errors), 935 errors=merge_errors(self.errors), 936 ) 937 938 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 939 """ 940 Appends an error in the list of recorded errors or raises it, depending on the chosen 941 error level setting. 942 """ 943 token = token or self._curr or self._prev or Token.string("") 944 start = token.start 945 end = token.end + 1 946 start_context = self.sql[max(start - self.error_message_context, 0) : start] 947 highlight = self.sql[start:end] 948 end_context = self.sql[end : end + self.error_message_context] 949 950 error = ParseError.new( 951 f"{message}. Line {token.line}, Col: {token.col}.\n" 952 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 953 description=message, 954 line=token.line, 955 col=token.col, 956 start_context=start_context, 957 highlight=highlight, 958 end_context=end_context, 959 ) 960 961 if self.error_level == ErrorLevel.IMMEDIATE: 962 raise error 963 964 self.errors.append(error) 965 966 def expression( 967 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 968 ) -> E: 969 """ 970 Creates a new, validated Expression. 971 972 Args: 973 exp_class: the expression class to instantiate. 974 comments: an optional list of comments to attach to the expression. 975 kwargs: the arguments to set for the expression along with their respective values. 976 977 Returns: 978 The target expression. 979 """ 980 instance = exp_class(**kwargs) 981 instance.add_comments(comments) if comments else self._add_comments(instance) 982 self.validate_expression(instance) 983 return instance 984 985 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 986 if expression and self._prev_comments: 987 expression.add_comments(self._prev_comments) 988 self._prev_comments = None 989 990 def validate_expression( 991 self, expression: exp.Expression, args: t.Optional[t.List] = None 992 ) -> None: 993 """ 994 Validates an already instantiated expression, making sure that all its mandatory arguments 995 are set. 996 997 Args: 998 expression: the expression to validate. 999 args: an optional list of items that was used to instantiate the expression, if it's a Func. 1000 """ 1001 if self.error_level == ErrorLevel.IGNORE: 1002 return 1003 1004 for error_message in expression.error_messages(args): 1005 self.raise_error(error_message) 1006 1007 def _find_sql(self, start: Token, end: Token) -> str: 1008 return self.sql[start.start : end.end + 1] 1009 1010 def _advance(self, times: int = 1) -> None: 1011 self._index += times 1012 self._curr = seq_get(self._tokens, self._index) 1013 self._next = seq_get(self._tokens, self._index + 1) 1014 if self._index > 0: 1015 self._prev = self._tokens[self._index - 1] 1016 self._prev_comments = self._prev.comments 1017 else: 1018 self._prev = None 1019 self._prev_comments = None 1020 1021 def _retreat(self, index: int) -> None: 1022 if index != self._index: 1023 self._advance(index - self._index) 1024 1025 def _parse_command(self) -> exp.Command: 1026 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1027 1028 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1029 start = self._prev 1030 exists = self._parse_exists() if allow_exists else None 1031 1032 self._match(TokenType.ON) 1033 1034 kind = self._match_set(self.CREATABLES) and self._prev 1035 1036 if not kind: 1037 return self._parse_as_command(start) 1038 1039 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1040 this = self._parse_user_defined_function(kind=kind.token_type) 1041 elif kind.token_type == TokenType.TABLE: 1042 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1043 elif kind.token_type == TokenType.COLUMN: 1044 this = self._parse_column() 1045 else: 1046 this = self._parse_id_var() 1047 1048 self._match(TokenType.IS) 1049 1050 return self.expression( 1051 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1052 ) 1053 1054 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1055 def _parse_ttl(self) -> exp.Expression: 1056 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1057 this = self._parse_bitwise() 1058 1059 if self._match_text_seq("DELETE"): 1060 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1061 if self._match_text_seq("RECOMPRESS"): 1062 return self.expression( 1063 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1064 ) 1065 if self._match_text_seq("TO", "DISK"): 1066 return self.expression( 1067 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1068 ) 1069 if self._match_text_seq("TO", "VOLUME"): 1070 return self.expression( 1071 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1072 ) 1073 1074 return this 1075 1076 expressions = self._parse_csv(_parse_ttl_action) 1077 where = self._parse_where() 1078 group = self._parse_group() 1079 1080 aggregates = None 1081 if group and self._match(TokenType.SET): 1082 aggregates = self._parse_csv(self._parse_set_item) 1083 1084 return self.expression( 1085 exp.MergeTreeTTL, 1086 expressions=expressions, 1087 where=where, 1088 group=group, 1089 aggregates=aggregates, 1090 ) 1091 1092 def _parse_statement(self) -> t.Optional[exp.Expression]: 1093 if self._curr is None: 1094 return None 1095 1096 if self._match_set(self.STATEMENT_PARSERS): 1097 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1098 1099 if self._match_set(Tokenizer.COMMANDS): 1100 return self._parse_command() 1101 1102 expression = self._parse_expression() 1103 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1104 return self._parse_query_modifiers(expression) 1105 1106 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1107 start = self._prev 1108 temporary = self._match(TokenType.TEMPORARY) 1109 materialized = self._match_text_seq("MATERIALIZED") 1110 kind = self._match_set(self.CREATABLES) and self._prev.text 1111 if not kind: 1112 return self._parse_as_command(start) 1113 1114 return self.expression( 1115 exp.Drop, 1116 exists=self._parse_exists(), 1117 this=self._parse_table(schema=True), 1118 kind=kind, 1119 temporary=temporary, 1120 materialized=materialized, 1121 cascade=self._match_text_seq("CASCADE"), 1122 constraints=self._match_text_seq("CONSTRAINTS"), 1123 purge=self._match_text_seq("PURGE"), 1124 ) 1125 1126 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1127 return ( 1128 self._match(TokenType.IF) 1129 and (not not_ or self._match(TokenType.NOT)) 1130 and self._match(TokenType.EXISTS) 1131 ) 1132 1133 def _parse_create(self) -> t.Optional[exp.Expression]: 1134 start = self._prev 1135 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1136 TokenType.OR, TokenType.REPLACE 1137 ) 1138 unique = self._match(TokenType.UNIQUE) 1139 1140 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1141 self._match(TokenType.TABLE) 1142 1143 properties = None 1144 create_token = self._match_set(self.CREATABLES) and self._prev 1145 1146 if not create_token: 1147 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1148 create_token = self._match_set(self.CREATABLES) and self._prev 1149 1150 if not properties or not create_token: 1151 return self._parse_as_command(start) 1152 1153 exists = self._parse_exists(not_=True) 1154 this = None 1155 expression = None 1156 indexes = None 1157 no_schema_binding = None 1158 begin = None 1159 clone = None 1160 1161 def extend_props(temp_props: t.Optional[exp.Expression]) -> None: 1162 nonlocal properties 1163 if properties and temp_props: 1164 properties.expressions.extend(temp_props.expressions) 1165 elif temp_props: 1166 properties = temp_props 1167 1168 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1169 this = self._parse_user_defined_function(kind=create_token.token_type) 1170 extend_props(self._parse_properties()) 1171 1172 self._match(TokenType.ALIAS) 1173 begin = self._match(TokenType.BEGIN) 1174 return_ = self._match_text_seq("RETURN") 1175 expression = self._parse_statement() 1176 1177 if return_: 1178 expression = self.expression(exp.Return, this=expression) 1179 elif create_token.token_type == TokenType.INDEX: 1180 this = self._parse_index(index=self._parse_id_var()) 1181 elif create_token.token_type in self.DB_CREATABLES: 1182 table_parts = self._parse_table_parts(schema=True) 1183 1184 # exp.Properties.Location.POST_NAME 1185 self._match(TokenType.COMMA) 1186 extend_props(self._parse_properties(before=True)) 1187 1188 this = self._parse_schema(this=table_parts) 1189 1190 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1191 extend_props(self._parse_properties()) 1192 1193 self._match(TokenType.ALIAS) 1194 1195 # exp.Properties.Location.POST_ALIAS 1196 if not ( 1197 self._match(TokenType.SELECT, advance=False) 1198 or self._match(TokenType.WITH, advance=False) 1199 or self._match(TokenType.L_PAREN, advance=False) 1200 ): 1201 extend_props(self._parse_properties()) 1202 1203 expression = self._parse_ddl_select() 1204 1205 if create_token.token_type == TokenType.TABLE: 1206 indexes = [] 1207 while True: 1208 index = self._parse_index() 1209 1210 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1211 extend_props(self._parse_properties()) 1212 1213 if not index: 1214 break 1215 else: 1216 self._match(TokenType.COMMA) 1217 indexes.append(index) 1218 elif create_token.token_type == TokenType.VIEW: 1219 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1220 no_schema_binding = True 1221 1222 if self._match_text_seq("CLONE"): 1223 clone = self._parse_table(schema=True) 1224 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1225 clone_kind = ( 1226 self._match(TokenType.L_PAREN) 1227 and self._match_texts(self.CLONE_KINDS) 1228 and self._prev.text.upper() 1229 ) 1230 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1231 self._match(TokenType.R_PAREN) 1232 clone = self.expression( 1233 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1234 ) 1235 1236 return self.expression( 1237 exp.Create, 1238 this=this, 1239 kind=create_token.text, 1240 replace=replace, 1241 unique=unique, 1242 expression=expression, 1243 exists=exists, 1244 properties=properties, 1245 indexes=indexes, 1246 no_schema_binding=no_schema_binding, 1247 begin=begin, 1248 clone=clone, 1249 ) 1250 1251 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1252 # only used for teradata currently 1253 self._match(TokenType.COMMA) 1254 1255 kwargs = { 1256 "no": self._match_text_seq("NO"), 1257 "dual": self._match_text_seq("DUAL"), 1258 "before": self._match_text_seq("BEFORE"), 1259 "default": self._match_text_seq("DEFAULT"), 1260 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1261 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1262 "after": self._match_text_seq("AFTER"), 1263 "minimum": self._match_texts(("MIN", "MINIMUM")), 1264 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1265 } 1266 1267 if self._match_texts(self.PROPERTY_PARSERS): 1268 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1269 try: 1270 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1271 except TypeError: 1272 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1273 1274 return None 1275 1276 def _parse_property(self) -> t.Optional[exp.Expression]: 1277 if self._match_texts(self.PROPERTY_PARSERS): 1278 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1279 1280 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1281 return self._parse_character_set(default=True) 1282 1283 if self._match_text_seq("COMPOUND", "SORTKEY"): 1284 return self._parse_sortkey(compound=True) 1285 1286 if self._match_text_seq("SQL", "SECURITY"): 1287 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1288 1289 assignment = self._match_pair( 1290 TokenType.VAR, TokenType.EQ, advance=False 1291 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1292 1293 if assignment: 1294 key = self._parse_var_or_string() 1295 self._match(TokenType.EQ) 1296 return self.expression(exp.Property, this=key, value=self._parse_column()) 1297 1298 return None 1299 1300 def _parse_stored(self) -> exp.Expression: 1301 self._match(TokenType.ALIAS) 1302 1303 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1304 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1305 1306 return self.expression( 1307 exp.FileFormatProperty, 1308 this=self.expression( 1309 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1310 ) 1311 if input_format or output_format 1312 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1313 ) 1314 1315 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1316 self._match(TokenType.EQ) 1317 self._match(TokenType.ALIAS) 1318 return self.expression(exp_class, this=self._parse_field()) 1319 1320 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]: 1321 properties = [] 1322 1323 while True: 1324 if before: 1325 prop = self._parse_property_before() 1326 else: 1327 prop = self._parse_property() 1328 1329 if not prop: 1330 break 1331 for p in ensure_list(prop): 1332 properties.append(p) 1333 1334 if properties: 1335 return self.expression(exp.Properties, expressions=properties) 1336 1337 return None 1338 1339 def _parse_fallback(self, no: bool = False) -> exp.Expression: 1340 return self.expression( 1341 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1342 ) 1343 1344 def _parse_volatile_property(self) -> exp.Expression: 1345 if self._index >= 2: 1346 pre_volatile_token = self._tokens[self._index - 2] 1347 else: 1348 pre_volatile_token = None 1349 1350 if pre_volatile_token and pre_volatile_token.token_type in ( 1351 TokenType.CREATE, 1352 TokenType.REPLACE, 1353 TokenType.UNIQUE, 1354 ): 1355 return exp.VolatileProperty() 1356 1357 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1358 1359 def _parse_with_property( 1360 self, 1361 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1362 self._match(TokenType.WITH) 1363 if self._match(TokenType.L_PAREN, advance=False): 1364 return self._parse_wrapped_csv(self._parse_property) 1365 1366 if self._match_text_seq("JOURNAL"): 1367 return self._parse_withjournaltable() 1368 1369 if self._match_text_seq("DATA"): 1370 return self._parse_withdata(no=False) 1371 elif self._match_text_seq("NO", "DATA"): 1372 return self._parse_withdata(no=True) 1373 1374 if not self._next: 1375 return None 1376 1377 return self._parse_withisolatedloading() 1378 1379 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1380 def _parse_definer(self) -> t.Optional[exp.Expression]: 1381 self._match(TokenType.EQ) 1382 1383 user = self._parse_id_var() 1384 self._match(TokenType.PARAMETER) 1385 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1386 1387 if not user or not host: 1388 return None 1389 1390 return exp.DefinerProperty(this=f"{user}@{host}") 1391 1392 def _parse_withjournaltable(self) -> exp.Expression: 1393 self._match(TokenType.TABLE) 1394 self._match(TokenType.EQ) 1395 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1396 1397 def _parse_log(self, no: bool = False) -> exp.Expression: 1398 return self.expression(exp.LogProperty, no=no) 1399 1400 def _parse_journal(self, **kwargs) -> exp.Expression: 1401 return self.expression(exp.JournalProperty, **kwargs) 1402 1403 def _parse_checksum(self) -> exp.Expression: 1404 self._match(TokenType.EQ) 1405 1406 on = None 1407 if self._match(TokenType.ON): 1408 on = True 1409 elif self._match_text_seq("OFF"): 1410 on = False 1411 default = self._match(TokenType.DEFAULT) 1412 1413 return self.expression( 1414 exp.ChecksumProperty, 1415 on=on, 1416 default=default, 1417 ) 1418 1419 def _parse_cluster(self) -> t.Optional[exp.Expression]: 1420 if not self._match_text_seq("BY"): 1421 self._retreat(self._index - 1) 1422 return None 1423 return self.expression( 1424 exp.Cluster, 1425 expressions=self._parse_csv(self._parse_ordered), 1426 ) 1427 1428 def _parse_freespace(self) -> exp.Expression: 1429 self._match(TokenType.EQ) 1430 return self.expression( 1431 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1432 ) 1433 1434 def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression: 1435 if self._match(TokenType.EQ): 1436 return self.expression( 1437 exp.MergeBlockRatioProperty, 1438 this=self._parse_number(), 1439 percent=self._match(TokenType.PERCENT), 1440 ) 1441 return self.expression( 1442 exp.MergeBlockRatioProperty, 1443 no=no, 1444 default=default, 1445 ) 1446 1447 def _parse_datablocksize( 1448 self, 1449 default: t.Optional[bool] = None, 1450 minimum: t.Optional[bool] = None, 1451 maximum: t.Optional[bool] = None, 1452 ) -> exp.Expression: 1453 self._match(TokenType.EQ) 1454 size = self._parse_number() 1455 units = None 1456 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1457 units = self._prev.text 1458 return self.expression( 1459 exp.DataBlocksizeProperty, 1460 size=size, 1461 units=units, 1462 default=default, 1463 minimum=minimum, 1464 maximum=maximum, 1465 ) 1466 1467 def _parse_blockcompression(self) -> exp.Expression: 1468 self._match(TokenType.EQ) 1469 always = self._match_text_seq("ALWAYS") 1470 manual = self._match_text_seq("MANUAL") 1471 never = self._match_text_seq("NEVER") 1472 default = self._match_text_seq("DEFAULT") 1473 autotemp = None 1474 if self._match_text_seq("AUTOTEMP"): 1475 autotemp = self._parse_schema() 1476 1477 return self.expression( 1478 exp.BlockCompressionProperty, 1479 always=always, 1480 manual=manual, 1481 never=never, 1482 default=default, 1483 autotemp=autotemp, 1484 ) 1485 1486 def _parse_withisolatedloading(self) -> exp.Expression: 1487 no = self._match_text_seq("NO") 1488 concurrent = self._match_text_seq("CONCURRENT") 1489 self._match_text_seq("ISOLATED", "LOADING") 1490 for_all = self._match_text_seq("FOR", "ALL") 1491 for_insert = self._match_text_seq("FOR", "INSERT") 1492 for_none = self._match_text_seq("FOR", "NONE") 1493 return self.expression( 1494 exp.IsolatedLoadingProperty, 1495 no=no, 1496 concurrent=concurrent, 1497 for_all=for_all, 1498 for_insert=for_insert, 1499 for_none=for_none, 1500 ) 1501 1502 def _parse_locking(self) -> exp.Expression: 1503 if self._match(TokenType.TABLE): 1504 kind = "TABLE" 1505 elif self._match(TokenType.VIEW): 1506 kind = "VIEW" 1507 elif self._match(TokenType.ROW): 1508 kind = "ROW" 1509 elif self._match_text_seq("DATABASE"): 1510 kind = "DATABASE" 1511 else: 1512 kind = None 1513 1514 if kind in ("DATABASE", "TABLE", "VIEW"): 1515 this = self._parse_table_parts() 1516 else: 1517 this = None 1518 1519 if self._match(TokenType.FOR): 1520 for_or_in = "FOR" 1521 elif self._match(TokenType.IN): 1522 for_or_in = "IN" 1523 else: 1524 for_or_in = None 1525 1526 if self._match_text_seq("ACCESS"): 1527 lock_type = "ACCESS" 1528 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1529 lock_type = "EXCLUSIVE" 1530 elif self._match_text_seq("SHARE"): 1531 lock_type = "SHARE" 1532 elif self._match_text_seq("READ"): 1533 lock_type = "READ" 1534 elif self._match_text_seq("WRITE"): 1535 lock_type = "WRITE" 1536 elif self._match_text_seq("CHECKSUM"): 1537 lock_type = "CHECKSUM" 1538 else: 1539 lock_type = None 1540 1541 override = self._match_text_seq("OVERRIDE") 1542 1543 return self.expression( 1544 exp.LockingProperty, 1545 this=this, 1546 kind=kind, 1547 for_or_in=for_or_in, 1548 lock_type=lock_type, 1549 override=override, 1550 ) 1551 1552 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1553 if self._match(TokenType.PARTITION_BY): 1554 return self._parse_csv(self._parse_conjunction) 1555 return [] 1556 1557 def _parse_partitioned_by(self) -> exp.Expression: 1558 self._match(TokenType.EQ) 1559 return self.expression( 1560 exp.PartitionedByProperty, 1561 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1562 ) 1563 1564 def _parse_withdata(self, no: bool = False) -> exp.Expression: 1565 if self._match_text_seq("AND", "STATISTICS"): 1566 statistics = True 1567 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1568 statistics = False 1569 else: 1570 statistics = None 1571 1572 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1573 1574 def _parse_no_property(self) -> t.Optional[exp.Property]: 1575 if self._match_text_seq("PRIMARY", "INDEX"): 1576 return exp.NoPrimaryIndexProperty() 1577 return None 1578 1579 def _parse_on_property(self) -> t.Optional[exp.Property]: 1580 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1581 return exp.OnCommitProperty() 1582 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1583 return exp.OnCommitProperty(delete=True) 1584 return None 1585 1586 def _parse_distkey(self) -> exp.Expression: 1587 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1588 1589 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1590 table = self._parse_table(schema=True) 1591 options = [] 1592 while self._match_texts(("INCLUDING", "EXCLUDING")): 1593 this = self._prev.text.upper() 1594 id_var = self._parse_id_var() 1595 1596 if not id_var: 1597 return None 1598 1599 options.append( 1600 self.expression( 1601 exp.Property, 1602 this=this, 1603 value=exp.Var(this=id_var.this.upper()), 1604 ) 1605 ) 1606 return self.expression(exp.LikeProperty, this=table, expressions=options) 1607 1608 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1609 return self.expression( 1610 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1611 ) 1612 1613 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1614 self._match(TokenType.EQ) 1615 return self.expression( 1616 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1617 ) 1618 1619 def _parse_returns(self) -> exp.Expression: 1620 value: t.Optional[exp.Expression] 1621 is_table = self._match(TokenType.TABLE) 1622 1623 if is_table: 1624 if self._match(TokenType.LT): 1625 value = self.expression( 1626 exp.Schema, 1627 this="TABLE", 1628 expressions=self._parse_csv(self._parse_struct_types), 1629 ) 1630 if not self._match(TokenType.GT): 1631 self.raise_error("Expecting >") 1632 else: 1633 value = self._parse_schema(exp.Var(this="TABLE")) 1634 else: 1635 value = self._parse_types() 1636 1637 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1638 1639 def _parse_describe(self) -> exp.Expression: 1640 kind = self._match_set(self.CREATABLES) and self._prev.text 1641 this = self._parse_table() 1642 1643 return self.expression(exp.Describe, this=this, kind=kind) 1644 1645 def _parse_insert(self) -> exp.Expression: 1646 overwrite = self._match(TokenType.OVERWRITE) 1647 local = self._match_text_seq("LOCAL") 1648 alternative = None 1649 1650 if self._match_text_seq("DIRECTORY"): 1651 this: t.Optional[exp.Expression] = self.expression( 1652 exp.Directory, 1653 this=self._parse_var_or_string(), 1654 local=local, 1655 row_format=self._parse_row_format(match_row=True), 1656 ) 1657 else: 1658 if self._match(TokenType.OR): 1659 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1660 1661 self._match(TokenType.INTO) 1662 self._match(TokenType.TABLE) 1663 this = self._parse_table(schema=True) 1664 1665 return self.expression( 1666 exp.Insert, 1667 this=this, 1668 exists=self._parse_exists(), 1669 partition=self._parse_partition(), 1670 expression=self._parse_ddl_select(), 1671 conflict=self._parse_on_conflict(), 1672 returning=self._parse_returning(), 1673 overwrite=overwrite, 1674 alternative=alternative, 1675 ) 1676 1677 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1678 conflict = self._match_text_seq("ON", "CONFLICT") 1679 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1680 1681 if not (conflict or duplicate): 1682 return None 1683 1684 nothing = None 1685 expressions = None 1686 key = None 1687 constraint = None 1688 1689 if conflict: 1690 if self._match_text_seq("ON", "CONSTRAINT"): 1691 constraint = self._parse_id_var() 1692 else: 1693 key = self._parse_csv(self._parse_value) 1694 1695 self._match_text_seq("DO") 1696 if self._match_text_seq("NOTHING"): 1697 nothing = True 1698 else: 1699 self._match(TokenType.UPDATE) 1700 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1701 1702 return self.expression( 1703 exp.OnConflict, 1704 duplicate=duplicate, 1705 expressions=expressions, 1706 nothing=nothing, 1707 key=key, 1708 constraint=constraint, 1709 ) 1710 1711 def _parse_returning(self) -> t.Optional[exp.Expression]: 1712 if not self._match(TokenType.RETURNING): 1713 return None 1714 1715 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1716 1717 def _parse_row(self) -> t.Optional[exp.Expression]: 1718 if not self._match(TokenType.FORMAT): 1719 return None 1720 return self._parse_row_format() 1721 1722 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1723 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1724 return None 1725 1726 if self._match_text_seq("SERDE"): 1727 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1728 1729 self._match_text_seq("DELIMITED") 1730 1731 kwargs = {} 1732 1733 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1734 kwargs["fields"] = self._parse_string() 1735 if self._match_text_seq("ESCAPED", "BY"): 1736 kwargs["escaped"] = self._parse_string() 1737 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1738 kwargs["collection_items"] = self._parse_string() 1739 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1740 kwargs["map_keys"] = self._parse_string() 1741 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1742 kwargs["lines"] = self._parse_string() 1743 if self._match_text_seq("NULL", "DEFINED", "AS"): 1744 kwargs["null"] = self._parse_string() 1745 1746 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1747 1748 def _parse_load(self) -> exp.Expression: 1749 if self._match_text_seq("DATA"): 1750 local = self._match_text_seq("LOCAL") 1751 self._match_text_seq("INPATH") 1752 inpath = self._parse_string() 1753 overwrite = self._match(TokenType.OVERWRITE) 1754 self._match_pair(TokenType.INTO, TokenType.TABLE) 1755 1756 return self.expression( 1757 exp.LoadData, 1758 this=self._parse_table(schema=True), 1759 local=local, 1760 overwrite=overwrite, 1761 inpath=inpath, 1762 partition=self._parse_partition(), 1763 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1764 serde=self._match_text_seq("SERDE") and self._parse_string(), 1765 ) 1766 return self._parse_as_command(self._prev) 1767 1768 def _parse_delete(self) -> exp.Expression: 1769 self._match(TokenType.FROM) 1770 1771 return self.expression( 1772 exp.Delete, 1773 this=self._parse_table(), 1774 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1775 where=self._parse_where(), 1776 returning=self._parse_returning(), 1777 ) 1778 1779 def _parse_update(self) -> exp.Expression: 1780 return self.expression( 1781 exp.Update, 1782 **{ # type: ignore 1783 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1784 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1785 "from": self._parse_from(modifiers=True), 1786 "where": self._parse_where(), 1787 "returning": self._parse_returning(), 1788 }, 1789 ) 1790 1791 def _parse_uncache(self) -> exp.Expression: 1792 if not self._match(TokenType.TABLE): 1793 self.raise_error("Expecting TABLE after UNCACHE") 1794 1795 return self.expression( 1796 exp.Uncache, 1797 exists=self._parse_exists(), 1798 this=self._parse_table(schema=True), 1799 ) 1800 1801 def _parse_cache(self) -> exp.Expression: 1802 lazy = self._match_text_seq("LAZY") 1803 self._match(TokenType.TABLE) 1804 table = self._parse_table(schema=True) 1805 options = [] 1806 1807 if self._match_text_seq("OPTIONS"): 1808 self._match_l_paren() 1809 k = self._parse_string() 1810 self._match(TokenType.EQ) 1811 v = self._parse_string() 1812 options = [k, v] 1813 self._match_r_paren() 1814 1815 self._match(TokenType.ALIAS) 1816 return self.expression( 1817 exp.Cache, 1818 this=table, 1819 lazy=lazy, 1820 options=options, 1821 expression=self._parse_select(nested=True), 1822 ) 1823 1824 def _parse_partition(self) -> t.Optional[exp.Expression]: 1825 if not self._match(TokenType.PARTITION): 1826 return None 1827 1828 return self.expression( 1829 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1830 ) 1831 1832 def _parse_value(self) -> exp.Expression: 1833 if self._match(TokenType.L_PAREN): 1834 expressions = self._parse_csv(self._parse_conjunction) 1835 self._match_r_paren() 1836 return self.expression(exp.Tuple, expressions=expressions) 1837 1838 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1839 # Source: https://prestodb.io/docs/current/sql/values.html 1840 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1841 1842 def _parse_select( 1843 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1844 ) -> t.Optional[exp.Expression]: 1845 cte = self._parse_with() 1846 if cte: 1847 this = self._parse_statement() 1848 1849 if not this: 1850 self.raise_error("Failed to parse any statement following CTE") 1851 return cte 1852 1853 if "with" in this.arg_types: 1854 this.set("with", cte) 1855 else: 1856 self.raise_error(f"{this.key} does not support CTE") 1857 this = cte 1858 elif self._match(TokenType.SELECT): 1859 comments = self._prev_comments 1860 1861 hint = self._parse_hint() 1862 all_ = self._match(TokenType.ALL) 1863 distinct = self._match(TokenType.DISTINCT) 1864 1865 kind = ( 1866 self._match(TokenType.ALIAS) 1867 and self._match_texts(("STRUCT", "VALUE")) 1868 and self._prev.text 1869 ) 1870 1871 if distinct: 1872 distinct = self.expression( 1873 exp.Distinct, 1874 on=self._parse_value() if self._match(TokenType.ON) else None, 1875 ) 1876 1877 if all_ and distinct: 1878 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1879 1880 limit = self._parse_limit(top=True) 1881 expressions = self._parse_csv(self._parse_expression) 1882 1883 this = self.expression( 1884 exp.Select, 1885 kind=kind, 1886 hint=hint, 1887 distinct=distinct, 1888 expressions=expressions, 1889 limit=limit, 1890 ) 1891 this.comments = comments 1892 1893 into = self._parse_into() 1894 if into: 1895 this.set("into", into) 1896 1897 from_ = self._parse_from() 1898 if from_: 1899 this.set("from", from_) 1900 1901 this = self._parse_query_modifiers(this) 1902 elif (table or nested) and self._match(TokenType.L_PAREN): 1903 if self._match(TokenType.PIVOT): 1904 this = self._parse_simplified_pivot() 1905 elif self._match(TokenType.FROM): 1906 this = exp.select("*").from_( 1907 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1908 ) 1909 else: 1910 this = self._parse_table() if table else self._parse_select(nested=True) 1911 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1912 1913 self._match_r_paren() 1914 1915 # early return so that subquery unions aren't parsed again 1916 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1917 # Union ALL should be a property of the top select node, not the subquery 1918 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1919 elif self._match(TokenType.VALUES): 1920 this = self.expression( 1921 exp.Values, 1922 expressions=self._parse_csv(self._parse_value), 1923 alias=self._parse_table_alias(), 1924 ) 1925 else: 1926 this = None 1927 1928 return self._parse_set_operations(this) 1929 1930 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1931 if not skip_with_token and not self._match(TokenType.WITH): 1932 return None 1933 1934 comments = self._prev_comments 1935 recursive = self._match(TokenType.RECURSIVE) 1936 1937 expressions = [] 1938 while True: 1939 expressions.append(self._parse_cte()) 1940 1941 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1942 break 1943 else: 1944 self._match(TokenType.WITH) 1945 1946 return self.expression( 1947 exp.With, comments=comments, expressions=expressions, recursive=recursive 1948 ) 1949 1950 def _parse_cte(self) -> exp.Expression: 1951 alias = self._parse_table_alias() 1952 if not alias or not alias.this: 1953 self.raise_error("Expected CTE to have alias") 1954 1955 self._match(TokenType.ALIAS) 1956 1957 return self.expression( 1958 exp.CTE, 1959 this=self._parse_wrapped(self._parse_statement), 1960 alias=alias, 1961 ) 1962 1963 def _parse_table_alias( 1964 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1965 ) -> t.Optional[exp.Expression]: 1966 any_token = self._match(TokenType.ALIAS) 1967 alias = ( 1968 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1969 or self._parse_string_as_identifier() 1970 ) 1971 1972 index = self._index 1973 if self._match(TokenType.L_PAREN): 1974 columns = self._parse_csv(self._parse_function_parameter) 1975 self._match_r_paren() if columns else self._retreat(index) 1976 else: 1977 columns = None 1978 1979 if not alias and not columns: 1980 return None 1981 1982 return self.expression(exp.TableAlias, this=alias, columns=columns) 1983 1984 def _parse_subquery( 1985 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1986 ) -> t.Optional[exp.Expression]: 1987 if not this: 1988 return None 1989 return self.expression( 1990 exp.Subquery, 1991 this=this, 1992 pivots=self._parse_pivots(), 1993 alias=self._parse_table_alias() if parse_alias else None, 1994 ) 1995 1996 def _parse_query_modifiers( 1997 self, this: t.Optional[exp.Expression] 1998 ) -> t.Optional[exp.Expression]: 1999 if isinstance(this, self.MODIFIABLES): 2000 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2001 expression = parser(self) 2002 2003 if expression: 2004 this.set(key, expression) 2005 return this 2006 2007 def _parse_hint(self) -> t.Optional[exp.Expression]: 2008 if self._match(TokenType.HINT): 2009 hints = self._parse_csv(self._parse_function) 2010 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2011 self.raise_error("Expected */ after HINT") 2012 return self.expression(exp.Hint, expressions=hints) 2013 2014 return None 2015 2016 def _parse_into(self) -> t.Optional[exp.Expression]: 2017 if not self._match(TokenType.INTO): 2018 return None 2019 2020 temp = self._match(TokenType.TEMPORARY) 2021 unlogged = self._match_text_seq("UNLOGGED") 2022 self._match(TokenType.TABLE) 2023 2024 return self.expression( 2025 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2026 ) 2027 2028 def _parse_from( 2029 self, modifiers: bool = False, skip_from_token: bool = False 2030 ) -> t.Optional[exp.From]: 2031 if not skip_from_token and not self._match(TokenType.FROM): 2032 return None 2033 2034 comments = self._prev_comments 2035 this = self._parse_table() 2036 2037 return self.expression( 2038 exp.From, 2039 comments=comments, 2040 this=self._parse_query_modifiers(this) if modifiers else this, 2041 ) 2042 2043 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2044 if not self._match(TokenType.MATCH_RECOGNIZE): 2045 return None 2046 2047 self._match_l_paren() 2048 2049 partition = self._parse_partition_by() 2050 order = self._parse_order() 2051 measures = ( 2052 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2053 ) 2054 2055 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2056 rows = exp.Var(this="ONE ROW PER MATCH") 2057 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2058 text = "ALL ROWS PER MATCH" 2059 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2060 text += f" SHOW EMPTY MATCHES" 2061 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2062 text += f" OMIT EMPTY MATCHES" 2063 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2064 text += f" WITH UNMATCHED ROWS" 2065 rows = exp.Var(this=text) 2066 else: 2067 rows = None 2068 2069 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2070 text = "AFTER MATCH SKIP" 2071 if self._match_text_seq("PAST", "LAST", "ROW"): 2072 text += f" PAST LAST ROW" 2073 elif self._match_text_seq("TO", "NEXT", "ROW"): 2074 text += f" TO NEXT ROW" 2075 elif self._match_text_seq("TO", "FIRST"): 2076 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2077 elif self._match_text_seq("TO", "LAST"): 2078 text += f" TO LAST {self._advance_any().text}" # type: ignore 2079 after = exp.Var(this=text) 2080 else: 2081 after = None 2082 2083 if self._match_text_seq("PATTERN"): 2084 self._match_l_paren() 2085 2086 if not self._curr: 2087 self.raise_error("Expecting )", self._curr) 2088 2089 paren = 1 2090 start = self._curr 2091 2092 while self._curr and paren > 0: 2093 if self._curr.token_type == TokenType.L_PAREN: 2094 paren += 1 2095 if self._curr.token_type == TokenType.R_PAREN: 2096 paren -= 1 2097 end = self._prev 2098 self._advance() 2099 if paren > 0: 2100 self.raise_error("Expecting )", self._curr) 2101 pattern = exp.Var(this=self._find_sql(start, end)) 2102 else: 2103 pattern = None 2104 2105 define = ( 2106 self._parse_csv( 2107 lambda: self.expression( 2108 exp.Alias, 2109 alias=self._parse_id_var(any_token=True), 2110 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2111 ) 2112 ) 2113 if self._match_text_seq("DEFINE") 2114 else None 2115 ) 2116 2117 self._match_r_paren() 2118 2119 return self.expression( 2120 exp.MatchRecognize, 2121 partition_by=partition, 2122 order=order, 2123 measures=measures, 2124 rows=rows, 2125 after=after, 2126 pattern=pattern, 2127 define=define, 2128 alias=self._parse_table_alias(), 2129 ) 2130 2131 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2132 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2133 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2134 2135 if outer_apply or cross_apply: 2136 this = self._parse_select(table=True) 2137 view = None 2138 outer = not cross_apply 2139 elif self._match(TokenType.LATERAL): 2140 this = self._parse_select(table=True) 2141 view = self._match(TokenType.VIEW) 2142 outer = self._match(TokenType.OUTER) 2143 else: 2144 return None 2145 2146 if not this: 2147 this = self._parse_function() or self._parse_id_var(any_token=False) 2148 while self._match(TokenType.DOT): 2149 this = exp.Dot( 2150 this=this, 2151 expression=self._parse_function() or self._parse_id_var(any_token=False), 2152 ) 2153 2154 table_alias: t.Optional[exp.Expression] 2155 2156 if view: 2157 table = self._parse_id_var(any_token=False) 2158 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2159 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2160 else: 2161 table_alias = self._parse_table_alias() 2162 2163 expression = self.expression( 2164 exp.Lateral, 2165 this=this, 2166 view=view, 2167 outer=outer, 2168 alias=table_alias, 2169 ) 2170 2171 return expression 2172 2173 def _parse_join_parts( 2174 self, 2175 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2176 return ( 2177 self._match_set(self.JOIN_METHODS) and self._prev, 2178 self._match_set(self.JOIN_SIDES) and self._prev, 2179 self._match_set(self.JOIN_KINDS) and self._prev, 2180 ) 2181 2182 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2183 if self._match(TokenType.COMMA): 2184 return self.expression(exp.Join, this=self._parse_table()) 2185 2186 index = self._index 2187 method, side, kind = self._parse_join_parts() 2188 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2189 join = self._match(TokenType.JOIN) 2190 2191 if not skip_join_token and not join: 2192 self._retreat(index) 2193 kind = None 2194 method = None 2195 side = None 2196 2197 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2198 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2199 2200 if not skip_join_token and not join and not outer_apply and not cross_apply: 2201 return None 2202 2203 if outer_apply: 2204 side = Token(TokenType.LEFT, "LEFT") 2205 2206 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2207 2208 if method: 2209 kwargs["method"] = method.text 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 2222 return self.expression(exp.Join, **kwargs) 2223 2224 def _parse_index( 2225 self, 2226 index: t.Optional[exp.Expression] = None, 2227 ) -> t.Optional[exp.Expression]: 2228 if index: 2229 unique = None 2230 primary = None 2231 amp = None 2232 2233 self._match(TokenType.ON) 2234 self._match(TokenType.TABLE) # hive 2235 table = self._parse_table_parts(schema=True) 2236 else: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 if not self._match(TokenType.INDEX): 2241 return None 2242 index = self._parse_id_var() 2243 table = None 2244 2245 if self._match(TokenType.L_PAREN, advance=False): 2246 columns = self._parse_wrapped_csv(self._parse_ordered) 2247 else: 2248 columns = None 2249 2250 return self.expression( 2251 exp.Index, 2252 this=index, 2253 table=table, 2254 columns=columns, 2255 unique=unique, 2256 primary=primary, 2257 amp=amp, 2258 partition_by=self._parse_partition_by(), 2259 ) 2260 2261 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2262 return ( 2263 (not schema and self._parse_function()) 2264 or self._parse_id_var(any_token=False) 2265 or self._parse_string_as_identifier() 2266 or self._parse_placeholder() 2267 ) 2268 2269 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2270 catalog = None 2271 db = None 2272 table = self._parse_table_part(schema=schema) 2273 2274 while self._match(TokenType.DOT): 2275 if catalog: 2276 # This allows nesting the table in arbitrarily many dot expressions if needed 2277 table = self.expression( 2278 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2279 ) 2280 else: 2281 catalog = db 2282 db = table 2283 table = self._parse_table_part(schema=schema) 2284 2285 if not table: 2286 self.raise_error(f"Expected table name but got {self._curr}") 2287 2288 return self.expression( 2289 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2290 ) 2291 2292 def _parse_table( 2293 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2294 ) -> t.Optional[exp.Expression]: 2295 lateral = self._parse_lateral() 2296 if lateral: 2297 return lateral 2298 2299 unnest = self._parse_unnest() 2300 if unnest: 2301 return unnest 2302 2303 values = self._parse_derived_table_values() 2304 if values: 2305 return values 2306 2307 subquery = self._parse_select(table=True) 2308 if subquery: 2309 if not subquery.args.get("pivots"): 2310 subquery.set("pivots", self._parse_pivots()) 2311 return subquery 2312 2313 this: exp.Expression = self._parse_table_parts(schema=schema) 2314 2315 if schema: 2316 return self._parse_schema(this=this) 2317 2318 if self.alias_post_tablesample: 2319 table_sample = self._parse_table_sample() 2320 2321 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2322 if alias: 2323 this.set("alias", alias) 2324 2325 if not this.args.get("pivots"): 2326 this.set("pivots", self._parse_pivots()) 2327 2328 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2329 this.set( 2330 "hints", 2331 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2332 ) 2333 self._match_r_paren() 2334 2335 if not self.alias_post_tablesample: 2336 table_sample = self._parse_table_sample() 2337 2338 if table_sample: 2339 table_sample.set("this", this) 2340 this = table_sample 2341 2342 return this 2343 2344 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2345 if not self._match(TokenType.UNNEST): 2346 return None 2347 2348 expressions = self._parse_wrapped_csv(self._parse_type) 2349 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2350 alias = self._parse_table_alias() 2351 2352 if alias and self.unnest_column_only: 2353 if alias.args.get("columns"): 2354 self.raise_error("Unexpected extra column alias in unnest.") 2355 alias.set("columns", [alias.this]) 2356 alias.set("this", None) 2357 2358 offset = None 2359 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2360 self._match(TokenType.ALIAS) 2361 offset = self._parse_id_var() or exp.Identifier(this="offset") 2362 2363 return self.expression( 2364 exp.Unnest, 2365 expressions=expressions, 2366 ordinality=ordinality, 2367 alias=alias, 2368 offset=offset, 2369 ) 2370 2371 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2372 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2373 if not is_derived and not self._match(TokenType.VALUES): 2374 return None 2375 2376 expressions = self._parse_csv(self._parse_value) 2377 2378 if is_derived: 2379 self._match_r_paren() 2380 2381 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2382 2383 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2384 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2385 as_modifier and self._match_text_seq("USING", "SAMPLE") 2386 ): 2387 return None 2388 2389 bucket_numerator = None 2390 bucket_denominator = None 2391 bucket_field = None 2392 percent = None 2393 rows = None 2394 size = None 2395 seed = None 2396 2397 kind = ( 2398 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2399 ) 2400 method = self._parse_var(tokens=(TokenType.ROW,)) 2401 2402 self._match(TokenType.L_PAREN) 2403 2404 num = self._parse_number() 2405 2406 if self._match_text_seq("BUCKET"): 2407 bucket_numerator = self._parse_number() 2408 self._match_text_seq("OUT", "OF") 2409 bucket_denominator = bucket_denominator = self._parse_number() 2410 self._match(TokenType.ON) 2411 bucket_field = self._parse_field() 2412 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2413 percent = num 2414 elif self._match(TokenType.ROWS): 2415 rows = num 2416 else: 2417 size = num 2418 2419 self._match(TokenType.R_PAREN) 2420 2421 if self._match(TokenType.L_PAREN): 2422 method = self._parse_var() 2423 seed = self._match(TokenType.COMMA) and self._parse_number() 2424 self._match_r_paren() 2425 elif self._match_texts(("SEED", "REPEATABLE")): 2426 seed = self._parse_wrapped(self._parse_number) 2427 2428 return self.expression( 2429 exp.TableSample, 2430 method=method, 2431 bucket_numerator=bucket_numerator, 2432 bucket_denominator=bucket_denominator, 2433 bucket_field=bucket_field, 2434 percent=percent, 2435 rows=rows, 2436 size=size, 2437 seed=seed, 2438 kind=kind, 2439 ) 2440 2441 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2442 return list(iter(self._parse_pivot, None)) 2443 2444 # https://duckdb.org/docs/sql/statements/pivot 2445 def _parse_simplified_pivot(self) -> exp.Pivot: 2446 def _parse_on() -> t.Optional[exp.Expression]: 2447 this = self._parse_bitwise() 2448 return self._parse_in(this) if self._match(TokenType.IN) else this 2449 2450 this = self._parse_table() 2451 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2452 using = self._match(TokenType.USING) and self._parse_csv( 2453 lambda: self._parse_alias(self._parse_function()) 2454 ) 2455 group = self._parse_group() 2456 return self.expression( 2457 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2458 ) 2459 2460 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2461 index = self._index 2462 2463 if self._match(TokenType.PIVOT): 2464 unpivot = False 2465 elif self._match(TokenType.UNPIVOT): 2466 unpivot = True 2467 else: 2468 return None 2469 2470 expressions = [] 2471 field = None 2472 2473 if not self._match(TokenType.L_PAREN): 2474 self._retreat(index) 2475 return None 2476 2477 if unpivot: 2478 expressions = self._parse_csv(self._parse_column) 2479 else: 2480 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2481 2482 if not expressions: 2483 self.raise_error("Failed to parse PIVOT's aggregation list") 2484 2485 if not self._match(TokenType.FOR): 2486 self.raise_error("Expecting FOR") 2487 2488 value = self._parse_column() 2489 2490 if not self._match(TokenType.IN): 2491 self.raise_error("Expecting IN") 2492 2493 field = self._parse_in(value, alias=True) 2494 2495 self._match_r_paren() 2496 2497 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2498 2499 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2500 pivot.set("alias", self._parse_table_alias()) 2501 2502 if not unpivot: 2503 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2504 2505 columns: t.List[exp.Expression] = [] 2506 for fld in pivot.args["field"].expressions: 2507 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2508 for name in names: 2509 if self.PREFIXED_PIVOT_COLUMNS: 2510 name = f"{name}_{field_name}" if name else field_name 2511 else: 2512 name = f"{field_name}_{name}" if name else field_name 2513 2514 columns.append(exp.to_identifier(name)) 2515 2516 pivot.set("columns", columns) 2517 2518 return pivot 2519 2520 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2521 return [agg.alias for agg in aggregations] 2522 2523 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2524 if not skip_where_token and not self._match(TokenType.WHERE): 2525 return None 2526 2527 return self.expression( 2528 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2529 ) 2530 2531 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2532 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2533 return None 2534 2535 elements = defaultdict(list) 2536 2537 while True: 2538 expressions = self._parse_csv(self._parse_conjunction) 2539 if expressions: 2540 elements["expressions"].extend(expressions) 2541 2542 grouping_sets = self._parse_grouping_sets() 2543 if grouping_sets: 2544 elements["grouping_sets"].extend(grouping_sets) 2545 2546 rollup = None 2547 cube = None 2548 totals = None 2549 2550 with_ = self._match(TokenType.WITH) 2551 if self._match(TokenType.ROLLUP): 2552 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2553 elements["rollup"].extend(ensure_list(rollup)) 2554 2555 if self._match(TokenType.CUBE): 2556 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2557 elements["cube"].extend(ensure_list(cube)) 2558 2559 if self._match_text_seq("TOTALS"): 2560 totals = True 2561 elements["totals"] = True # type: ignore 2562 2563 if not (grouping_sets or rollup or cube or totals): 2564 break 2565 2566 return self.expression(exp.Group, **elements) # type: ignore 2567 2568 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2569 if not self._match(TokenType.GROUPING_SETS): 2570 return None 2571 2572 return self._parse_wrapped_csv(self._parse_grouping_set) 2573 2574 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2575 if self._match(TokenType.L_PAREN): 2576 grouping_set = self._parse_csv(self._parse_column) 2577 self._match_r_paren() 2578 return self.expression(exp.Tuple, expressions=grouping_set) 2579 2580 return self._parse_column() 2581 2582 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2583 if not skip_having_token and not self._match(TokenType.HAVING): 2584 return None 2585 return self.expression(exp.Having, this=self._parse_conjunction()) 2586 2587 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2588 if not self._match(TokenType.QUALIFY): 2589 return None 2590 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2591 2592 def _parse_order( 2593 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2594 ) -> t.Optional[exp.Expression]: 2595 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2596 return this 2597 2598 return self.expression( 2599 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2600 ) 2601 2602 def _parse_sort( 2603 self, exp_class: t.Type[exp.Expression], *texts: str 2604 ) -> t.Optional[exp.Expression]: 2605 if not self._match_text_seq(*texts): 2606 return None 2607 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2608 2609 def _parse_ordered(self) -> exp.Expression: 2610 this = self._parse_conjunction() 2611 self._match(TokenType.ASC) 2612 is_desc = self._match(TokenType.DESC) 2613 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2614 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2615 desc = is_desc or False 2616 asc = not desc 2617 nulls_first = is_nulls_first or False 2618 explicitly_null_ordered = is_nulls_first or is_nulls_last 2619 if ( 2620 not explicitly_null_ordered 2621 and ( 2622 (asc and self.null_ordering == "nulls_are_small") 2623 or (desc and self.null_ordering != "nulls_are_small") 2624 ) 2625 and self.null_ordering != "nulls_are_last" 2626 ): 2627 nulls_first = True 2628 2629 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2630 2631 def _parse_limit( 2632 self, this: t.Optional[exp.Expression] = None, top: bool = False 2633 ) -> t.Optional[exp.Expression]: 2634 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2635 limit_paren = self._match(TokenType.L_PAREN) 2636 limit_exp = self.expression( 2637 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2638 ) 2639 2640 if limit_paren: 2641 self._match_r_paren() 2642 2643 return limit_exp 2644 2645 if self._match(TokenType.FETCH): 2646 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2647 direction = self._prev.text if direction else "FIRST" 2648 2649 count = self._parse_number() 2650 percent = self._match(TokenType.PERCENT) 2651 2652 self._match_set((TokenType.ROW, TokenType.ROWS)) 2653 2654 only = self._match_text_seq("ONLY") 2655 with_ties = self._match_text_seq("WITH", "TIES") 2656 2657 if only and with_ties: 2658 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2659 2660 return self.expression( 2661 exp.Fetch, 2662 direction=direction, 2663 count=count, 2664 percent=percent, 2665 with_ties=with_ties, 2666 ) 2667 2668 return this 2669 2670 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2671 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2672 return this 2673 2674 count = self._parse_number() 2675 self._match_set((TokenType.ROW, TokenType.ROWS)) 2676 return self.expression(exp.Offset, this=this, expression=count) 2677 2678 def _parse_locks(self) -> t.List[exp.Expression]: 2679 # Lists are invariant, so we need to use a type hint here 2680 locks: t.List[exp.Expression] = [] 2681 2682 while True: 2683 if self._match_text_seq("FOR", "UPDATE"): 2684 update = True 2685 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2686 "LOCK", "IN", "SHARE", "MODE" 2687 ): 2688 update = False 2689 else: 2690 break 2691 2692 expressions = None 2693 if self._match_text_seq("OF"): 2694 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2695 2696 wait: t.Optional[bool | exp.Expression] = None 2697 if self._match_text_seq("NOWAIT"): 2698 wait = True 2699 elif self._match_text_seq("WAIT"): 2700 wait = self._parse_primary() 2701 elif self._match_text_seq("SKIP", "LOCKED"): 2702 wait = False 2703 2704 locks.append( 2705 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2706 ) 2707 2708 return locks 2709 2710 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2711 if not self._match_set(self.SET_OPERATIONS): 2712 return this 2713 2714 token_type = self._prev.token_type 2715 2716 if token_type == TokenType.UNION: 2717 expression = exp.Union 2718 elif token_type == TokenType.EXCEPT: 2719 expression = exp.Except 2720 else: 2721 expression = exp.Intersect 2722 2723 return self.expression( 2724 expression, 2725 this=this, 2726 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2727 expression=self._parse_set_operations(self._parse_select(nested=True)), 2728 ) 2729 2730 def _parse_expression(self) -> t.Optional[exp.Expression]: 2731 return self._parse_alias(self._parse_conjunction()) 2732 2733 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2734 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2735 2736 def _parse_equality(self) -> t.Optional[exp.Expression]: 2737 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2738 2739 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2740 return self._parse_tokens(self._parse_range, self.COMPARISON) 2741 2742 def _parse_range(self) -> t.Optional[exp.Expression]: 2743 this = self._parse_bitwise() 2744 negate = self._match(TokenType.NOT) 2745 2746 if self._match_set(self.RANGE_PARSERS): 2747 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2748 if not expression: 2749 return this 2750 2751 this = expression 2752 elif self._match(TokenType.ISNULL): 2753 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2754 2755 # Postgres supports ISNULL and NOTNULL for conditions. 2756 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2757 if self._match(TokenType.NOTNULL): 2758 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2759 this = self.expression(exp.Not, this=this) 2760 2761 if negate: 2762 this = self.expression(exp.Not, this=this) 2763 2764 if self._match(TokenType.IS): 2765 this = self._parse_is(this) 2766 2767 return this 2768 2769 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2770 index = self._index - 1 2771 negate = self._match(TokenType.NOT) 2772 if self._match_text_seq("DISTINCT", "FROM"): 2773 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2774 return self.expression(klass, this=this, expression=self._parse_expression()) 2775 2776 expression = self._parse_null() or self._parse_boolean() 2777 if not expression: 2778 self._retreat(index) 2779 return None 2780 2781 this = self.expression(exp.Is, this=this, expression=expression) 2782 return self.expression(exp.Not, this=this) if negate else this 2783 2784 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2785 unnest = self._parse_unnest() 2786 if unnest: 2787 this = self.expression(exp.In, this=this, unnest=unnest) 2788 elif self._match(TokenType.L_PAREN): 2789 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2790 2791 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2792 this = self.expression(exp.In, this=this, query=expressions[0]) 2793 else: 2794 this = self.expression(exp.In, this=this, expressions=expressions) 2795 2796 self._match_r_paren(this) 2797 else: 2798 this = self.expression(exp.In, this=this, field=self._parse_field()) 2799 2800 return this 2801 2802 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2803 low = self._parse_bitwise() 2804 self._match(TokenType.AND) 2805 high = self._parse_bitwise() 2806 return self.expression(exp.Between, this=this, low=low, high=high) 2807 2808 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2809 if not self._match(TokenType.ESCAPE): 2810 return this 2811 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2812 2813 def _parse_interval(self) -> t.Optional[exp.Expression]: 2814 if not self._match(TokenType.INTERVAL): 2815 return None 2816 2817 this = self._parse_primary() or self._parse_term() 2818 unit = self._parse_function() or self._parse_var() 2819 2820 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2821 # each INTERVAL expression into this canonical form so it's easy to transpile 2822 if this and this.is_number: 2823 this = exp.Literal.string(this.name) 2824 elif this and this.is_string: 2825 parts = this.name.split() 2826 2827 if len(parts) == 2: 2828 if unit: 2829 # this is not actually a unit, it's something else 2830 unit = None 2831 self._retreat(self._index - 1) 2832 else: 2833 this = exp.Literal.string(parts[0]) 2834 unit = self.expression(exp.Var, this=parts[1]) 2835 2836 return self.expression(exp.Interval, this=this, unit=unit) 2837 2838 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2839 this = self._parse_term() 2840 2841 while True: 2842 if self._match_set(self.BITWISE): 2843 this = self.expression( 2844 self.BITWISE[self._prev.token_type], 2845 this=this, 2846 expression=self._parse_term(), 2847 ) 2848 elif self._match_pair(TokenType.LT, TokenType.LT): 2849 this = self.expression( 2850 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2851 ) 2852 elif self._match_pair(TokenType.GT, TokenType.GT): 2853 this = self.expression( 2854 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2855 ) 2856 else: 2857 break 2858 2859 return this 2860 2861 def _parse_term(self) -> t.Optional[exp.Expression]: 2862 return self._parse_tokens(self._parse_factor, self.TERM) 2863 2864 def _parse_factor(self) -> t.Optional[exp.Expression]: 2865 return self._parse_tokens(self._parse_unary, self.FACTOR) 2866 2867 def _parse_unary(self) -> t.Optional[exp.Expression]: 2868 if self._match_set(self.UNARY_PARSERS): 2869 return self.UNARY_PARSERS[self._prev.token_type](self) 2870 return self._parse_at_time_zone(self._parse_type()) 2871 2872 def _parse_type(self) -> t.Optional[exp.Expression]: 2873 interval = self._parse_interval() 2874 if interval: 2875 return interval 2876 2877 index = self._index 2878 data_type = self._parse_types(check_func=True) 2879 this = self._parse_column() 2880 2881 if data_type: 2882 if isinstance(this, exp.Literal): 2883 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2884 if parser: 2885 return parser(self, this, data_type) 2886 return self.expression(exp.Cast, this=this, to=data_type) 2887 if not data_type.expressions: 2888 self._retreat(index) 2889 return self._parse_column() 2890 return self._parse_column_ops(data_type) 2891 2892 return this 2893 2894 def _parse_type_size(self) -> t.Optional[exp.Expression]: 2895 this = self._parse_type() 2896 if not this: 2897 return None 2898 2899 return self.expression( 2900 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2901 ) 2902 2903 def _parse_types( 2904 self, check_func: bool = False, schema: bool = False 2905 ) -> t.Optional[exp.Expression]: 2906 index = self._index 2907 2908 prefix = self._match_text_seq("SYSUDTLIB", ".") 2909 2910 if not self._match_set(self.TYPE_TOKENS): 2911 return None 2912 2913 type_token = self._prev.token_type 2914 2915 if type_token == TokenType.PSEUDO_TYPE: 2916 return self.expression(exp.PseudoType, this=self._prev.text) 2917 2918 nested = type_token in self.NESTED_TYPE_TOKENS 2919 is_struct = type_token == TokenType.STRUCT 2920 expressions = None 2921 maybe_func = False 2922 2923 if self._match(TokenType.L_PAREN): 2924 if is_struct: 2925 expressions = self._parse_csv(self._parse_struct_types) 2926 elif nested: 2927 expressions = self._parse_csv( 2928 lambda: self._parse_types(check_func=check_func, schema=schema) 2929 ) 2930 else: 2931 expressions = self._parse_csv(self._parse_type_size) 2932 2933 if not expressions or not self._match(TokenType.R_PAREN): 2934 self._retreat(index) 2935 return None 2936 2937 maybe_func = True 2938 2939 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2940 this = exp.DataType( 2941 this=exp.DataType.Type.ARRAY, 2942 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2943 nested=True, 2944 ) 2945 2946 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2947 this = exp.DataType( 2948 this=exp.DataType.Type.ARRAY, 2949 expressions=[this], 2950 nested=True, 2951 ) 2952 2953 return this 2954 2955 if self._match(TokenType.L_BRACKET): 2956 self._retreat(index) 2957 return None 2958 2959 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2960 if nested and self._match(TokenType.LT): 2961 if is_struct: 2962 expressions = self._parse_csv(self._parse_struct_types) 2963 else: 2964 expressions = self._parse_csv( 2965 lambda: self._parse_types(check_func=check_func, schema=schema) 2966 ) 2967 2968 if not self._match(TokenType.GT): 2969 self.raise_error("Expecting >") 2970 2971 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2972 values = self._parse_csv(self._parse_conjunction) 2973 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2974 2975 value: t.Optional[exp.Expression] = None 2976 if type_token in self.TIMESTAMPS: 2977 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2978 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2979 elif ( 2980 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2981 or type_token == TokenType.TIMESTAMPLTZ 2982 ): 2983 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2984 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2985 if type_token == TokenType.TIME: 2986 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2987 else: 2988 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2989 2990 maybe_func = maybe_func and value is None 2991 2992 if value is None: 2993 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2994 elif type_token == TokenType.INTERVAL: 2995 unit = self._parse_var() 2996 2997 if not unit: 2998 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2999 else: 3000 value = self.expression(exp.Interval, unit=unit) 3001 3002 if maybe_func and check_func: 3003 index2 = self._index 3004 peek = self._parse_string() 3005 3006 if not peek: 3007 self._retreat(index) 3008 return None 3009 3010 self._retreat(index2) 3011 3012 if value: 3013 return value 3014 3015 return exp.DataType( 3016 this=exp.DataType.Type[type_token.value.upper()], 3017 expressions=expressions, 3018 nested=nested, 3019 values=values, 3020 prefix=prefix, 3021 ) 3022 3023 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3024 this = self._parse_type() or self._parse_id_var() 3025 self._match(TokenType.COLON) 3026 return self._parse_column_def(this) 3027 3028 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3029 if not self._match_text_seq("AT", "TIME", "ZONE"): 3030 return this 3031 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3032 3033 def _parse_column(self) -> t.Optional[exp.Expression]: 3034 this = self._parse_field() 3035 if isinstance(this, exp.Identifier): 3036 this = self.expression(exp.Column, this=this) 3037 elif not this: 3038 return self._parse_bracket(this) 3039 return self._parse_column_ops(this) 3040 3041 def _parse_column_ops(self, this: exp.Expression) -> exp.Expression: 3042 this = self._parse_bracket(this) 3043 3044 while self._match_set(self.COLUMN_OPERATORS): 3045 op_token = self._prev.token_type 3046 op = self.COLUMN_OPERATORS.get(op_token) 3047 3048 if op_token == TokenType.DCOLON: 3049 field = self._parse_types() 3050 if not field: 3051 self.raise_error("Expected type") 3052 elif op and self._curr: 3053 self._advance() 3054 value = self._prev.text 3055 field = ( 3056 exp.Literal.number(value) 3057 if self._prev.token_type == TokenType.NUMBER 3058 else exp.Literal.string(value) 3059 ) 3060 else: 3061 field = self._parse_field(anonymous_func=True) 3062 3063 if isinstance(field, exp.Func): 3064 # bigquery allows function calls like x.y.count(...) 3065 # SAFE.SUBSTR(...) 3066 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3067 this = self._replace_columns_with_dots(this) 3068 3069 if op: 3070 this = op(self, this, field) 3071 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3072 this = self.expression( 3073 exp.Column, 3074 this=field, 3075 table=this.this, 3076 db=this.args.get("table"), 3077 catalog=this.args.get("db"), 3078 ) 3079 else: 3080 this = self.expression(exp.Dot, this=this, expression=field) 3081 this = self._parse_bracket(this) 3082 return this 3083 3084 def _parse_primary(self) -> t.Optional[exp.Expression]: 3085 if self._match_set(self.PRIMARY_PARSERS): 3086 token_type = self._prev.token_type 3087 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3088 3089 if token_type == TokenType.STRING: 3090 expressions = [primary] 3091 while self._match(TokenType.STRING): 3092 expressions.append(exp.Literal.string(self._prev.text)) 3093 if len(expressions) > 1: 3094 return self.expression(exp.Concat, expressions=expressions) 3095 return primary 3096 3097 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3098 return exp.Literal.number(f"0.{self._prev.text}") 3099 3100 if self._match(TokenType.L_PAREN): 3101 comments = self._prev_comments 3102 query = self._parse_select() 3103 3104 if query: 3105 expressions = [query] 3106 else: 3107 expressions = self._parse_csv(self._parse_expression) 3108 3109 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3110 3111 if isinstance(this, exp.Subqueryable): 3112 this = self._parse_set_operations( 3113 self._parse_subquery(this=this, parse_alias=False) 3114 ) 3115 elif len(expressions) > 1: 3116 this = self.expression(exp.Tuple, expressions=expressions) 3117 else: 3118 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3119 3120 if this: 3121 this.add_comments(comments) 3122 self._match_r_paren(expression=this) 3123 3124 return this 3125 3126 return None 3127 3128 def _parse_field( 3129 self, 3130 any_token: bool = False, 3131 tokens: t.Optional[t.Collection[TokenType]] = None, 3132 anonymous_func: bool = False, 3133 ) -> t.Optional[exp.Expression]: 3134 return ( 3135 self._parse_primary() 3136 or self._parse_function(anonymous=anonymous_func) 3137 or self._parse_id_var(any_token=any_token, tokens=tokens) 3138 ) 3139 3140 def _parse_function( 3141 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3142 ) -> t.Optional[exp.Expression]: 3143 if not self._curr: 3144 return None 3145 3146 token_type = self._curr.token_type 3147 3148 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3149 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3150 3151 if not self._next or self._next.token_type != TokenType.L_PAREN: 3152 if token_type in self.NO_PAREN_FUNCTIONS: 3153 self._advance() 3154 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3155 3156 return None 3157 3158 if token_type not in self.FUNC_TOKENS: 3159 return None 3160 3161 this = self._curr.text 3162 upper = this.upper() 3163 self._advance(2) 3164 3165 parser = self.FUNCTION_PARSERS.get(upper) 3166 3167 if parser and not anonymous: 3168 this = parser(self) 3169 else: 3170 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3171 3172 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3173 this = self.expression(subquery_predicate, this=self._parse_select()) 3174 self._match_r_paren() 3175 return this 3176 3177 if functions is None: 3178 functions = self.FUNCTIONS 3179 3180 function = functions.get(upper) 3181 3182 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3183 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3184 3185 if function and not anonymous: 3186 this = function(args) 3187 self.validate_expression(this, args) 3188 else: 3189 this = self.expression(exp.Anonymous, this=this, expressions=args) 3190 3191 self._match_r_paren(this) 3192 return self._parse_window(this) 3193 3194 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3195 return self._parse_column_def(self._parse_id_var()) 3196 3197 def _parse_user_defined_function( 3198 self, kind: t.Optional[TokenType] = None 3199 ) -> t.Optional[exp.Expression]: 3200 this = self._parse_id_var() 3201 3202 while self._match(TokenType.DOT): 3203 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3204 3205 if not self._match(TokenType.L_PAREN): 3206 return this 3207 3208 expressions = self._parse_csv(self._parse_function_parameter) 3209 self._match_r_paren() 3210 return self.expression( 3211 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3212 ) 3213 3214 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3215 literal = self._parse_primary() 3216 if literal: 3217 return self.expression(exp.Introducer, this=token.text, expression=literal) 3218 3219 return self.expression(exp.Identifier, this=token.text) 3220 3221 def _parse_session_parameter(self) -> exp.Expression: 3222 kind = None 3223 this = self._parse_id_var() or self._parse_primary() 3224 3225 if this and self._match(TokenType.DOT): 3226 kind = this.name 3227 this = self._parse_var() or self._parse_primary() 3228 3229 return self.expression(exp.SessionParameter, this=this, kind=kind) 3230 3231 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3232 index = self._index 3233 3234 if self._match(TokenType.L_PAREN): 3235 expressions = self._parse_csv(self._parse_id_var) 3236 3237 if not self._match(TokenType.R_PAREN): 3238 self._retreat(index) 3239 else: 3240 expressions = [self._parse_id_var()] 3241 3242 if self._match_set(self.LAMBDAS): 3243 return self.LAMBDAS[self._prev.token_type](self, expressions) 3244 3245 self._retreat(index) 3246 3247 this: t.Optional[exp.Expression] 3248 3249 if self._match(TokenType.DISTINCT): 3250 this = self.expression( 3251 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3252 ) 3253 else: 3254 this = self._parse_select_or_expression(alias=alias) 3255 3256 if isinstance(this, exp.EQ): 3257 left = this.this 3258 if isinstance(left, exp.Column): 3259 left.replace(exp.Var(this=left.text("this"))) 3260 3261 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3262 3263 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3264 index = self._index 3265 3266 if not self.errors: 3267 try: 3268 if self._parse_select(nested=True): 3269 return this 3270 except ParseError: 3271 pass 3272 finally: 3273 self.errors.clear() 3274 self._retreat(index) 3275 3276 if not self._match(TokenType.L_PAREN): 3277 return this 3278 3279 args = self._parse_csv( 3280 lambda: self._parse_constraint() 3281 or self._parse_column_def(self._parse_field(any_token=True)) 3282 ) 3283 self._match_r_paren() 3284 return self.expression(exp.Schema, this=this, expressions=args) 3285 3286 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3287 # column defs are not really columns, they're identifiers 3288 if isinstance(this, exp.Column): 3289 this = this.this 3290 kind = self._parse_types(schema=True) 3291 3292 if self._match_text_seq("FOR", "ORDINALITY"): 3293 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3294 3295 constraints = [] 3296 while True: 3297 constraint = self._parse_column_constraint() 3298 if not constraint: 3299 break 3300 constraints.append(constraint) 3301 3302 if not kind and not constraints: 3303 return this 3304 3305 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3306 3307 def _parse_auto_increment(self) -> exp.Expression: 3308 start = None 3309 increment = None 3310 3311 if self._match(TokenType.L_PAREN, advance=False): 3312 args = self._parse_wrapped_csv(self._parse_bitwise) 3313 start = seq_get(args, 0) 3314 increment = seq_get(args, 1) 3315 elif self._match_text_seq("START"): 3316 start = self._parse_bitwise() 3317 self._match_text_seq("INCREMENT") 3318 increment = self._parse_bitwise() 3319 3320 if start and increment: 3321 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3322 3323 return exp.AutoIncrementColumnConstraint() 3324 3325 def _parse_compress(self) -> exp.Expression: 3326 if self._match(TokenType.L_PAREN, advance=False): 3327 return self.expression( 3328 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3329 ) 3330 3331 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3332 3333 def _parse_generated_as_identity(self) -> exp.Expression: 3334 if self._match_text_seq("BY", "DEFAULT"): 3335 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3336 this = self.expression( 3337 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3338 ) 3339 else: 3340 self._match_text_seq("ALWAYS") 3341 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3342 3343 self._match(TokenType.ALIAS) 3344 identity = self._match_text_seq("IDENTITY") 3345 3346 if self._match(TokenType.L_PAREN): 3347 if self._match_text_seq("START", "WITH"): 3348 this.set("start", self._parse_bitwise()) 3349 if self._match_text_seq("INCREMENT", "BY"): 3350 this.set("increment", self._parse_bitwise()) 3351 if self._match_text_seq("MINVALUE"): 3352 this.set("minvalue", self._parse_bitwise()) 3353 if self._match_text_seq("MAXVALUE"): 3354 this.set("maxvalue", self._parse_bitwise()) 3355 3356 if self._match_text_seq("CYCLE"): 3357 this.set("cycle", True) 3358 elif self._match_text_seq("NO", "CYCLE"): 3359 this.set("cycle", False) 3360 3361 if not identity: 3362 this.set("expression", self._parse_bitwise()) 3363 3364 self._match_r_paren() 3365 3366 return this 3367 3368 def _parse_inline(self) -> t.Optional[exp.Expression]: 3369 self._match_text_seq("LENGTH") 3370 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3371 3372 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3373 if self._match_text_seq("NULL"): 3374 return self.expression(exp.NotNullColumnConstraint) 3375 if self._match_text_seq("CASESPECIFIC"): 3376 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3377 return None 3378 3379 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3380 if self._match(TokenType.CONSTRAINT): 3381 this = self._parse_id_var() 3382 else: 3383 this = None 3384 3385 if self._match_texts(self.CONSTRAINT_PARSERS): 3386 return self.expression( 3387 exp.ColumnConstraint, 3388 this=this, 3389 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3390 ) 3391 3392 return this 3393 3394 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3395 if not self._match(TokenType.CONSTRAINT): 3396 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3397 3398 this = self._parse_id_var() 3399 expressions = [] 3400 3401 while True: 3402 constraint = self._parse_unnamed_constraint() or self._parse_function() 3403 if not constraint: 3404 break 3405 expressions.append(constraint) 3406 3407 return self.expression(exp.Constraint, this=this, expressions=expressions) 3408 3409 def _parse_unnamed_constraint( 3410 self, constraints: t.Optional[t.Collection[str]] = None 3411 ) -> t.Optional[exp.Expression]: 3412 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3413 return None 3414 3415 constraint = self._prev.text.upper() 3416 if constraint not in self.CONSTRAINT_PARSERS: 3417 self.raise_error(f"No parser found for schema constraint {constraint}.") 3418 3419 return self.CONSTRAINT_PARSERS[constraint](self) 3420 3421 def _parse_unique(self) -> exp.Expression: 3422 self._match_text_seq("KEY") 3423 return self.expression( 3424 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3425 ) 3426 3427 def _parse_key_constraint_options(self) -> t.List[str]: 3428 options = [] 3429 while True: 3430 if not self._curr: 3431 break 3432 3433 if self._match(TokenType.ON): 3434 action = None 3435 on = self._advance_any() and self._prev.text 3436 3437 if self._match_text_seq("NO", "ACTION"): 3438 action = "NO ACTION" 3439 elif self._match_text_seq("CASCADE"): 3440 action = "CASCADE" 3441 elif self._match_pair(TokenType.SET, TokenType.NULL): 3442 action = "SET NULL" 3443 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3444 action = "SET DEFAULT" 3445 else: 3446 self.raise_error("Invalid key constraint") 3447 3448 options.append(f"ON {on} {action}") 3449 elif self._match_text_seq("NOT", "ENFORCED"): 3450 options.append("NOT ENFORCED") 3451 elif self._match_text_seq("DEFERRABLE"): 3452 options.append("DEFERRABLE") 3453 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3454 options.append("INITIALLY DEFERRED") 3455 elif self._match_text_seq("NORELY"): 3456 options.append("NORELY") 3457 elif self._match_text_seq("MATCH", "FULL"): 3458 options.append("MATCH FULL") 3459 else: 3460 break 3461 3462 return options 3463 3464 def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]: 3465 if match and not self._match(TokenType.REFERENCES): 3466 return None 3467 3468 expressions = None 3469 this = self._parse_id_var() 3470 3471 if self._match(TokenType.L_PAREN, advance=False): 3472 expressions = self._parse_wrapped_id_vars() 3473 3474 options = self._parse_key_constraint_options() 3475 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3476 3477 def _parse_foreign_key(self) -> exp.Expression: 3478 expressions = self._parse_wrapped_id_vars() 3479 reference = self._parse_references() 3480 options = {} 3481 3482 while self._match(TokenType.ON): 3483 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3484 self.raise_error("Expected DELETE or UPDATE") 3485 3486 kind = self._prev.text.lower() 3487 3488 if self._match_text_seq("NO", "ACTION"): 3489 action = "NO ACTION" 3490 elif self._match(TokenType.SET): 3491 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3492 action = "SET " + self._prev.text.upper() 3493 else: 3494 self._advance() 3495 action = self._prev.text.upper() 3496 3497 options[kind] = action 3498 3499 return self.expression( 3500 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3501 ) 3502 3503 def _parse_primary_key( 3504 self, wrapped_optional: bool = False, in_props: bool = False 3505 ) -> exp.Expression: 3506 desc = ( 3507 self._match_set((TokenType.ASC, TokenType.DESC)) 3508 and self._prev.token_type == TokenType.DESC 3509 ) 3510 3511 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3512 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3513 3514 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3515 options = self._parse_key_constraint_options() 3516 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3517 3518 @t.overload 3519 def _parse_bracket(self, this: exp.Expression) -> exp.Expression: 3520 ... 3521 3522 @t.overload 3523 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3524 ... 3525 3526 def _parse_bracket(self, this): 3527 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3528 return this 3529 3530 bracket_kind = self._prev.token_type 3531 3532 if self._match(TokenType.COLON): 3533 expressions: t.List[t.Optional[exp.Expression]] = [ 3534 self.expression(exp.Slice, expression=self._parse_conjunction()) 3535 ] 3536 else: 3537 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3538 3539 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3540 if bracket_kind == TokenType.L_BRACE: 3541 this = self.expression(exp.Struct, expressions=expressions) 3542 elif not this or this.name.upper() == "ARRAY": 3543 this = self.expression(exp.Array, expressions=expressions) 3544 else: 3545 expressions = apply_index_offset(this, expressions, -self.index_offset) 3546 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3547 3548 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3549 self.raise_error("Expected ]") 3550 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3551 self.raise_error("Expected }") 3552 3553 self._add_comments(this) 3554 return self._parse_bracket(this) 3555 3556 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3557 if self._match(TokenType.COLON): 3558 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3559 return this 3560 3561 def _parse_case(self) -> t.Optional[exp.Expression]: 3562 ifs = [] 3563 default = None 3564 3565 expression = self._parse_conjunction() 3566 3567 while self._match(TokenType.WHEN): 3568 this = self._parse_conjunction() 3569 self._match(TokenType.THEN) 3570 then = self._parse_conjunction() 3571 ifs.append(self.expression(exp.If, this=this, true=then)) 3572 3573 if self._match(TokenType.ELSE): 3574 default = self._parse_conjunction() 3575 3576 if not self._match(TokenType.END): 3577 self.raise_error("Expected END after CASE", self._prev) 3578 3579 return self._parse_window( 3580 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3581 ) 3582 3583 def _parse_if(self) -> t.Optional[exp.Expression]: 3584 if self._match(TokenType.L_PAREN): 3585 args = self._parse_csv(self._parse_conjunction) 3586 this = exp.If.from_arg_list(args) 3587 self.validate_expression(this, args) 3588 self._match_r_paren() 3589 else: 3590 index = self._index - 1 3591 condition = self._parse_conjunction() 3592 3593 if not condition: 3594 self._retreat(index) 3595 return None 3596 3597 self._match(TokenType.THEN) 3598 true = self._parse_conjunction() 3599 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3600 self._match(TokenType.END) 3601 this = self.expression(exp.If, this=condition, true=true, false=false) 3602 3603 return self._parse_window(this) 3604 3605 def _parse_extract(self) -> exp.Expression: 3606 this = self._parse_function() or self._parse_var() or self._parse_type() 3607 3608 if self._match(TokenType.FROM): 3609 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3610 3611 if not self._match(TokenType.COMMA): 3612 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3613 3614 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3615 3616 def _parse_cast(self, strict: bool) -> exp.Expression: 3617 this = self._parse_conjunction() 3618 3619 if not self._match(TokenType.ALIAS): 3620 if self._match(TokenType.COMMA): 3621 return self.expression( 3622 exp.CastToStrType, this=this, expression=self._parse_string() 3623 ) 3624 else: 3625 self.raise_error("Expected AS after CAST") 3626 3627 to = self._parse_types() 3628 3629 if not to: 3630 self.raise_error("Expected TYPE after CAST") 3631 elif to.this == exp.DataType.Type.CHAR: 3632 if self._match(TokenType.CHARACTER_SET): 3633 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3634 3635 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3636 3637 def _parse_string_agg(self) -> exp.Expression: 3638 expression: t.Optional[exp.Expression] 3639 3640 if self._match(TokenType.DISTINCT): 3641 args = self._parse_csv(self._parse_conjunction) 3642 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3643 else: 3644 args = self._parse_csv(self._parse_conjunction) 3645 expression = seq_get(args, 0) 3646 3647 index = self._index 3648 if not self._match(TokenType.R_PAREN): 3649 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3650 order = self._parse_order(this=expression) 3651 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3652 3653 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3654 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3655 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3656 if not self._match_text_seq("WITHIN", "GROUP"): 3657 self._retreat(index) 3658 this = exp.GroupConcat.from_arg_list(args) 3659 self.validate_expression(this, args) 3660 return this 3661 3662 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3663 order = self._parse_order(this=expression) 3664 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3665 3666 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3667 to: t.Optional[exp.Expression] 3668 this = self._parse_bitwise() 3669 3670 if self._match(TokenType.USING): 3671 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3672 elif self._match(TokenType.COMMA): 3673 to = self._parse_bitwise() 3674 else: 3675 to = None 3676 3677 # Swap the argument order if needed to produce the correct AST 3678 if self.CONVERT_TYPE_FIRST: 3679 this, to = to, this 3680 3681 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3682 3683 def _parse_decode(self) -> t.Optional[exp.Expression]: 3684 """ 3685 There are generally two variants of the DECODE function: 3686 3687 - DECODE(bin, charset) 3688 - DECODE(expression, search, result [, search, result] ... [, default]) 3689 3690 The second variant will always be parsed into a CASE expression. Note that NULL 3691 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3692 instead of relying on pattern matching. 3693 """ 3694 args = self._parse_csv(self._parse_conjunction) 3695 3696 if len(args) < 3: 3697 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3698 3699 expression, *expressions = args 3700 if not expression: 3701 return None 3702 3703 ifs = [] 3704 for search, result in zip(expressions[::2], expressions[1::2]): 3705 if not search or not result: 3706 return None 3707 3708 if isinstance(search, exp.Literal): 3709 ifs.append( 3710 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3711 ) 3712 elif isinstance(search, exp.Null): 3713 ifs.append( 3714 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3715 ) 3716 else: 3717 cond = exp.or_( 3718 exp.EQ(this=expression.copy(), expression=search), 3719 exp.and_( 3720 exp.Is(this=expression.copy(), expression=exp.Null()), 3721 exp.Is(this=search.copy(), expression=exp.Null()), 3722 copy=False, 3723 ), 3724 copy=False, 3725 ) 3726 ifs.append(exp.If(this=cond, true=result)) 3727 3728 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3729 3730 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3731 self._match_text_seq("KEY") 3732 key = self._parse_field() 3733 self._match(TokenType.COLON) 3734 self._match_text_seq("VALUE") 3735 value = self._parse_field() 3736 if not key and not value: 3737 return None 3738 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3739 3740 def _parse_json_object(self) -> exp.Expression: 3741 expressions = self._parse_csv(self._parse_json_key_value) 3742 3743 null_handling = None 3744 if self._match_text_seq("NULL", "ON", "NULL"): 3745 null_handling = "NULL ON NULL" 3746 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3747 null_handling = "ABSENT ON NULL" 3748 3749 unique_keys = None 3750 if self._match_text_seq("WITH", "UNIQUE"): 3751 unique_keys = True 3752 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3753 unique_keys = False 3754 3755 self._match_text_seq("KEYS") 3756 3757 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3758 format_json = self._match_text_seq("FORMAT", "JSON") 3759 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3760 3761 return self.expression( 3762 exp.JSONObject, 3763 expressions=expressions, 3764 null_handling=null_handling, 3765 unique_keys=unique_keys, 3766 return_type=return_type, 3767 format_json=format_json, 3768 encoding=encoding, 3769 ) 3770 3771 def _parse_logarithm(self) -> exp.Expression: 3772 # Default argument order is base, expression 3773 args = self._parse_csv(self._parse_range) 3774 3775 if len(args) > 1: 3776 if not self.LOG_BASE_FIRST: 3777 args.reverse() 3778 return exp.Log.from_arg_list(args) 3779 3780 return self.expression( 3781 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3782 ) 3783 3784 def _parse_match_against(self) -> exp.Expression: 3785 expressions = self._parse_csv(self._parse_column) 3786 3787 self._match_text_seq(")", "AGAINST", "(") 3788 3789 this = self._parse_string() 3790 3791 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3792 modifier = "IN NATURAL LANGUAGE MODE" 3793 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3794 modifier = f"{modifier} WITH QUERY EXPANSION" 3795 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3796 modifier = "IN BOOLEAN MODE" 3797 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3798 modifier = "WITH QUERY EXPANSION" 3799 else: 3800 modifier = None 3801 3802 return self.expression( 3803 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3804 ) 3805 3806 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3807 def _parse_open_json(self) -> exp.Expression: 3808 this = self._parse_bitwise() 3809 path = self._match(TokenType.COMMA) and self._parse_string() 3810 3811 def _parse_open_json_column_def() -> exp.Expression: 3812 this = self._parse_field(any_token=True) 3813 kind = self._parse_types() 3814 path = self._parse_string() 3815 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3816 return self.expression( 3817 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3818 ) 3819 3820 expressions = None 3821 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3822 self._match_l_paren() 3823 expressions = self._parse_csv(_parse_open_json_column_def) 3824 3825 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3826 3827 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3828 args = self._parse_csv(self._parse_bitwise) 3829 3830 if self._match(TokenType.IN): 3831 return self.expression( 3832 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3833 ) 3834 3835 if haystack_first: 3836 haystack = seq_get(args, 0) 3837 needle = seq_get(args, 1) 3838 else: 3839 needle = seq_get(args, 0) 3840 haystack = seq_get(args, 1) 3841 3842 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3843 3844 self.validate_expression(this, args) 3845 3846 return this 3847 3848 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3849 args = self._parse_csv(self._parse_table) 3850 return exp.JoinHint(this=func_name.upper(), expressions=args) 3851 3852 def _parse_substring(self) -> exp.Expression: 3853 # Postgres supports the form: substring(string [from int] [for int]) 3854 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3855 3856 args = self._parse_csv(self._parse_bitwise) 3857 3858 if self._match(TokenType.FROM): 3859 args.append(self._parse_bitwise()) 3860 if self._match(TokenType.FOR): 3861 args.append(self._parse_bitwise()) 3862 3863 this = exp.Substring.from_arg_list(args) 3864 self.validate_expression(this, args) 3865 3866 return this 3867 3868 def _parse_trim(self) -> exp.Expression: 3869 # https://www.w3resource.com/sql/character-functions/trim.php 3870 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3871 3872 position = None 3873 collation = None 3874 3875 if self._match_texts(self.TRIM_TYPES): 3876 position = self._prev.text.upper() 3877 3878 expression = self._parse_bitwise() 3879 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3880 this = self._parse_bitwise() 3881 else: 3882 this = expression 3883 expression = None 3884 3885 if self._match(TokenType.COLLATE): 3886 collation = self._parse_bitwise() 3887 3888 return self.expression( 3889 exp.Trim, 3890 this=this, 3891 position=position, 3892 expression=expression, 3893 collation=collation, 3894 ) 3895 3896 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3897 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3898 3899 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3900 return self._parse_window(self._parse_id_var(), alias=True) 3901 3902 def _parse_respect_or_ignore_nulls( 3903 self, this: t.Optional[exp.Expression] 3904 ) -> t.Optional[exp.Expression]: 3905 if self._match_text_seq("IGNORE", "NULLS"): 3906 return self.expression(exp.IgnoreNulls, this=this) 3907 if self._match_text_seq("RESPECT", "NULLS"): 3908 return self.expression(exp.RespectNulls, this=this) 3909 return this 3910 3911 def _parse_window( 3912 self, this: t.Optional[exp.Expression], alias: bool = False 3913 ) -> t.Optional[exp.Expression]: 3914 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3915 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3916 self._match_r_paren() 3917 3918 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3919 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3920 if self._match_text_seq("WITHIN", "GROUP"): 3921 order = self._parse_wrapped(self._parse_order) 3922 this = self.expression(exp.WithinGroup, this=this, expression=order) 3923 3924 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3925 # Some dialects choose to implement and some do not. 3926 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3927 3928 # There is some code above in _parse_lambda that handles 3929 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3930 3931 # The below changes handle 3932 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3933 3934 # Oracle allows both formats 3935 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3936 # and Snowflake chose to do the same for familiarity 3937 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3938 this = self._parse_respect_or_ignore_nulls(this) 3939 3940 # bigquery select from window x AS (partition by ...) 3941 if alias: 3942 over = None 3943 self._match(TokenType.ALIAS) 3944 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3945 return this 3946 else: 3947 over = self._prev.text.upper() 3948 3949 if not self._match(TokenType.L_PAREN): 3950 return self.expression( 3951 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3952 ) 3953 3954 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3955 3956 first = self._match(TokenType.FIRST) 3957 if self._match_text_seq("LAST"): 3958 first = False 3959 3960 partition = self._parse_partition_by() 3961 order = self._parse_order() 3962 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3963 3964 if kind: 3965 self._match(TokenType.BETWEEN) 3966 start = self._parse_window_spec() 3967 self._match(TokenType.AND) 3968 end = self._parse_window_spec() 3969 3970 spec = self.expression( 3971 exp.WindowSpec, 3972 kind=kind, 3973 start=start["value"], 3974 start_side=start["side"], 3975 end=end["value"], 3976 end_side=end["side"], 3977 ) 3978 else: 3979 spec = None 3980 3981 self._match_r_paren() 3982 3983 return self.expression( 3984 exp.Window, 3985 this=this, 3986 partition_by=partition, 3987 order=order, 3988 spec=spec, 3989 alias=window_alias, 3990 over=over, 3991 first=first, 3992 ) 3993 3994 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3995 self._match(TokenType.BETWEEN) 3996 3997 return { 3998 "value": ( 3999 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4000 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4001 or self._parse_bitwise() 4002 ), 4003 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4004 } 4005 4006 def _parse_alias( 4007 self, this: t.Optional[exp.Expression], explicit: bool = False 4008 ) -> t.Optional[exp.Expression]: 4009 any_token = self._match(TokenType.ALIAS) 4010 4011 if explicit and not any_token: 4012 return this 4013 4014 if self._match(TokenType.L_PAREN): 4015 aliases = self.expression( 4016 exp.Aliases, 4017 this=this, 4018 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4019 ) 4020 self._match_r_paren(aliases) 4021 return aliases 4022 4023 alias = self._parse_id_var(any_token) 4024 4025 if alias: 4026 return self.expression(exp.Alias, this=this, alias=alias) 4027 4028 return this 4029 4030 def _parse_id_var( 4031 self, 4032 any_token: bool = True, 4033 tokens: t.Optional[t.Collection[TokenType]] = None, 4034 ) -> t.Optional[exp.Expression]: 4035 identifier = self._parse_identifier() 4036 4037 if identifier: 4038 return identifier 4039 4040 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4041 quoted = self._prev.token_type == TokenType.STRING 4042 return exp.Identifier(this=self._prev.text, quoted=quoted) 4043 4044 return None 4045 4046 def _parse_string(self) -> t.Optional[exp.Expression]: 4047 if self._match(TokenType.STRING): 4048 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4049 return self._parse_placeholder() 4050 4051 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 4052 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4053 4054 def _parse_number(self) -> t.Optional[exp.Expression]: 4055 if self._match(TokenType.NUMBER): 4056 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4057 return self._parse_placeholder() 4058 4059 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4060 if self._match(TokenType.IDENTIFIER): 4061 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4062 return self._parse_placeholder() 4063 4064 def _parse_var( 4065 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4066 ) -> t.Optional[exp.Expression]: 4067 if ( 4068 (any_token and self._advance_any()) 4069 or self._match(TokenType.VAR) 4070 or (self._match_set(tokens) if tokens else False) 4071 ): 4072 return self.expression(exp.Var, this=self._prev.text) 4073 return self._parse_placeholder() 4074 4075 def _advance_any(self) -> t.Optional[Token]: 4076 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4077 self._advance() 4078 return self._prev 4079 return None 4080 4081 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4082 return self._parse_var() or self._parse_string() 4083 4084 def _parse_null(self) -> t.Optional[exp.Expression]: 4085 if self._match(TokenType.NULL): 4086 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4087 return None 4088 4089 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4090 if self._match(TokenType.TRUE): 4091 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4092 if self._match(TokenType.FALSE): 4093 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4094 return None 4095 4096 def _parse_star(self) -> t.Optional[exp.Expression]: 4097 if self._match(TokenType.STAR): 4098 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4099 return None 4100 4101 def _parse_parameter(self) -> exp.Expression: 4102 wrapped = self._match(TokenType.L_BRACE) 4103 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4104 self._match(TokenType.R_BRACE) 4105 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4106 4107 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4108 if self._match_set(self.PLACEHOLDER_PARSERS): 4109 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4110 if placeholder: 4111 return placeholder 4112 self._advance(-1) 4113 return None 4114 4115 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4116 if not self._match(TokenType.EXCEPT): 4117 return None 4118 if self._match(TokenType.L_PAREN, advance=False): 4119 return self._parse_wrapped_csv(self._parse_column) 4120 return self._parse_csv(self._parse_column) 4121 4122 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4123 if not self._match(TokenType.REPLACE): 4124 return None 4125 if self._match(TokenType.L_PAREN, advance=False): 4126 return self._parse_wrapped_csv(self._parse_expression) 4127 return self._parse_csv(self._parse_expression) 4128 4129 def _parse_csv( 4130 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4131 ) -> t.List[t.Optional[exp.Expression]]: 4132 parse_result = parse_method() 4133 items = [parse_result] if parse_result is not None else [] 4134 4135 while self._match(sep): 4136 self._add_comments(parse_result) 4137 parse_result = parse_method() 4138 if parse_result is not None: 4139 items.append(parse_result) 4140 4141 return items 4142 4143 def _parse_tokens( 4144 self, parse_method: t.Callable, expressions: t.Dict 4145 ) -> t.Optional[exp.Expression]: 4146 this = parse_method() 4147 4148 while self._match_set(expressions): 4149 this = self.expression( 4150 expressions[self._prev.token_type], 4151 this=this, 4152 comments=self._prev_comments, 4153 expression=parse_method(), 4154 ) 4155 4156 return this 4157 4158 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4159 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4160 4161 def _parse_wrapped_csv( 4162 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4163 ) -> t.List[t.Optional[exp.Expression]]: 4164 return self._parse_wrapped( 4165 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4166 ) 4167 4168 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4169 wrapped = self._match(TokenType.L_PAREN) 4170 if not wrapped and not optional: 4171 self.raise_error("Expecting (") 4172 parse_result = parse_method() 4173 if wrapped: 4174 self._match_r_paren() 4175 return parse_result 4176 4177 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4178 return self._parse_select() or self._parse_set_operations( 4179 self._parse_expression() if alias else self._parse_conjunction() 4180 ) 4181 4182 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4183 return self._parse_query_modifiers( 4184 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4185 ) 4186 4187 def _parse_transaction(self) -> exp.Expression: 4188 this = None 4189 if self._match_texts(self.TRANSACTION_KIND): 4190 this = self._prev.text 4191 4192 self._match_texts({"TRANSACTION", "WORK"}) 4193 4194 modes = [] 4195 while True: 4196 mode = [] 4197 while self._match(TokenType.VAR): 4198 mode.append(self._prev.text) 4199 4200 if mode: 4201 modes.append(" ".join(mode)) 4202 if not self._match(TokenType.COMMA): 4203 break 4204 4205 return self.expression(exp.Transaction, this=this, modes=modes) 4206 4207 def _parse_commit_or_rollback(self) -> exp.Expression: 4208 chain = None 4209 savepoint = None 4210 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4211 4212 self._match_texts({"TRANSACTION", "WORK"}) 4213 4214 if self._match_text_seq("TO"): 4215 self._match_text_seq("SAVEPOINT") 4216 savepoint = self._parse_id_var() 4217 4218 if self._match(TokenType.AND): 4219 chain = not self._match_text_seq("NO") 4220 self._match_text_seq("CHAIN") 4221 4222 if is_rollback: 4223 return self.expression(exp.Rollback, savepoint=savepoint) 4224 return self.expression(exp.Commit, chain=chain) 4225 4226 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4227 if not self._match_text_seq("ADD"): 4228 return None 4229 4230 self._match(TokenType.COLUMN) 4231 exists_column = self._parse_exists(not_=True) 4232 expression = self._parse_column_def(self._parse_field(any_token=True)) 4233 4234 if expression: 4235 expression.set("exists", exists_column) 4236 4237 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4238 if self._match_texts(("FIRST", "AFTER")): 4239 position = self._prev.text 4240 column_position = self.expression( 4241 exp.ColumnPosition, this=self._parse_column(), position=position 4242 ) 4243 expression.set("position", column_position) 4244 4245 return expression 4246 4247 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4248 drop = self._match(TokenType.DROP) and self._parse_drop() 4249 if drop and not isinstance(drop, exp.Command): 4250 drop.set("kind", drop.args.get("kind", "COLUMN")) 4251 return drop 4252 4253 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4254 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4255 return self.expression( 4256 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4257 ) 4258 4259 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4260 this = None 4261 kind = self._prev.token_type 4262 4263 if kind == TokenType.CONSTRAINT: 4264 this = self._parse_id_var() 4265 4266 if self._match_text_seq("CHECK"): 4267 expression = self._parse_wrapped(self._parse_conjunction) 4268 enforced = self._match_text_seq("ENFORCED") 4269 4270 return self.expression( 4271 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4272 ) 4273 4274 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4275 expression = self._parse_foreign_key() 4276 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4277 expression = self._parse_primary_key() 4278 else: 4279 expression = None 4280 4281 return self.expression(exp.AddConstraint, this=this, expression=expression) 4282 4283 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4284 index = self._index - 1 4285 4286 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4287 return self._parse_csv(self._parse_add_constraint) 4288 4289 self._retreat(index) 4290 return self._parse_csv(self._parse_add_column) 4291 4292 def _parse_alter_table_alter(self) -> exp.Expression: 4293 self._match(TokenType.COLUMN) 4294 column = self._parse_field(any_token=True) 4295 4296 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4297 return self.expression(exp.AlterColumn, this=column, drop=True) 4298 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4299 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4300 4301 self._match_text_seq("SET", "DATA") 4302 return self.expression( 4303 exp.AlterColumn, 4304 this=column, 4305 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4306 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4307 using=self._match(TokenType.USING) and self._parse_conjunction(), 4308 ) 4309 4310 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4311 index = self._index - 1 4312 4313 partition_exists = self._parse_exists() 4314 if self._match(TokenType.PARTITION, advance=False): 4315 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4316 4317 self._retreat(index) 4318 return self._parse_csv(self._parse_drop_column) 4319 4320 def _parse_alter_table_rename(self) -> exp.Expression: 4321 self._match_text_seq("TO") 4322 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4323 4324 def _parse_alter(self) -> t.Optional[exp.Expression]: 4325 start = self._prev 4326 4327 if not self._match(TokenType.TABLE): 4328 return self._parse_as_command(start) 4329 4330 exists = self._parse_exists() 4331 this = self._parse_table(schema=True) 4332 4333 if self._next: 4334 self._advance() 4335 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4336 4337 if parser: 4338 actions = ensure_list(parser(self)) 4339 4340 if not self._curr: 4341 return self.expression( 4342 exp.AlterTable, 4343 this=this, 4344 exists=exists, 4345 actions=actions, 4346 ) 4347 return self._parse_as_command(start) 4348 4349 def _parse_merge(self) -> exp.Expression: 4350 self._match(TokenType.INTO) 4351 target = self._parse_table() 4352 4353 self._match(TokenType.USING) 4354 using = self._parse_table() 4355 4356 self._match(TokenType.ON) 4357 on = self._parse_conjunction() 4358 4359 whens = [] 4360 while self._match(TokenType.WHEN): 4361 matched = not self._match(TokenType.NOT) 4362 self._match_text_seq("MATCHED") 4363 source = ( 4364 False 4365 if self._match_text_seq("BY", "TARGET") 4366 else self._match_text_seq("BY", "SOURCE") 4367 ) 4368 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4369 4370 self._match(TokenType.THEN) 4371 4372 if self._match(TokenType.INSERT): 4373 _this = self._parse_star() 4374 if _this: 4375 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4376 else: 4377 then = self.expression( 4378 exp.Insert, 4379 this=self._parse_value(), 4380 expression=self._match(TokenType.VALUES) and self._parse_value(), 4381 ) 4382 elif self._match(TokenType.UPDATE): 4383 expressions = self._parse_star() 4384 if expressions: 4385 then = self.expression(exp.Update, expressions=expressions) 4386 else: 4387 then = self.expression( 4388 exp.Update, 4389 expressions=self._match(TokenType.SET) 4390 and self._parse_csv(self._parse_equality), 4391 ) 4392 elif self._match(TokenType.DELETE): 4393 then = self.expression(exp.Var, this=self._prev.text) 4394 else: 4395 then = None 4396 4397 whens.append( 4398 self.expression( 4399 exp.When, 4400 matched=matched, 4401 source=source, 4402 condition=condition, 4403 then=then, 4404 ) 4405 ) 4406 4407 return self.expression( 4408 exp.Merge, 4409 this=target, 4410 using=using, 4411 on=on, 4412 expressions=whens, 4413 ) 4414 4415 def _parse_show(self) -> t.Optional[exp.Expression]: 4416 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4417 if parser: 4418 return parser(self) 4419 self._advance() 4420 return self.expression(exp.Show, this=self._prev.text.upper()) 4421 4422 def _parse_set_item_assignment( 4423 self, kind: t.Optional[str] = None 4424 ) -> t.Optional[exp.Expression]: 4425 index = self._index 4426 4427 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4428 return self._parse_set_transaction(global_=kind == "GLOBAL") 4429 4430 left = self._parse_primary() or self._parse_id_var() 4431 4432 if not self._match_texts(("=", "TO")): 4433 self._retreat(index) 4434 return None 4435 4436 right = self._parse_statement() or self._parse_id_var() 4437 this = self.expression( 4438 exp.EQ, 4439 this=left, 4440 expression=right, 4441 ) 4442 4443 return self.expression( 4444 exp.SetItem, 4445 this=this, 4446 kind=kind, 4447 ) 4448 4449 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4450 self._match_text_seq("TRANSACTION") 4451 characteristics = self._parse_csv( 4452 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4453 ) 4454 return self.expression( 4455 exp.SetItem, 4456 expressions=characteristics, 4457 kind="TRANSACTION", 4458 **{"global": global_}, # type: ignore 4459 ) 4460 4461 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4462 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4463 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4464 4465 def _parse_set(self) -> exp.Expression: 4466 index = self._index 4467 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4468 4469 if self._curr: 4470 self._retreat(index) 4471 return self._parse_as_command(self._prev) 4472 4473 return set_ 4474 4475 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4476 for option in options: 4477 if self._match_text_seq(*option.split(" ")): 4478 return exp.Var(this=option) 4479 return None 4480 4481 def _parse_as_command(self, start: Token) -> exp.Command: 4482 while self._curr: 4483 self._advance() 4484 text = self._find_sql(start, self._prev) 4485 size = len(start.text) 4486 return exp.Command(this=text[:size], expression=text[size:]) 4487 4488 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4489 settings = [] 4490 4491 self._match_l_paren() 4492 kind = self._parse_id_var() 4493 4494 if self._match(TokenType.L_PAREN): 4495 while True: 4496 key = self._parse_id_var() 4497 value = self._parse_primary() 4498 4499 if not key and value is None: 4500 break 4501 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4502 self._match(TokenType.R_PAREN) 4503 4504 self._match_r_paren() 4505 4506 return self.expression( 4507 exp.DictProperty, 4508 this=this, 4509 kind=kind.this if kind else None, 4510 settings=settings, 4511 ) 4512 4513 def _parse_dict_range(self, this: str) -> exp.DictRange: 4514 self._match_l_paren() 4515 has_min = self._match_text_seq("MIN") 4516 if has_min: 4517 min = self._parse_var() or self._parse_primary() 4518 self._match_text_seq("MAX") 4519 max = self._parse_var() or self._parse_primary() 4520 else: 4521 max = self._parse_var() or self._parse_primary() 4522 min = exp.Literal.number(0) 4523 self._match_r_paren() 4524 return self.expression(exp.DictRange, this=this, min=min, max=max) 4525 4526 def _find_parser( 4527 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4528 ) -> t.Optional[t.Callable]: 4529 if not self._curr: 4530 return None 4531 4532 index = self._index 4533 this = [] 4534 while True: 4535 # The current token might be multiple words 4536 curr = self._curr.text.upper() 4537 key = curr.split(" ") 4538 this.append(curr) 4539 self._advance() 4540 result, trie = in_trie(trie, key) 4541 if result == 0: 4542 break 4543 if result == 2: 4544 subparser = parsers[" ".join(this)] 4545 return subparser 4546 self._retreat(index) 4547 return None 4548 4549 def _match(self, token_type, advance=True, expression=None): 4550 if not self._curr: 4551 return None 4552 4553 if self._curr.token_type == token_type: 4554 if advance: 4555 self._advance() 4556 self._add_comments(expression) 4557 return True 4558 4559 return None 4560 4561 def _match_set(self, types, advance=True): 4562 if not self._curr: 4563 return None 4564 4565 if self._curr.token_type in types: 4566 if advance: 4567 self._advance() 4568 return True 4569 4570 return None 4571 4572 def _match_pair(self, token_type_a, token_type_b, advance=True): 4573 if not self._curr or not self._next: 4574 return None 4575 4576 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4577 if advance: 4578 self._advance(2) 4579 return True 4580 4581 return None 4582 4583 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4584 if not self._match(TokenType.L_PAREN, expression=expression): 4585 self.raise_error("Expecting (") 4586 4587 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4588 if not self._match(TokenType.R_PAREN, expression=expression): 4589 self.raise_error("Expecting )") 4590 4591 def _match_texts(self, texts, advance=True): 4592 if self._curr and self._curr.text.upper() in texts: 4593 if advance: 4594 self._advance() 4595 return True 4596 return False 4597 4598 def _match_text_seq(self, *texts, advance=True): 4599 index = self._index 4600 for text in texts: 4601 if self._curr and self._curr.text.upper() == text: 4602 self._advance() 4603 else: 4604 self._retreat(index) 4605 return False 4606 4607 if not advance: 4608 self._retreat(index) 4609 4610 return True 4611 4612 @t.overload 4613 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4614 ... 4615 4616 @t.overload 4617 def _replace_columns_with_dots( 4618 self, this: t.Optional[exp.Expression] 4619 ) -> t.Optional[exp.Expression]: 4620 ... 4621 4622 def _replace_columns_with_dots(self, this): 4623 if isinstance(this, exp.Dot): 4624 exp.replace_children(this, self._replace_columns_with_dots) 4625 elif isinstance(this, exp.Column): 4626 exp.replace_children(this, self._replace_columns_with_dots) 4627 table = this.args.get("table") 4628 this = ( 4629 self.expression(exp.Dot, this=table, expression=this.this) 4630 if table 4631 else self.expression(exp.Var, this=this.name) 4632 ) 4633 elif isinstance(this, exp.Identifier): 4634 this = self.expression(exp.Var, this=this.name) 4635 4636 return this 4637 4638 def _replace_lambda( 4639 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4640 ) -> t.Optional[exp.Expression]: 4641 if not node: 4642 return node 4643 4644 for column in node.find_all(exp.Column): 4645 if column.parts[0].name in lambda_variables: 4646 dot_or_id = column.to_dot() if column.table else column.this 4647 parent = column.parent 4648 4649 while isinstance(parent, exp.Dot): 4650 if not isinstance(parent.parent, exp.Dot): 4651 parent.replace(dot_or_id) 4652 break 4653 parent = parent.parent 4654 else: 4655 if column is node: 4656 node = dot_or_id 4657 else: 4658 column.replace(dot_or_id) 4659 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
810 def __init__( 811 self, 812 error_level: t.Optional[ErrorLevel] = None, 813 error_message_context: int = 100, 814 index_offset: int = 0, 815 unnest_column_only: bool = False, 816 alias_post_tablesample: bool = False, 817 max_errors: int = 3, 818 null_ordering: t.Optional[str] = None, 819 ): 820 self.error_level = error_level or ErrorLevel.IMMEDIATE 821 self.error_message_context = error_message_context 822 self.index_offset = index_offset 823 self.unnest_column_only = unnest_column_only 824 self.alias_post_tablesample = alias_post_tablesample 825 self.max_errors = max_errors 826 self.null_ordering = null_ordering 827 self.reset()
839 def parse( 840 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 841 ) -> t.List[t.Optional[exp.Expression]]: 842 """ 843 Parses a list of tokens and returns a list of syntax trees, one tree 844 per parsed SQL statement. 845 846 Args: 847 raw_tokens: the list of tokens. 848 sql: the original SQL string, used to produce helpful debug messages. 849 850 Returns: 851 The list of syntax trees. 852 """ 853 return self._parse( 854 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 855 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
857 def parse_into( 858 self, 859 expression_types: exp.IntoType, 860 raw_tokens: t.List[Token], 861 sql: t.Optional[str] = None, 862 ) -> t.List[t.Optional[exp.Expression]]: 863 """ 864 Parses a list of tokens into a given Expression type. If a collection of Expression 865 types is given instead, this method will try to parse the token list into each one 866 of them, stopping at the first for which the parsing succeeds. 867 868 Args: 869 expression_types: the expression type(s) to try and parse the token list into. 870 raw_tokens: the list of tokens. 871 sql: the original SQL string, used to produce helpful debug messages. 872 873 Returns: 874 The target Expression. 875 """ 876 errors = [] 877 for expression_type in ensure_collection(expression_types): 878 parser = self.EXPRESSION_PARSERS.get(expression_type) 879 if not parser: 880 raise TypeError(f"No parser registered for {expression_type}") 881 try: 882 return self._parse(parser, raw_tokens, sql) 883 except ParseError as e: 884 e.errors[0]["into_expression"] = expression_type 885 errors.append(e) 886 raise ParseError( 887 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 888 errors=merge_errors(errors), 889 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
925 def check_errors(self) -> None: 926 """ 927 Logs or raises any found errors, depending on the chosen error level setting. 928 """ 929 if self.error_level == ErrorLevel.WARN: 930 for error in self.errors: 931 logger.error(str(error)) 932 elif self.error_level == ErrorLevel.RAISE and self.errors: 933 raise ParseError( 934 concat_messages(self.errors, self.max_errors), 935 errors=merge_errors(self.errors), 936 )
Logs or raises any found errors, depending on the chosen error level setting.
938 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 939 """ 940 Appends an error in the list of recorded errors or raises it, depending on the chosen 941 error level setting. 942 """ 943 token = token or self._curr or self._prev or Token.string("") 944 start = token.start 945 end = token.end + 1 946 start_context = self.sql[max(start - self.error_message_context, 0) : start] 947 highlight = self.sql[start:end] 948 end_context = self.sql[end : end + self.error_message_context] 949 950 error = ParseError.new( 951 f"{message}. Line {token.line}, Col: {token.col}.\n" 952 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 953 description=message, 954 line=token.line, 955 col=token.col, 956 start_context=start_context, 957 highlight=highlight, 958 end_context=end_context, 959 ) 960 961 if self.error_level == ErrorLevel.IMMEDIATE: 962 raise error 963 964 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
966 def expression( 967 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 968 ) -> E: 969 """ 970 Creates a new, validated Expression. 971 972 Args: 973 exp_class: the expression class to instantiate. 974 comments: an optional list of comments to attach to the expression. 975 kwargs: the arguments to set for the expression along with their respective values. 976 977 Returns: 978 The target expression. 979 """ 980 instance = exp_class(**kwargs) 981 instance.add_comments(comments) if comments else self._add_comments(instance) 982 self.validate_expression(instance) 983 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
990 def validate_expression( 991 self, expression: exp.Expression, args: t.Optional[t.List] = None 992 ) -> None: 993 """ 994 Validates an already instantiated expression, making sure that all its mandatory arguments 995 are set. 996 997 Args: 998 expression: the expression to validate. 999 args: an optional list of items that was used to instantiate the expression, if it's a Func. 1000 """ 1001 if self.error_level == ErrorLevel.IGNORE: 1002 return 1003 1004 for error_message in expression.error_messages(args): 1005 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.