# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13logger = logging.getLogger("sqlglot") 14 15E = t.TypeVar("E", bound=exp.Expression) 16 17 18def parse_var_map(args: t.Sequence) -> exp.Expression: 19 if len(args) == 1 and args[0].is_star: 20 return exp.StarMap(this=args[0]) 21 22 keys = [] 23 values = [] 24 for i in range(0, len(args), 2): 25 keys.append(args[i]) 26 values.append(args[i + 1]) 27 return exp.VarMap( 28 keys=exp.Array(expressions=keys), 29 values=exp.Array(expressions=values), 30 ) 31 32 33def parse_like(args): 34 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 35 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 36 37 38def binary_range_parser( 39 expr_type: t.Type[exp.Expression], 40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 41 return lambda self, this: self._parse_escape( 42 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 43 ) 44 45 46class _Parser(type): 47 def __new__(cls, clsname, bases, attrs): 48 klass = super().__new__(cls, clsname, bases, attrs) 49 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 50 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 51 52 return klass 53 54 55class Parser(metaclass=_Parser): 56 """ 57 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 58 a parsed syntax tree. 59 60 Args: 61 error_level: the desired error level. 
62 Default: ErrorLevel.RAISE 63 error_message_context: determines the amount of context to capture from a 64 query string when displaying the error message (in number of characters). 65 Default: 50. 66 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 67 Default: 0 68 alias_post_tablesample: If the table alias comes after tablesample. 69 Default: False 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 null_ordering: Indicates the default null ordering method to use if not explicitly set. 74 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 75 Default: "nulls_are_small" 76 """ 77 78 FUNCTIONS: t.Dict[str, t.Callable] = { 79 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 80 "DATE_TO_DATE_STR": lambda args: exp.Cast( 81 this=seq_get(args, 0), 82 to=exp.DataType(this=exp.DataType.Type.TEXT), 83 ), 84 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 85 "IFNULL": exp.Coalesce.from_arg_list, 86 "LIKE": parse_like, 87 "TIME_TO_TIME_STR": lambda args: exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 92 this=exp.Cast( 93 this=seq_get(args, 0), 94 to=exp.DataType(this=exp.DataType.Type.TEXT), 95 ), 96 start=exp.Literal.number(1), 97 length=exp.Literal.number(10), 98 ), 99 "VAR_MAP": parse_var_map, 100 } 101 102 NO_PAREN_FUNCTIONS = { 103 TokenType.CURRENT_DATE: exp.CurrentDate, 104 TokenType.CURRENT_DATETIME: exp.CurrentDate, 105 TokenType.CURRENT_TIME: exp.CurrentTime, 106 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 107 TokenType.CURRENT_USER: exp.CurrentUser, 108 } 109 110 JOIN_HINTS: t.Set[str] = set() 111 112 NESTED_TYPE_TOKENS = { 113 TokenType.ARRAY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 TokenType.STRUCT, 117 } 118 119 TYPE_TOKENS = 
{ 120 TokenType.BIT, 121 TokenType.BOOLEAN, 122 TokenType.TINYINT, 123 TokenType.UTINYINT, 124 TokenType.SMALLINT, 125 TokenType.USMALLINT, 126 TokenType.INT, 127 TokenType.UINT, 128 TokenType.BIGINT, 129 TokenType.UBIGINT, 130 TokenType.INT128, 131 TokenType.UINT128, 132 TokenType.INT256, 133 TokenType.UINT256, 134 TokenType.FLOAT, 135 TokenType.DOUBLE, 136 TokenType.CHAR, 137 TokenType.NCHAR, 138 TokenType.VARCHAR, 139 TokenType.NVARCHAR, 140 TokenType.TEXT, 141 TokenType.MEDIUMTEXT, 142 TokenType.LONGTEXT, 143 TokenType.MEDIUMBLOB, 144 TokenType.LONGBLOB, 145 TokenType.BINARY, 146 TokenType.VARBINARY, 147 TokenType.JSON, 148 TokenType.JSONB, 149 TokenType.INTERVAL, 150 TokenType.TIME, 151 TokenType.TIMESTAMP, 152 TokenType.TIMESTAMPTZ, 153 TokenType.TIMESTAMPLTZ, 154 TokenType.DATETIME, 155 TokenType.DATETIME64, 156 TokenType.DATE, 157 TokenType.DECIMAL, 158 TokenType.BIGDECIMAL, 159 TokenType.UUID, 160 TokenType.GEOGRAPHY, 161 TokenType.GEOMETRY, 162 TokenType.HLLSKETCH, 163 TokenType.HSTORE, 164 TokenType.PSEUDO_TYPE, 165 TokenType.SUPER, 166 TokenType.SERIAL, 167 TokenType.SMALLSERIAL, 168 TokenType.BIGSERIAL, 169 TokenType.XML, 170 TokenType.UNIQUEIDENTIFIER, 171 TokenType.MONEY, 172 TokenType.SMALLMONEY, 173 TokenType.ROWVERSION, 174 TokenType.IMAGE, 175 TokenType.VARIANT, 176 TokenType.OBJECT, 177 TokenType.INET, 178 *NESTED_TYPE_TOKENS, 179 } 180 181 SUBQUERY_PREDICATES = { 182 TokenType.ANY: exp.Any, 183 TokenType.ALL: exp.All, 184 TokenType.EXISTS: exp.Exists, 185 TokenType.SOME: exp.Any, 186 } 187 188 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 189 190 DB_CREATABLES = { 191 TokenType.DATABASE, 192 TokenType.SCHEMA, 193 TokenType.TABLE, 194 TokenType.VIEW, 195 } 196 197 CREATABLES = { 198 TokenType.COLUMN, 199 TokenType.FUNCTION, 200 TokenType.INDEX, 201 TokenType.PROCEDURE, 202 *DB_CREATABLES, 203 } 204 205 ID_VAR_TOKENS = { 206 TokenType.VAR, 207 TokenType.ANTI, 208 TokenType.APPLY, 209 TokenType.ASC, 210 
TokenType.AUTO_INCREMENT, 211 TokenType.BEGIN, 212 TokenType.CACHE, 213 TokenType.COLLATE, 214 TokenType.COMMAND, 215 TokenType.COMMENT, 216 TokenType.COMMIT, 217 TokenType.CONSTRAINT, 218 TokenType.DEFAULT, 219 TokenType.DELETE, 220 TokenType.DESC, 221 TokenType.DESCRIBE, 222 TokenType.DIV, 223 TokenType.END, 224 TokenType.EXECUTE, 225 TokenType.ESCAPE, 226 TokenType.FALSE, 227 TokenType.FIRST, 228 TokenType.FILTER, 229 TokenType.FORMAT, 230 TokenType.FULL, 231 TokenType.IF, 232 TokenType.IS, 233 TokenType.ISNULL, 234 TokenType.INTERVAL, 235 TokenType.KEEP, 236 TokenType.LEFT, 237 TokenType.LOAD, 238 TokenType.MERGE, 239 TokenType.NATURAL, 240 TokenType.NEXT, 241 TokenType.OFFSET, 242 TokenType.ORDINALITY, 243 TokenType.OVERWRITE, 244 TokenType.PARTITION, 245 TokenType.PERCENT, 246 TokenType.PIVOT, 247 TokenType.PRAGMA, 248 TokenType.RANGE, 249 TokenType.REFERENCES, 250 TokenType.RIGHT, 251 TokenType.ROW, 252 TokenType.ROWS, 253 TokenType.SEMI, 254 TokenType.SET, 255 TokenType.SETTINGS, 256 TokenType.SHOW, 257 TokenType.TEMPORARY, 258 TokenType.TOP, 259 TokenType.TRUE, 260 TokenType.UNIQUE, 261 TokenType.UNPIVOT, 262 TokenType.VOLATILE, 263 TokenType.WINDOW, 264 *CREATABLES, 265 *SUBQUERY_PREDICATES, 266 *TYPE_TOKENS, 267 *NO_PAREN_FUNCTIONS, 268 } 269 270 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 271 272 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 273 TokenType.APPLY, 274 TokenType.FULL, 275 TokenType.LEFT, 276 TokenType.LOCK, 277 TokenType.NATURAL, 278 TokenType.OFFSET, 279 TokenType.RIGHT, 280 TokenType.WINDOW, 281 } 282 283 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 284 285 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 286 287 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 288 289 FUNC_TOKENS = { 290 TokenType.COMMAND, 291 TokenType.CURRENT_DATE, 292 TokenType.CURRENT_DATETIME, 293 TokenType.CURRENT_TIMESTAMP, 294 TokenType.CURRENT_TIME, 295 TokenType.CURRENT_USER, 296 TokenType.FILTER, 297 TokenType.FIRST, 298 
TokenType.FORMAT, 299 TokenType.GLOB, 300 TokenType.IDENTIFIER, 301 TokenType.INDEX, 302 TokenType.ISNULL, 303 TokenType.ILIKE, 304 TokenType.LIKE, 305 TokenType.MERGE, 306 TokenType.OFFSET, 307 TokenType.PRIMARY_KEY, 308 TokenType.RANGE, 309 TokenType.REPLACE, 310 TokenType.ROW, 311 TokenType.UNNEST, 312 TokenType.VAR, 313 TokenType.LEFT, 314 TokenType.RIGHT, 315 TokenType.DATE, 316 TokenType.DATETIME, 317 TokenType.TABLE, 318 TokenType.TIMESTAMP, 319 TokenType.TIMESTAMPTZ, 320 TokenType.WINDOW, 321 *TYPE_TOKENS, 322 *SUBQUERY_PREDICATES, 323 } 324 325 CONJUNCTION = { 326 TokenType.AND: exp.And, 327 TokenType.OR: exp.Or, 328 } 329 330 EQUALITY = { 331 TokenType.EQ: exp.EQ, 332 TokenType.NEQ: exp.NEQ, 333 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 334 } 335 336 COMPARISON = { 337 TokenType.GT: exp.GT, 338 TokenType.GTE: exp.GTE, 339 TokenType.LT: exp.LT, 340 TokenType.LTE: exp.LTE, 341 } 342 343 BITWISE = { 344 TokenType.AMP: exp.BitwiseAnd, 345 TokenType.CARET: exp.BitwiseXor, 346 TokenType.PIPE: exp.BitwiseOr, 347 TokenType.DPIPE: exp.DPipe, 348 } 349 350 TERM = { 351 TokenType.DASH: exp.Sub, 352 TokenType.PLUS: exp.Add, 353 TokenType.MOD: exp.Mod, 354 TokenType.COLLATE: exp.Collate, 355 } 356 357 FACTOR = { 358 TokenType.DIV: exp.IntDiv, 359 TokenType.LR_ARROW: exp.Distance, 360 TokenType.SLASH: exp.Div, 361 TokenType.STAR: exp.Mul, 362 } 363 364 TIMESTAMPS = { 365 TokenType.TIME, 366 TokenType.TIMESTAMP, 367 TokenType.TIMESTAMPTZ, 368 TokenType.TIMESTAMPLTZ, 369 } 370 371 SET_OPERATIONS = { 372 TokenType.UNION, 373 TokenType.INTERSECT, 374 TokenType.EXCEPT, 375 } 376 377 JOIN_SIDES = { 378 TokenType.LEFT, 379 TokenType.RIGHT, 380 TokenType.FULL, 381 } 382 383 JOIN_KINDS = { 384 TokenType.INNER, 385 TokenType.OUTER, 386 TokenType.CROSS, 387 TokenType.SEMI, 388 TokenType.ANTI, 389 } 390 391 LAMBDAS = { 392 TokenType.ARROW: lambda self, expressions: self.expression( 393 exp.Lambda, 394 this=self._replace_lambda( 395 self._parse_conjunction(), 396 {node.name for node 
in expressions}, 397 ), 398 expressions=expressions, 399 ), 400 TokenType.FARROW: lambda self, expressions: self.expression( 401 exp.Kwarg, 402 this=exp.Var(this=expressions[0].name), 403 expression=self._parse_conjunction(), 404 ), 405 } 406 407 COLUMN_OPERATORS = { 408 TokenType.DOT: None, 409 TokenType.DCOLON: lambda self, this, to: self.expression( 410 exp.Cast if self.STRICT_CAST else exp.TryCast, 411 this=this, 412 to=to, 413 ), 414 TokenType.ARROW: lambda self, this, path: self.expression( 415 exp.JSONExtract, 416 this=this, 417 expression=path, 418 ), 419 TokenType.DARROW: lambda self, this, path: self.expression( 420 exp.JSONExtractScalar, 421 this=this, 422 expression=path, 423 ), 424 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 425 exp.JSONBExtract, 426 this=this, 427 expression=path, 428 ), 429 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 430 exp.JSONBExtractScalar, 431 this=this, 432 expression=path, 433 ), 434 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 435 exp.JSONBContains, 436 this=this, 437 expression=key, 438 ), 439 } 440 441 EXPRESSION_PARSERS = { 442 exp.Column: lambda self: self._parse_column(), 443 exp.DataType: lambda self: self._parse_types(), 444 exp.From: lambda self: self._parse_from(), 445 exp.Group: lambda self: self._parse_group(), 446 exp.Identifier: lambda self: self._parse_id_var(), 447 exp.Lateral: lambda self: self._parse_lateral(), 448 exp.Join: lambda self: self._parse_join(), 449 exp.Order: lambda self: self._parse_order(), 450 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 451 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 452 exp.Lambda: lambda self: self._parse_lambda(), 453 exp.Limit: lambda self: self._parse_limit(), 454 exp.Offset: lambda self: self._parse_offset(), 455 exp.TableAlias: lambda self: self._parse_table_alias(), 456 exp.Table: lambda self: self._parse_table_parts(), 457 exp.Condition: lambda self: 
self._parse_conjunction(), 458 exp.Expression: lambda self: self._parse_statement(), 459 exp.Properties: lambda self: self._parse_properties(), 460 exp.Where: lambda self: self._parse_where(), 461 exp.Ordered: lambda self: self._parse_ordered(), 462 exp.Having: lambda self: self._parse_having(), 463 exp.With: lambda self: self._parse_with(), 464 exp.Window: lambda self: self._parse_named_window(), 465 exp.Qualify: lambda self: self._parse_qualify(), 466 exp.Returning: lambda self: self._parse_returning(), 467 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 468 } 469 470 STATEMENT_PARSERS = { 471 TokenType.ALTER: lambda self: self._parse_alter(), 472 TokenType.BEGIN: lambda self: self._parse_transaction(), 473 TokenType.CACHE: lambda self: self._parse_cache(), 474 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 475 TokenType.COMMENT: lambda self: self._parse_comment(), 476 TokenType.CREATE: lambda self: self._parse_create(), 477 TokenType.DELETE: lambda self: self._parse_delete(), 478 TokenType.DESC: lambda self: self._parse_describe(), 479 TokenType.DESCRIBE: lambda self: self._parse_describe(), 480 TokenType.DROP: lambda self: self._parse_drop(), 481 TokenType.END: lambda self: self._parse_commit_or_rollback(), 482 TokenType.INSERT: lambda self: self._parse_insert(), 483 TokenType.LOAD: lambda self: self._parse_load(), 484 TokenType.MERGE: lambda self: self._parse_merge(), 485 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 486 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 487 TokenType.SET: lambda self: self._parse_set(), 488 TokenType.UNCACHE: lambda self: self._parse_uncache(), 489 TokenType.UPDATE: lambda self: self._parse_update(), 490 TokenType.USE: lambda self: self.expression( 491 exp.Use, 492 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 493 and exp.Var(this=self._prev.text), 494 this=self._parse_table(schema=False), 495 ), 496 } 497 498 
UNARY_PARSERS = { 499 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 500 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 501 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 502 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 503 } 504 505 PRIMARY_PARSERS = { 506 TokenType.STRING: lambda self, token: self.expression( 507 exp.Literal, this=token.text, is_string=True 508 ), 509 TokenType.NUMBER: lambda self, token: self.expression( 510 exp.Literal, this=token.text, is_string=False 511 ), 512 TokenType.STAR: lambda self, _: self.expression( 513 exp.Star, 514 **{"except": self._parse_except(), "replace": self._parse_replace()}, 515 ), 516 TokenType.NULL: lambda self, _: self.expression(exp.Null), 517 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 518 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 519 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 520 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 521 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 522 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 523 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 524 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 525 } 526 527 PLACEHOLDER_PARSERS = { 528 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 529 TokenType.PARAMETER: lambda self: self._parse_parameter(), 530 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 531 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 532 else None, 533 } 534 535 RANGE_PARSERS = { 536 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 537 TokenType.GLOB: 
binary_range_parser(exp.Glob), 538 TokenType.ILIKE: binary_range_parser(exp.ILike), 539 TokenType.IN: lambda self, this: self._parse_in(this), 540 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 541 TokenType.IS: lambda self, this: self._parse_is(this), 542 TokenType.LIKE: binary_range_parser(exp.Like), 543 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 544 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 545 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 546 } 547 548 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 549 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 550 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 551 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 552 "CHARACTER SET": lambda self: self._parse_character_set(), 553 "CHECKSUM": lambda self: self._parse_checksum(), 554 "CLUSTER": lambda self: self._parse_cluster(), 555 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 556 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 557 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 558 "DEFINER": lambda self: self._parse_definer(), 559 "DETERMINISTIC": lambda self: self.expression( 560 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 561 ), 562 "DISTKEY": lambda self: self._parse_distkey(), 563 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 564 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 565 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 566 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 567 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 568 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 569 "FREESPACE": lambda self: self._parse_freespace(), 570 "IMMUTABLE": lambda 
self: self.expression( 571 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 572 ), 573 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 574 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 575 "LIKE": lambda self: self._parse_create_like(), 576 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 577 "LOCK": lambda self: self._parse_locking(), 578 "LOCKING": lambda self: self._parse_locking(), 579 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 580 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 581 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 582 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 583 "NO": lambda self: self._parse_no_property(), 584 "ON": lambda self: self._parse_on_property(), 585 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 586 "PARTITION BY": lambda self: self._parse_partitioned_by(), 587 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 588 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 589 "PRIMARY KEY": lambda self: self._parse_primary_key(), 590 "RETURNS": lambda self: self._parse_returns(), 591 "ROW": lambda self: self._parse_row(), 592 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 593 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 594 "SETTINGS": lambda self: self.expression( 595 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 596 ), 597 "SORTKEY": lambda self: self._parse_sortkey(), 598 "STABLE": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("STABLE") 600 ), 601 "STORED": lambda self: self._parse_stored(), 602 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 603 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 604 "TEMPORARY": lambda self: 
self.expression(exp.TemporaryProperty), 605 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 606 "TTL": lambda self: self._parse_ttl(), 607 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 608 "VOLATILE": lambda self: self._parse_volatile_property(), 609 "WITH": lambda self: self._parse_with_property(), 610 } 611 612 CONSTRAINT_PARSERS = { 613 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 614 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 615 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 616 "CHARACTER SET": lambda self: self.expression( 617 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 618 ), 619 "CHECK": lambda self: self.expression( 620 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 621 ), 622 "COLLATE": lambda self: self.expression( 623 exp.CollateColumnConstraint, this=self._parse_var() 624 ), 625 "COMMENT": lambda self: self.expression( 626 exp.CommentColumnConstraint, this=self._parse_string() 627 ), 628 "COMPRESS": lambda self: self._parse_compress(), 629 "DEFAULT": lambda self: self.expression( 630 exp.DefaultColumnConstraint, this=self._parse_bitwise() 631 ), 632 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 633 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 634 "FORMAT": lambda self: self.expression( 635 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 636 ), 637 "GENERATED": lambda self: self._parse_generated_as_identity(), 638 "IDENTITY": lambda self: self._parse_auto_increment(), 639 "INLINE": lambda self: self._parse_inline(), 640 "LIKE": lambda self: self._parse_create_like(), 641 "NOT": lambda self: self._parse_not_constraint(), 642 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 643 "ON": lambda self: self._match(TokenType.UPDATE) 644 and 
self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 645 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 646 "PRIMARY KEY": lambda self: self._parse_primary_key(), 647 "REFERENCES": lambda self: self._parse_references(match=False), 648 "TITLE": lambda self: self.expression( 649 exp.TitleColumnConstraint, this=self._parse_var_or_string() 650 ), 651 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 652 "UNIQUE": lambda self: self._parse_unique(), 653 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 654 } 655 656 ALTER_PARSERS = { 657 "ADD": lambda self: self._parse_alter_table_add(), 658 "ALTER": lambda self: self._parse_alter_table_alter(), 659 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 660 "DROP": lambda self: self._parse_alter_table_drop(), 661 "RENAME": lambda self: self._parse_alter_table_rename(), 662 } 663 664 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 665 666 NO_PAREN_FUNCTION_PARSERS = { 667 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 668 TokenType.CASE: lambda self: self._parse_case(), 669 TokenType.IF: lambda self: self._parse_if(), 670 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 671 exp.NextValueFor, 672 this=self._parse_column(), 673 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 674 ), 675 } 676 677 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 678 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 679 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 680 "DECODE": lambda self: self._parse_decode(), 681 "EXTRACT": lambda self: self._parse_extract(), 682 "JSON_OBJECT": lambda self: self._parse_json_object(), 683 "LOG": lambda self: self._parse_logarithm(), 684 "MATCH": lambda self: self._parse_match_against(), 685 "OPENJSON": lambda self: 
self._parse_open_json(), 686 "POSITION": lambda self: self._parse_position(), 687 "STRING_AGG": lambda self: self._parse_string_agg(), 688 "SUBSTRING": lambda self: self._parse_substring(), 689 "STRUCT": lambda self: self._parse_struct(), 690 "TRIM": lambda self: self._parse_trim(), 691 "TRY_CAST": lambda self: self._parse_cast(False), 692 "TRY_CONVERT": lambda self: self._parse_convert(False), 693 } 694 695 QUERY_MODIFIER_PARSERS = { 696 "joins": lambda self: list(iter(self._parse_join, None)), 697 "laterals": lambda self: list(iter(self._parse_lateral, None)), 698 "match": lambda self: self._parse_match_recognize(), 699 "where": lambda self: self._parse_where(), 700 "group": lambda self: self._parse_group(), 701 "having": lambda self: self._parse_having(), 702 "qualify": lambda self: self._parse_qualify(), 703 "windows": lambda self: self._parse_window_clause(), 704 "order": lambda self: self._parse_order(), 705 "limit": lambda self: self._parse_limit(), 706 "offset": lambda self: self._parse_offset(), 707 "locks": lambda self: self._parse_locks(), 708 "sample": lambda self: self._parse_table_sample(as_modifier=True), 709 } 710 711 SET_PARSERS = { 712 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 713 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 714 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 715 "TRANSACTION": lambda self: self._parse_set_transaction(), 716 } 717 718 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 719 720 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 721 722 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 723 724 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 725 726 TRANSACTION_CHARACTERISTICS = { 727 "ISOLATION LEVEL REPEATABLE READ", 728 "ISOLATION LEVEL READ COMMITTED", 729 "ISOLATION LEVEL READ UNCOMMITTED", 730 "ISOLATION LEVEL SERIALIZABLE", 731 "READ WRITE", 732 "READ ONLY", 733 } 734 735 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", 
"REPLACE", "ROLLBACK"} 736 737 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 738 739 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 740 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 741 742 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 743 744 STRICT_CAST = True 745 746 CONVERT_TYPE_FIRST = False 747 748 PREFIXED_PIVOT_COLUMNS = False 749 IDENTIFY_PIVOT_STRINGS = False 750 751 LOG_BASE_FIRST = True 752 LOG_DEFAULTS_TO_LN = False 753 754 __slots__ = ( 755 "error_level", 756 "error_message_context", 757 "sql", 758 "errors", 759 "index_offset", 760 "unnest_column_only", 761 "alias_post_tablesample", 762 "max_errors", 763 "null_ordering", 764 "_tokens", 765 "_index", 766 "_curr", 767 "_next", 768 "_prev", 769 "_prev_comments", 770 "_show_trie", 771 "_set_trie", 772 ) 773 774 def __init__( 775 self, 776 error_level: t.Optional[ErrorLevel] = None, 777 error_message_context: int = 100, 778 index_offset: int = 0, 779 unnest_column_only: bool = False, 780 alias_post_tablesample: bool = False, 781 max_errors: int = 3, 782 null_ordering: t.Optional[str] = None, 783 ): 784 self.error_level = error_level or ErrorLevel.IMMEDIATE 785 self.error_message_context = error_message_context 786 self.index_offset = index_offset 787 self.unnest_column_only = unnest_column_only 788 self.alias_post_tablesample = alias_post_tablesample 789 self.max_errors = max_errors 790 self.null_ordering = null_ordering 791 self.reset() 792 793 def reset(self): 794 self.sql = "" 795 self.errors = [] 796 self._tokens = [] 797 self._index = 0 798 self._curr = None 799 self._next = None 800 self._prev = None 801 self._prev_comments = None 802 803 def parse( 804 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 805 ) -> t.List[t.Optional[exp.Expression]]: 806 """ 807 Parses a list of tokens and returns a list of syntax trees, one tree 808 per parsed SQL statement. 809 810 Args: 811 raw_tokens: the list of tokens. 
812 sql: the original SQL string, used to produce helpful debug messages. 813 814 Returns: 815 The list of syntax trees. 816 """ 817 return self._parse( 818 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 819 ) 820 821 def parse_into( 822 self, 823 expression_types: exp.IntoType, 824 raw_tokens: t.List[Token], 825 sql: t.Optional[str] = None, 826 ) -> t.List[t.Optional[exp.Expression]]: 827 """ 828 Parses a list of tokens into a given Expression type. If a collection of Expression 829 types is given instead, this method will try to parse the token list into each one 830 of them, stopping at the first for which the parsing succeeds. 831 832 Args: 833 expression_types: the expression type(s) to try and parse the token list into. 834 raw_tokens: the list of tokens. 835 sql: the original SQL string, used to produce helpful debug messages. 836 837 Returns: 838 The target Expression. 839 """ 840 errors = [] 841 for expression_type in ensure_collection(expression_types): 842 parser = self.EXPRESSION_PARSERS.get(expression_type) 843 if not parser: 844 raise TypeError(f"No parser registered for {expression_type}") 845 try: 846 return self._parse(parser, raw_tokens, sql) 847 except ParseError as e: 848 e.errors[0]["into_expression"] = expression_type 849 errors.append(e) 850 raise ParseError( 851 f"Failed to parse into {expression_types}", 852 errors=merge_errors(errors), 853 ) from errors[-1] 854 855 def _parse( 856 self, 857 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 858 raw_tokens: t.List[Token], 859 sql: t.Optional[str] = None, 860 ) -> t.List[t.Optional[exp.Expression]]: 861 self.reset() 862 self.sql = sql or "" 863 total = len(raw_tokens) 864 chunks: t.List[t.List[Token]] = [[]] 865 866 for i, token in enumerate(raw_tokens): 867 if token.token_type == TokenType.SEMICOLON: 868 if i < total - 1: 869 chunks.append([]) 870 else: 871 chunks[-1].append(token) 872 873 expressions = [] 874 875 for tokens in chunks: 876 
self._index = -1 877 self._tokens = tokens 878 self._advance() 879 880 expressions.append(parse_method(self)) 881 882 if self._index < len(self._tokens): 883 self.raise_error("Invalid expression / Unexpected token") 884 885 self.check_errors() 886 887 return expressions 888 889 def check_errors(self) -> None: 890 """ 891 Logs or raises any found errors, depending on the chosen error level setting. 892 """ 893 if self.error_level == ErrorLevel.WARN: 894 for error in self.errors: 895 logger.error(str(error)) 896 elif self.error_level == ErrorLevel.RAISE and self.errors: 897 raise ParseError( 898 concat_messages(self.errors, self.max_errors), 899 errors=merge_errors(self.errors), 900 ) 901 902 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 903 """ 904 Appends an error in the list of recorded errors or raises it, depending on the chosen 905 error level setting. 906 """ 907 token = token or self._curr or self._prev or Token.string("") 908 start = token.start 909 end = token.end + 1 910 start_context = self.sql[max(start - self.error_message_context, 0) : start] 911 highlight = self.sql[start:end] 912 end_context = self.sql[end : end + self.error_message_context] 913 914 error = ParseError.new( 915 f"{message}. Line {token.line}, Col: {token.col}.\n" 916 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 917 description=message, 918 line=token.line, 919 col=token.col, 920 start_context=start_context, 921 highlight=highlight, 922 end_context=end_context, 923 ) 924 925 if self.error_level == ErrorLevel.IMMEDIATE: 926 raise error 927 928 self.errors.append(error) 929 930 def expression( 931 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 932 ) -> E: 933 """ 934 Creates a new, validated Expression. 935 936 Args: 937 exp_class: the expression class to instantiate. 938 comments: an optional list of comments to attach to the expression. 
939 kwargs: the arguments to set for the expression along with their respective values. 940 941 Returns: 942 The target expression. 943 """ 944 instance = exp_class(**kwargs) 945 instance.add_comments(comments) if comments else self._add_comments(instance) 946 self.validate_expression(instance) 947 return instance 948 949 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 950 if expression and self._prev_comments: 951 expression.add_comments(self._prev_comments) 952 self._prev_comments = None 953 954 def validate_expression( 955 self, expression: exp.Expression, args: t.Optional[t.List] = None 956 ) -> None: 957 """ 958 Validates an already instantiated expression, making sure that all its mandatory arguments 959 are set. 960 961 Args: 962 expression: the expression to validate. 963 args: an optional list of items that was used to instantiate the expression, if it's a Func. 964 """ 965 if self.error_level == ErrorLevel.IGNORE: 966 return 967 968 for error_message in expression.error_messages(args): 969 self.raise_error(error_message) 970 971 def _find_sql(self, start: Token, end: Token) -> str: 972 return self.sql[start.start : end.end + 1] 973 974 def _advance(self, times: int = 1) -> None: 975 self._index += times 976 self._curr = seq_get(self._tokens, self._index) 977 self._next = seq_get(self._tokens, self._index + 1) 978 if self._index > 0: 979 self._prev = self._tokens[self._index - 1] 980 self._prev_comments = self._prev.comments 981 else: 982 self._prev = None 983 self._prev_comments = None 984 985 def _retreat(self, index: int) -> None: 986 if index != self._index: 987 self._advance(index - self._index) 988 989 def _parse_command(self) -> exp.Command: 990 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 991 992 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 993 start = self._prev 994 exists = self._parse_exists() if allow_exists else None 995 996 
self._match(TokenType.ON) 997 998 kind = self._match_set(self.CREATABLES) and self._prev 999 1000 if not kind: 1001 return self._parse_as_command(start) 1002 1003 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1004 this = self._parse_user_defined_function(kind=kind.token_type) 1005 elif kind.token_type == TokenType.TABLE: 1006 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1007 elif kind.token_type == TokenType.COLUMN: 1008 this = self._parse_column() 1009 else: 1010 this = self._parse_id_var() 1011 1012 self._match(TokenType.IS) 1013 1014 return self.expression( 1015 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1016 ) 1017 1018 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1019 def _parse_ttl(self) -> exp.Expression: 1020 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1021 this = self._parse_bitwise() 1022 1023 if self._match_text_seq("DELETE"): 1024 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1025 if self._match_text_seq("RECOMPRESS"): 1026 return self.expression( 1027 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1028 ) 1029 if self._match_text_seq("TO", "DISK"): 1030 return self.expression( 1031 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1032 ) 1033 if self._match_text_seq("TO", "VOLUME"): 1034 return self.expression( 1035 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1036 ) 1037 1038 return this 1039 1040 expressions = self._parse_csv(_parse_ttl_action) 1041 where = self._parse_where() 1042 group = self._parse_group() 1043 1044 aggregates = None 1045 if group and self._match(TokenType.SET): 1046 aggregates = self._parse_csv(self._parse_set_item) 1047 1048 return self.expression( 1049 exp.MergeTreeTTL, 1050 expressions=expressions, 1051 where=where, 1052 group=group, 1053 aggregates=aggregates, 1054 ) 1055 1056 def 
_parse_statement(self) -> t.Optional[exp.Expression]: 1057 if self._curr is None: 1058 return None 1059 1060 if self._match_set(self.STATEMENT_PARSERS): 1061 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1062 1063 if self._match_set(Tokenizer.COMMANDS): 1064 return self._parse_command() 1065 1066 expression = self._parse_expression() 1067 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1068 return self._parse_query_modifiers(expression) 1069 1070 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1071 start = self._prev 1072 temporary = self._match(TokenType.TEMPORARY) 1073 materialized = self._match_text_seq("MATERIALIZED") 1074 kind = self._match_set(self.CREATABLES) and self._prev.text 1075 if not kind: 1076 return self._parse_as_command(start) 1077 1078 return self.expression( 1079 exp.Drop, 1080 exists=self._parse_exists(), 1081 this=self._parse_table(schema=True), 1082 kind=kind, 1083 temporary=temporary, 1084 materialized=materialized, 1085 cascade=self._match_text_seq("CASCADE"), 1086 constraints=self._match_text_seq("CONSTRAINTS"), 1087 purge=self._match_text_seq("PURGE"), 1088 ) 1089 1090 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1091 return ( 1092 self._match(TokenType.IF) 1093 and (not not_ or self._match(TokenType.NOT)) 1094 and self._match(TokenType.EXISTS) 1095 ) 1096 1097 def _parse_create(self) -> t.Optional[exp.Expression]: 1098 start = self._prev 1099 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1100 TokenType.OR, TokenType.REPLACE 1101 ) 1102 unique = self._match(TokenType.UNIQUE) 1103 1104 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1105 self._match(TokenType.TABLE) 1106 1107 properties = None 1108 create_token = self._match_set(self.CREATABLES) and self._prev 1109 1110 if not create_token: 1111 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1112 create_token = 
self._match_set(self.CREATABLES) and self._prev 1113 1114 if not properties or not create_token: 1115 return self._parse_as_command(start) 1116 1117 exists = self._parse_exists(not_=True) 1118 this = None 1119 expression = None 1120 indexes = None 1121 no_schema_binding = None 1122 begin = None 1123 clone = None 1124 1125 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1126 this = self._parse_user_defined_function(kind=create_token.token_type) 1127 temp_properties = self._parse_properties() 1128 if properties and temp_properties: 1129 properties.expressions.extend(temp_properties.expressions) 1130 elif temp_properties: 1131 properties = temp_properties 1132 1133 self._match(TokenType.ALIAS) 1134 begin = self._match(TokenType.BEGIN) 1135 return_ = self._match_text_seq("RETURN") 1136 expression = self._parse_statement() 1137 1138 if return_: 1139 expression = self.expression(exp.Return, this=expression) 1140 elif create_token.token_type == TokenType.INDEX: 1141 this = self._parse_index() 1142 elif create_token.token_type in self.DB_CREATABLES: 1143 table_parts = self._parse_table_parts(schema=True) 1144 1145 # exp.Properties.Location.POST_NAME 1146 if self._match(TokenType.COMMA): 1147 temp_properties = self._parse_properties(before=True) 1148 if properties and temp_properties: 1149 properties.expressions.extend(temp_properties.expressions) 1150 elif temp_properties: 1151 properties = temp_properties 1152 1153 this = self._parse_schema(this=table_parts) 1154 1155 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1156 temp_properties = self._parse_properties() 1157 if properties and temp_properties: 1158 properties.expressions.extend(temp_properties.expressions) 1159 elif temp_properties: 1160 properties = temp_properties 1161 1162 self._match(TokenType.ALIAS) 1163 1164 # exp.Properties.Location.POST_ALIAS 1165 if not ( 1166 self._match(TokenType.SELECT, advance=False) 1167 or self._match(TokenType.WITH, advance=False) 1168 or 
self._match(TokenType.L_PAREN, advance=False) 1169 ): 1170 temp_properties = self._parse_properties() 1171 if properties and temp_properties: 1172 properties.expressions.extend(temp_properties.expressions) 1173 elif temp_properties: 1174 properties = temp_properties 1175 1176 expression = self._parse_ddl_select() 1177 1178 if create_token.token_type == TokenType.TABLE: 1179 indexes = [] 1180 while True: 1181 index = self._parse_create_table_index() 1182 1183 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1184 temp_properties = self._parse_properties() 1185 if properties and temp_properties: 1186 properties.expressions.extend(temp_properties.expressions) 1187 elif temp_properties: 1188 properties = temp_properties 1189 1190 if not index: 1191 break 1192 else: 1193 self._match(TokenType.COMMA) 1194 indexes.append(index) 1195 elif create_token.token_type == TokenType.VIEW: 1196 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1197 no_schema_binding = True 1198 1199 if self._match_text_seq("CLONE"): 1200 clone = self._parse_table(schema=True) 1201 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1202 clone_kind = ( 1203 self._match(TokenType.L_PAREN) 1204 and self._match_texts(self.CLONE_KINDS) 1205 and self._prev.text.upper() 1206 ) 1207 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1208 self._match(TokenType.R_PAREN) 1209 clone = self.expression( 1210 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1211 ) 1212 1213 return self.expression( 1214 exp.Create, 1215 this=this, 1216 kind=create_token.text, 1217 replace=replace, 1218 unique=unique, 1219 expression=expression, 1220 exists=exists, 1221 properties=properties, 1222 indexes=indexes, 1223 no_schema_binding=no_schema_binding, 1224 begin=begin, 1225 clone=clone, 1226 ) 1227 1228 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1229 # only used for teradata currently 1230 
        self._match(TokenType.COMMA)

        # Teradata properties may be prefixed by modifier keywords; record
        # whichever ones are present so they can be forwarded to the parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser does not accept one of the modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Accumulate consecutive properties into a single exp.Properties node.
        properties = []

        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE directly after CREATE [OR REPLACE] [UNIQUE] denotes a
        # volatile table; elsewhere it is a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (...) -- a parenthesized property list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.Expression:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.Expression:
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF) or None (unspecified).
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("BY"):
            # Not CLUSTER BY -- give back the token that got us here.
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        # Either an explicit `= n [PERCENT]` value, or just the NO/DEFAULT flags.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        return self.expression(
            exp.MergeBlockRatioProperty,
            no=no,
            default=default,
        )

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.Expression:
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.Expression:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        # LOCKING <kind> [<name>] FOR/IN <lock type> [OVERRIDE].
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a name; ROW locks do not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        # Tri-state statistics flag: AND STATISTICS / AND NO STATISTICS / absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.Property]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Property]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            # RETURNS TABLE<...> uses a generic-style schema declaration.
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY writes query results straight to a path.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        # Handles both ON CONFLICT and ON DUPLICATE KEY forms.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED with options.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.Expression:
        # Hive LOAD DATA ... INPATH; anything else becomes a raw command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            # OPTIONS ('key' = 'value') -- a single key/value pair.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP n may precede the projection list in some dialects.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are usually comma-separated, but a stray WITH between
            # them is tolerated as a separator too.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        # Optional column alias list; backtrack if the parens held nothing.
        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Attach trailing clauses (joins, where, group, order, limit, ...) to
        # modifiable expressions; everything else passes through untouched.
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
self.raise_error("Expected */ after HINT") 1979 return self.expression(exp.Hint, expressions=hints) 1980 1981 return None 1982 1983 def _parse_into(self) -> t.Optional[exp.Expression]: 1984 if not self._match(TokenType.INTO): 1985 return None 1986 1987 temp = self._match(TokenType.TEMPORARY) 1988 unlogged = self._match_text_seq("UNLOGGED") 1989 self._match(TokenType.TABLE) 1990 1991 return self.expression( 1992 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1993 ) 1994 1995 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 1996 if not self._match(TokenType.FROM): 1997 return None 1998 1999 comments = self._prev_comments 2000 this = self._parse_table() 2001 2002 return self.expression( 2003 exp.From, 2004 comments=comments, 2005 this=self._parse_query_modifiers(this) if modifiers else this, 2006 ) 2007 2008 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2009 if not self._match(TokenType.MATCH_RECOGNIZE): 2010 return None 2011 2012 self._match_l_paren() 2013 2014 partition = self._parse_partition_by() 2015 order = self._parse_order() 2016 measures = ( 2017 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2018 ) 2019 2020 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2021 rows = exp.Var(this="ONE ROW PER MATCH") 2022 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2023 text = "ALL ROWS PER MATCH" 2024 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2025 text += f" SHOW EMPTY MATCHES" 2026 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2027 text += f" OMIT EMPTY MATCHES" 2028 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2029 text += f" WITH UNMATCHED ROWS" 2030 rows = exp.Var(this=text) 2031 else: 2032 rows = None 2033 2034 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2035 text = "AFTER MATCH SKIP" 2036 if self._match_text_seq("PAST", "LAST", "ROW"): 2037 text += f" PAST LAST ROW" 2038 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 2039 text += f" TO NEXT ROW" 2040 elif self._match_text_seq("TO", "FIRST"): 2041 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2042 elif self._match_text_seq("TO", "LAST"): 2043 text += f" TO LAST {self._advance_any().text}" # type: ignore 2044 after = exp.Var(this=text) 2045 else: 2046 after = None 2047 2048 if self._match_text_seq("PATTERN"): 2049 self._match_l_paren() 2050 2051 if not self._curr: 2052 self.raise_error("Expecting )", self._curr) 2053 2054 paren = 1 2055 start = self._curr 2056 2057 while self._curr and paren > 0: 2058 if self._curr.token_type == TokenType.L_PAREN: 2059 paren += 1 2060 if self._curr.token_type == TokenType.R_PAREN: 2061 paren -= 1 2062 end = self._prev 2063 self._advance() 2064 if paren > 0: 2065 self.raise_error("Expecting )", self._curr) 2066 pattern = exp.Var(this=self._find_sql(start, end)) 2067 else: 2068 pattern = None 2069 2070 define = ( 2071 self._parse_csv( 2072 lambda: self.expression( 2073 exp.Alias, 2074 alias=self._parse_id_var(any_token=True), 2075 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2076 ) 2077 ) 2078 if self._match_text_seq("DEFINE") 2079 else None 2080 ) 2081 2082 self._match_r_paren() 2083 2084 return self.expression( 2085 exp.MatchRecognize, 2086 partition_by=partition, 2087 order=order, 2088 measures=measures, 2089 rows=rows, 2090 after=after, 2091 pattern=pattern, 2092 define=define, 2093 alias=self._parse_table_alias(), 2094 ) 2095 2096 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2097 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2098 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2099 2100 if outer_apply or cross_apply: 2101 this = self._parse_select(table=True) 2102 view = None 2103 outer = not cross_apply 2104 elif self._match(TokenType.LATERAL): 2105 this = self._parse_select(table=True) 2106 view = self._match(TokenType.VIEW) 2107 outer = self._match(TokenType.OUTER) 
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (NATURAL, side, kind) tokens of a join prefix; None where absent."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause (including comma joins and APPLY forms) into an exp.Join."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the prefix tokens: undo and discard them.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join for SQL generation purposes.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse `<index> ON [TABLE] <table> <expr>` into an exp.Index."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an index definition inside CREATE TABLE into an exp.Index, if present."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        # NOTE(review): "AMP" looks Teradata-specific — confirm against dialect usage.
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function/identifier/string/placeholder)."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name (catalog.db.table...) into an exp.Table."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like construct: lateral, unnest, VALUES, subquery or plain table.

        Alias, pivots, WITH (...) hints and TABLESAMPLE are attached where present;
        alias_post_tablesample controls whether the sample precedes the alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table expression into an exp.Unnest, if present."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only mode the single alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse a VALUES list (optionally parenthesized) into an exp.Values, if present."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
        """Parse a TABLESAMPLE (or `USING SAMPLE`) clause into an exp.TableSample, if present."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
self._match_text_seq("OUT", "OF") 2370 bucket_denominator = bucket_denominator = self._parse_number() 2371 self._match(TokenType.ON) 2372 bucket_field = self._parse_field() 2373 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2374 percent = num 2375 elif self._match(TokenType.ROWS): 2376 rows = num 2377 else: 2378 size = num 2379 2380 self._match(TokenType.R_PAREN) 2381 2382 if self._match(TokenType.L_PAREN): 2383 method = self._parse_var() 2384 seed = self._match(TokenType.COMMA) and self._parse_number() 2385 self._match_r_paren() 2386 elif self._match_texts(("SEED", "REPEATABLE")): 2387 seed = self._parse_wrapped(self._parse_number) 2388 2389 return self.expression( 2390 exp.TableSample, 2391 method=method, 2392 bucket_numerator=bucket_numerator, 2393 bucket_denominator=bucket_denominator, 2394 bucket_field=bucket_field, 2395 percent=percent, 2396 rows=rows, 2397 size=size, 2398 seed=seed, 2399 kind=kind, 2400 ) 2401 2402 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2403 return list(iter(self._parse_pivot, None)) 2404 2405 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2406 index = self._index 2407 2408 if self._match(TokenType.PIVOT): 2409 unpivot = False 2410 elif self._match(TokenType.UNPIVOT): 2411 unpivot = True 2412 else: 2413 return None 2414 2415 expressions = [] 2416 field = None 2417 2418 if not self._match(TokenType.L_PAREN): 2419 self._retreat(index) 2420 return None 2421 2422 if unpivot: 2423 expressions = self._parse_csv(self._parse_column) 2424 else: 2425 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2426 2427 if not expressions: 2428 self.raise_error("Failed to parse PIVOT's aggregation list") 2429 2430 if not self._match(TokenType.FOR): 2431 self.raise_error("Expecting FOR") 2432 2433 value = self._parse_column() 2434 2435 if not self._match(TokenType.IN): 2436 self.raise_error("Expecting IN") 2437 2438 field = self._parse_in(value, alias=True) 2439 2440 self._match_r_paren() 

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize output column names by combining aggregation aliases with
            # the IN-list field names; ordering controlled by PREFIXED_PIVOT_COLUMNS.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause into an exp.Where, if present (or if the token was pre-consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop until no grouping-set/rollup/cube/totals modifier follows.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; bare ROLLUP stores its column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                # `WITH CUBE` stores True; bare CUBE stores its column list.
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (...)` into a list of grouping sets, if present."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause into an exp.Having, if present."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause into an exp.Qualify, if present."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY into an exp.Order; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, exp_class: t.Type[exp.Expression], *texts: str
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-like clause introduced by `texts` (e.g. SORT BY) into `exp_class`."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST handling.

        When null ordering is not explicit, nulls_first is derived from the
        parser's null_ordering setting so output is deterministic across dialects.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or a FETCH clause; returns `this` unchanged when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET (or comma-style offset) clause; returns `this` unchanged when absent."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes NOWAIT (True), WAIT <n> (expression) or SKIP LOCKED (False).
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-recursive) onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction with an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Precedence level: AND/OR-style conjunction operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Precedence level: equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Precedence level: comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate we understand: rewind past the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse the tail of an IN predicate: UNNEST, a (sub)query/list, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse the tail of a BETWEEN predicate: `<low> AND <high>`."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Escape when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal into an exp.Interval, if present."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> built from adjacent LT/GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Precedence level: additive-style TERM operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Precedence level: multiplicative-style FACTOR operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, else fall through to type/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse intervals, casts of the form `<type> <literal>`, or plain columns."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat it as a column instead.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type-size entry (e.g. a length with an optional modifier variable)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into an exp.DataType (or related node), if present.

        Handles nested/struct element lists, array bracket suffixes, timestamp
        time-zone variants and intervals. With check_func=True, ambiguous
        type-vs-function spellings are disambiguated by peeking for a string.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)
if not expressions or not self._match(TokenType.R_PAREN): 2871 self._retreat(index) 2872 return None 2873 2874 maybe_func = True 2875 2876 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2877 this = exp.DataType( 2878 this=exp.DataType.Type.ARRAY, 2879 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2880 nested=True, 2881 ) 2882 2883 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2884 this = exp.DataType( 2885 this=exp.DataType.Type.ARRAY, 2886 expressions=[this], 2887 nested=True, 2888 ) 2889 2890 return this 2891 2892 if self._match(TokenType.L_BRACKET): 2893 self._retreat(index) 2894 return None 2895 2896 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2897 if nested and self._match(TokenType.LT): 2898 if is_struct: 2899 expressions = self._parse_csv(self._parse_struct_types) 2900 else: 2901 expressions = self._parse_csv(self._parse_types) 2902 2903 if not self._match(TokenType.GT): 2904 self.raise_error("Expecting >") 2905 2906 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2907 values = self._parse_csv(self._parse_conjunction) 2908 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2909 2910 value: t.Optional[exp.Expression] = None 2911 if type_token in self.TIMESTAMPS: 2912 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2913 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2914 elif ( 2915 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2916 or type_token == TokenType.TIMESTAMPLTZ 2917 ): 2918 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2919 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2920 if type_token == TokenType.TIME: 2921 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2922 else: 2923 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2924 2925 maybe_func = maybe_func and 
value is None

            if value is None:
                # No precision/timezone variant matched: plain TIMESTAMP type.
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                # Bare INTERVAL with no unit reads as the INTERVAL data type itself.
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal, so this wasn't a type after all;
                # rewind to before the whole type attempt and bail.
                self._retreat(index)
                return None

            # A string follows, so keep the type interpretation; only undo the peek.
            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: an (optionally `:`-separated) name/type column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an `AT TIME ZONE <expr>` clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed / ::-casted) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <column>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Dialect-specific column operator (e.g. JSON arrows); RHS is the next
                # raw token, turned into a number or string literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot access: star, function call, or identifier.
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot: what we parsed as column/table/db so far
                # is actually table/db/catalog once another dotted part appears.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, `.N` number, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT('a', 'b')).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions in parens form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary expression, function call, or identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (with or without parentheses), optionally forcing an
        anonymous (unvalidated) parse. `functions` overrides the lookup table."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only paren-less builtins like CURRENT_DATE apply.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this,
args)
            else:
                # Unknown (or forced-anonymous) function: keep name + args verbatim.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition (name [type])."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier
        when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr) or, failing that, a select/expression
        (optionally DISTINCT) with trailing ORDER/LIMIT modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found: re-parse from the start as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the LHS of `x = ...` as a plain variable, not a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.
        A nested SELECT is probed first and, if found, `this` is returned untouched."""
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            # Probe only — any parse failure just means "not a select here".
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare identifier — return it as-is rather than a ColumnDef.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a single
        expression argument."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) with its
        start/increment/min/max/cycle options, or a generated expression."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed column, not an identity.
                this.set("expression",
self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # No constraint keyword followed — return just the (possibly None) name.
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed schema constraints are handled when no
        CONSTRAINT keyword is present."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint without a CONSTRAINT <name> prefix, dispatching on the
        matched keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, NOT ENFORCED,
        DEFERRABLE, ...) as plain strings, stopping at the first unknown token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause, and
        ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action (e.g. CASCADE/RESTRICT) taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint (with optional ASC/DESC)
        or as a table constraint with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed access/constructors following `this`: subscripts, slices,
        array literals, and DuckDB-style `{...}` struct literals. Recurses to
        consume chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. [:n]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes for the dialect's array base offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice if a `:` follows; otherwise pass it through."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        # Optional operand for the "simple CASE" form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as IF(cond, true[, false]) or the statement-like
        IF cond THEN true [ELSE false] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` chooses Cast over TryCast. A comma in
        place of AS parses as a cast-to-format-string (CastToStrType)."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET instead of a plain type.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants, including the Postgres inline
        ORDER BY and the WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never matches via `=`, so compare with IS NULL explicitly.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it could be NULL at runtime, so also
                # match when both operand and search value are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default value.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one `[KEY] k [:|VALUE] v` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL-handling, UNIQUE KEYS, RETURNING,
        FORMAT JSON and ENCODING clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG; dialect flags decide argument order and whether a single-arg
        LOG means natural log."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL-style MATCH(cols) AGAINST (str [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips the first two
        comma-separated arguments for dialects that pass (haystack, needle)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING, including the Postgres FROM/FOR argument form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_struct(self) -> exp.Struct:
        """Parse STRUCT(...) arguments, allowing aliased entries."""
        return exp.Struct.from_arg_list(self._parse_csv(lambda:
self._parse_lambda(alias=True)))

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # First expression was the trim characters; the target follows.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the windowing suffix of a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named-window
        definition instead of an OVER clause."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window suffix to parse.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> — reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (UNBOUNDED / CURRENT ROW / expr, with optional
        PRECEDING/FOLLOWING side) as a {"value", "side"} dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) after `this`; with
        `explicit=True` only an AS-introduced alias is accepted."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier, optionally accepting
        any non-reserved token and/or prefix tokens glued onto the name."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal as a quoted identifier (None when no string)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens:
t.Optional[t.Collection[TokenType]] = None 3999 ) -> t.Optional[exp.Expression]: 4000 if ( 4001 (any_token and self._advance_any()) 4002 or self._match(TokenType.VAR) 4003 or (self._match_set(tokens) if tokens else False) 4004 ): 4005 return self.expression(exp.Var, this=self._prev.text) 4006 return self._parse_placeholder() 4007 4008 def _advance_any(self) -> t.Optional[Token]: 4009 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4010 self._advance() 4011 return self._prev 4012 return None 4013 4014 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4015 return self._parse_var() or self._parse_string() 4016 4017 def _parse_null(self) -> t.Optional[exp.Expression]: 4018 if self._match(TokenType.NULL): 4019 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4020 return None 4021 4022 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4023 if self._match(TokenType.TRUE): 4024 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4025 if self._match(TokenType.FALSE): 4026 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4027 return None 4028 4029 def _parse_star(self) -> t.Optional[exp.Expression]: 4030 if self._match(TokenType.STAR): 4031 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4032 return None 4033 4034 def _parse_parameter(self) -> exp.Expression: 4035 wrapped = self._match(TokenType.L_BRACE) 4036 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4037 self._match(TokenType.R_BRACE) 4038 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4039 4040 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4041 if self._match_set(self.PLACEHOLDER_PARSERS): 4042 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4043 if placeholder: 4044 return placeholder 4045 self._advance(-1) 4046 return None 4047 4048 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4049 if not 
self._match(TokenType.EXCEPT): 4050 return None 4051 if self._match(TokenType.L_PAREN, advance=False): 4052 return self._parse_wrapped_csv(self._parse_column) 4053 return self._parse_csv(self._parse_column) 4054 4055 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4056 if not self._match(TokenType.REPLACE): 4057 return None 4058 if self._match(TokenType.L_PAREN, advance=False): 4059 return self._parse_wrapped_csv(self._parse_expression) 4060 return self._parse_csv(self._parse_expression) 4061 4062 def _parse_csv( 4063 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4064 ) -> t.List[t.Optional[exp.Expression]]: 4065 parse_result = parse_method() 4066 items = [parse_result] if parse_result is not None else [] 4067 4068 while self._match(sep): 4069 self._add_comments(parse_result) 4070 parse_result = parse_method() 4071 if parse_result is not None: 4072 items.append(parse_result) 4073 4074 return items 4075 4076 def _parse_tokens( 4077 self, parse_method: t.Callable, expressions: t.Dict 4078 ) -> t.Optional[exp.Expression]: 4079 this = parse_method() 4080 4081 while self._match_set(expressions): 4082 this = self.expression( 4083 expressions[self._prev.token_type], 4084 this=this, 4085 comments=self._prev_comments, 4086 expression=parse_method(), 4087 ) 4088 4089 return this 4090 4091 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4092 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4093 4094 def _parse_wrapped_csv( 4095 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4096 ) -> t.List[t.Optional[exp.Expression]]: 4097 return self._parse_wrapped( 4098 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4099 ) 4100 4101 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4102 wrapped = self._match(TokenType.L_PAREN) 4103 if not wrapped and not optional: 4104 
self.raise_error("Expecting (") 4105 parse_result = parse_method() 4106 if wrapped: 4107 self._match_r_paren() 4108 return parse_result 4109 4110 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4111 return self._parse_select() or self._parse_set_operations( 4112 self._parse_expression() if alias else self._parse_conjunction() 4113 ) 4114 4115 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4116 return self._parse_set_operations( 4117 self._parse_select(nested=True, parse_subquery_alias=False) 4118 ) 4119 4120 def _parse_transaction(self) -> exp.Expression: 4121 this = None 4122 if self._match_texts(self.TRANSACTION_KIND): 4123 this = self._prev.text 4124 4125 self._match_texts({"TRANSACTION", "WORK"}) 4126 4127 modes = [] 4128 while True: 4129 mode = [] 4130 while self._match(TokenType.VAR): 4131 mode.append(self._prev.text) 4132 4133 if mode: 4134 modes.append(" ".join(mode)) 4135 if not self._match(TokenType.COMMA): 4136 break 4137 4138 return self.expression(exp.Transaction, this=this, modes=modes) 4139 4140 def _parse_commit_or_rollback(self) -> exp.Expression: 4141 chain = None 4142 savepoint = None 4143 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4144 4145 self._match_texts({"TRANSACTION", "WORK"}) 4146 4147 if self._match_text_seq("TO"): 4148 self._match_text_seq("SAVEPOINT") 4149 savepoint = self._parse_id_var() 4150 4151 if self._match(TokenType.AND): 4152 chain = not self._match_text_seq("NO") 4153 self._match_text_seq("CHAIN") 4154 4155 if is_rollback: 4156 return self.expression(exp.Rollback, savepoint=savepoint) 4157 return self.expression(exp.Commit, chain=chain) 4158 4159 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4160 if not self._match_text_seq("ADD"): 4161 return None 4162 4163 self._match(TokenType.COLUMN) 4164 exists_column = self._parse_exists(not_=True) 4165 expression = self._parse_column_def(self._parse_field(any_token=True)) 4166 4167 if expression: 4168 
expression.set("exists", exists_column) 4169 4170 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4171 if self._match_texts(("FIRST", "AFTER")): 4172 position = self._prev.text 4173 column_position = self.expression( 4174 exp.ColumnPosition, this=self._parse_column(), position=position 4175 ) 4176 expression.set("position", column_position) 4177 4178 return expression 4179 4180 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4181 drop = self._match(TokenType.DROP) and self._parse_drop() 4182 if drop and not isinstance(drop, exp.Command): 4183 drop.set("kind", drop.args.get("kind", "COLUMN")) 4184 return drop 4185 4186 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4187 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4188 return self.expression( 4189 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4190 ) 4191 4192 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4193 this = None 4194 kind = self._prev.token_type 4195 4196 if kind == TokenType.CONSTRAINT: 4197 this = self._parse_id_var() 4198 4199 if self._match_text_seq("CHECK"): 4200 expression = self._parse_wrapped(self._parse_conjunction) 4201 enforced = self._match_text_seq("ENFORCED") 4202 4203 return self.expression( 4204 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4205 ) 4206 4207 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4208 expression = self._parse_foreign_key() 4209 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4210 expression = self._parse_primary_key() 4211 else: 4212 expression = None 4213 4214 return self.expression(exp.AddConstraint, this=this, expression=expression) 4215 4216 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4217 index = self._index - 1 4218 4219 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4220 
return self._parse_csv(self._parse_add_constraint) 4221 4222 self._retreat(index) 4223 return self._parse_csv(self._parse_add_column) 4224 4225 def _parse_alter_table_alter(self) -> exp.Expression: 4226 self._match(TokenType.COLUMN) 4227 column = self._parse_field(any_token=True) 4228 4229 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4230 return self.expression(exp.AlterColumn, this=column, drop=True) 4231 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4232 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4233 4234 self._match_text_seq("SET", "DATA") 4235 return self.expression( 4236 exp.AlterColumn, 4237 this=column, 4238 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4239 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4240 using=self._match(TokenType.USING) and self._parse_conjunction(), 4241 ) 4242 4243 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4244 index = self._index - 1 4245 4246 partition_exists = self._parse_exists() 4247 if self._match(TokenType.PARTITION, advance=False): 4248 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4249 4250 self._retreat(index) 4251 return self._parse_csv(self._parse_drop_column) 4252 4253 def _parse_alter_table_rename(self) -> exp.Expression: 4254 self._match_text_seq("TO") 4255 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4256 4257 def _parse_alter(self) -> t.Optional[exp.Expression]: 4258 start = self._prev 4259 4260 if not self._match(TokenType.TABLE): 4261 return self._parse_as_command(start) 4262 4263 exists = self._parse_exists() 4264 this = self._parse_table(schema=True) 4265 4266 if self._next: 4267 self._advance() 4268 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4269 4270 if parser: 4271 actions = ensure_list(parser(self)) 4272 4273 if not self._curr: 4274 return self.expression( 4275 exp.AlterTable, 
4276 this=this, 4277 exists=exists, 4278 actions=actions, 4279 ) 4280 return self._parse_as_command(start) 4281 4282 def _parse_merge(self) -> exp.Expression: 4283 self._match(TokenType.INTO) 4284 target = self._parse_table() 4285 4286 self._match(TokenType.USING) 4287 using = self._parse_table() 4288 4289 self._match(TokenType.ON) 4290 on = self._parse_conjunction() 4291 4292 whens = [] 4293 while self._match(TokenType.WHEN): 4294 matched = not self._match(TokenType.NOT) 4295 self._match_text_seq("MATCHED") 4296 source = ( 4297 False 4298 if self._match_text_seq("BY", "TARGET") 4299 else self._match_text_seq("BY", "SOURCE") 4300 ) 4301 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4302 4303 self._match(TokenType.THEN) 4304 4305 if self._match(TokenType.INSERT): 4306 _this = self._parse_star() 4307 if _this: 4308 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4309 else: 4310 then = self.expression( 4311 exp.Insert, 4312 this=self._parse_value(), 4313 expression=self._match(TokenType.VALUES) and self._parse_value(), 4314 ) 4315 elif self._match(TokenType.UPDATE): 4316 expressions = self._parse_star() 4317 if expressions: 4318 then = self.expression(exp.Update, expressions=expressions) 4319 else: 4320 then = self.expression( 4321 exp.Update, 4322 expressions=self._match(TokenType.SET) 4323 and self._parse_csv(self._parse_equality), 4324 ) 4325 elif self._match(TokenType.DELETE): 4326 then = self.expression(exp.Var, this=self._prev.text) 4327 else: 4328 then = None 4329 4330 whens.append( 4331 self.expression( 4332 exp.When, 4333 matched=matched, 4334 source=source, 4335 condition=condition, 4336 then=then, 4337 ) 4338 ) 4339 4340 return self.expression( 4341 exp.Merge, 4342 this=target, 4343 using=using, 4344 on=on, 4345 expressions=whens, 4346 ) 4347 4348 def _parse_show(self) -> t.Optional[exp.Expression]: 4349 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4350 if parser: 
4351 return parser(self) 4352 self._advance() 4353 return self.expression(exp.Show, this=self._prev.text.upper()) 4354 4355 def _parse_set_item_assignment( 4356 self, kind: t.Optional[str] = None 4357 ) -> t.Optional[exp.Expression]: 4358 index = self._index 4359 4360 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4361 return self._parse_set_transaction(global_=kind == "GLOBAL") 4362 4363 left = self._parse_primary() or self._parse_id_var() 4364 4365 if not self._match_texts(("=", "TO")): 4366 self._retreat(index) 4367 return None 4368 4369 right = self._parse_statement() or self._parse_id_var() 4370 this = self.expression( 4371 exp.EQ, 4372 this=left, 4373 expression=right, 4374 ) 4375 4376 return self.expression( 4377 exp.SetItem, 4378 this=this, 4379 kind=kind, 4380 ) 4381 4382 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4383 self._match_text_seq("TRANSACTION") 4384 characteristics = self._parse_csv( 4385 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4386 ) 4387 return self.expression( 4388 exp.SetItem, 4389 expressions=characteristics, 4390 kind="TRANSACTION", 4391 **{"global": global_}, # type: ignore 4392 ) 4393 4394 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4395 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4396 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4397 4398 def _parse_set(self) -> exp.Expression: 4399 index = self._index 4400 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4401 4402 if self._curr: 4403 self._retreat(index) 4404 return self._parse_as_command(self._prev) 4405 4406 return set_ 4407 4408 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4409 for option in options: 4410 if self._match_text_seq(*option.split(" ")): 4411 return exp.Var(this=option) 4412 return None 4413 4414 def _parse_as_command(self, 
start: Token) -> exp.Command: 4415 while self._curr: 4416 self._advance() 4417 text = self._find_sql(start, self._prev) 4418 size = len(start.text) 4419 return exp.Command(this=text[:size], expression=text[size:]) 4420 4421 def _find_parser( 4422 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4423 ) -> t.Optional[t.Callable]: 4424 if not self._curr: 4425 return None 4426 4427 index = self._index 4428 this = [] 4429 while True: 4430 # The current token might be multiple words 4431 curr = self._curr.text.upper() 4432 key = curr.split(" ") 4433 this.append(curr) 4434 self._advance() 4435 result, trie = in_trie(trie, key) 4436 if result == 0: 4437 break 4438 if result == 2: 4439 subparser = parsers[" ".join(this)] 4440 return subparser 4441 self._retreat(index) 4442 return None 4443 4444 def _match(self, token_type, advance=True, expression=None): 4445 if not self._curr: 4446 return None 4447 4448 if self._curr.token_type == token_type: 4449 if advance: 4450 self._advance() 4451 self._add_comments(expression) 4452 return True 4453 4454 return None 4455 4456 def _match_set(self, types, advance=True): 4457 if not self._curr: 4458 return None 4459 4460 if self._curr.token_type in types: 4461 if advance: 4462 self._advance() 4463 return True 4464 4465 return None 4466 4467 def _match_pair(self, token_type_a, token_type_b, advance=True): 4468 if not self._curr or not self._next: 4469 return None 4470 4471 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4472 if advance: 4473 self._advance(2) 4474 return True 4475 4476 return None 4477 4478 def _match_l_paren(self, expression=None): 4479 if not self._match(TokenType.L_PAREN, expression=expression): 4480 self.raise_error("Expecting (") 4481 4482 def _match_r_paren(self, expression=None): 4483 if not self._match(TokenType.R_PAREN, expression=expression): 4484 self.raise_error("Expecting )") 4485 4486 def _match_texts(self, texts, advance=True): 4487 if self._curr and 
self._curr.text.upper() in texts: 4488 if advance: 4489 self._advance() 4490 return True 4491 return False 4492 4493 def _match_text_seq(self, *texts, advance=True): 4494 index = self._index 4495 for text in texts: 4496 if self._curr and self._curr.text.upper() == text: 4497 self._advance() 4498 else: 4499 self._retreat(index) 4500 return False 4501 4502 if not advance: 4503 self._retreat(index) 4504 4505 return True 4506 4507 def _replace_columns_with_dots(self, this): 4508 if isinstance(this, exp.Dot): 4509 exp.replace_children(this, self._replace_columns_with_dots) 4510 elif isinstance(this, exp.Column): 4511 exp.replace_children(this, self._replace_columns_with_dots) 4512 table = this.args.get("table") 4513 this = ( 4514 self.expression(exp.Dot, this=table, expression=this.this) 4515 if table 4516 else self.expression(exp.Var, this=this.name) 4517 ) 4518 elif isinstance(this, exp.Identifier): 4519 this = self.expression(exp.Var, this=this.name) 4520 return this 4521 4522 def _replace_lambda(self, node, lambda_variables): 4523 for column in node.find_all(exp.Column): 4524 if column.parts[0].name in lambda_variables: 4525 dot_or_id = column.to_dot() if column.table else column.this 4526 parent = column.parent 4527 4528 while isinstance(parent, exp.Dot): 4529 if not isinstance(parent.parent, exp.Dot): 4530 parent.replace(dot_or_id) 4531 break 4532 parent = parent.parent 4533 else: 4534 if column is node: 4535 node = dot_or_id 4536 else: 4537 column.replace(dot_or_id) 4538 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap (or StarMap for a lone `*`) from alternating key/value args.

    Arguments are interpreted as key1, value1, key2, value2, ...; the keys and
    values are collected into two parallel Array expressions.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up consecutive arguments; an odd-length list raises IndexError,
    # exactly like indexing past the end would.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 
TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 
TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.ASC, 211 TokenType.AUTO_INCREMENT, 212 TokenType.BEGIN, 213 TokenType.CACHE, 214 TokenType.COLLATE, 215 TokenType.COMMAND, 216 TokenType.COMMENT, 217 TokenType.COMMIT, 218 TokenType.CONSTRAINT, 219 TokenType.DEFAULT, 220 TokenType.DELETE, 221 TokenType.DESC, 222 TokenType.DESCRIBE, 223 TokenType.DIV, 224 TokenType.END, 225 TokenType.EXECUTE, 226 TokenType.ESCAPE, 227 TokenType.FALSE, 228 TokenType.FIRST, 229 TokenType.FILTER, 230 TokenType.FORMAT, 231 TokenType.FULL, 232 TokenType.IF, 233 TokenType.IS, 234 TokenType.ISNULL, 235 TokenType.INTERVAL, 236 TokenType.KEEP, 237 TokenType.LEFT, 238 TokenType.LOAD, 239 TokenType.MERGE, 240 TokenType.NATURAL, 241 TokenType.NEXT, 242 TokenType.OFFSET, 243 TokenType.ORDINALITY, 244 TokenType.OVERWRITE, 245 TokenType.PARTITION, 246 TokenType.PERCENT, 247 TokenType.PIVOT, 248 TokenType.PRAGMA, 249 TokenType.RANGE, 250 TokenType.REFERENCES, 251 TokenType.RIGHT, 252 TokenType.ROW, 253 TokenType.ROWS, 254 TokenType.SEMI, 255 TokenType.SET, 256 TokenType.SETTINGS, 257 TokenType.SHOW, 258 TokenType.TEMPORARY, 259 TokenType.TOP, 260 TokenType.TRUE, 261 TokenType.UNIQUE, 262 TokenType.UNPIVOT, 263 TokenType.VOLATILE, 264 TokenType.WINDOW, 265 *CREATABLES, 266 *SUBQUERY_PREDICATES, 267 *TYPE_TOKENS, 268 *NO_PAREN_FUNCTIONS, 269 } 270 271 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 272 273 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 274 TokenType.APPLY, 275 TokenType.FULL, 276 TokenType.LEFT, 277 TokenType.LOCK, 278 TokenType.NATURAL, 279 TokenType.OFFSET, 280 TokenType.RIGHT, 281 TokenType.WINDOW, 282 } 283 284 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 285 286 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 287 288 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 289 290 FUNC_TOKENS = { 291 
TokenType.COMMAND, 292 TokenType.CURRENT_DATE, 293 TokenType.CURRENT_DATETIME, 294 TokenType.CURRENT_TIMESTAMP, 295 TokenType.CURRENT_TIME, 296 TokenType.CURRENT_USER, 297 TokenType.FILTER, 298 TokenType.FIRST, 299 TokenType.FORMAT, 300 TokenType.GLOB, 301 TokenType.IDENTIFIER, 302 TokenType.INDEX, 303 TokenType.ISNULL, 304 TokenType.ILIKE, 305 TokenType.LIKE, 306 TokenType.MERGE, 307 TokenType.OFFSET, 308 TokenType.PRIMARY_KEY, 309 TokenType.RANGE, 310 TokenType.REPLACE, 311 TokenType.ROW, 312 TokenType.UNNEST, 313 TokenType.VAR, 314 TokenType.LEFT, 315 TokenType.RIGHT, 316 TokenType.DATE, 317 TokenType.DATETIME, 318 TokenType.TABLE, 319 TokenType.TIMESTAMP, 320 TokenType.TIMESTAMPTZ, 321 TokenType.WINDOW, 322 *TYPE_TOKENS, 323 *SUBQUERY_PREDICATES, 324 } 325 326 CONJUNCTION = { 327 TokenType.AND: exp.And, 328 TokenType.OR: exp.Or, 329 } 330 331 EQUALITY = { 332 TokenType.EQ: exp.EQ, 333 TokenType.NEQ: exp.NEQ, 334 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 335 } 336 337 COMPARISON = { 338 TokenType.GT: exp.GT, 339 TokenType.GTE: exp.GTE, 340 TokenType.LT: exp.LT, 341 TokenType.LTE: exp.LTE, 342 } 343 344 BITWISE = { 345 TokenType.AMP: exp.BitwiseAnd, 346 TokenType.CARET: exp.BitwiseXor, 347 TokenType.PIPE: exp.BitwiseOr, 348 TokenType.DPIPE: exp.DPipe, 349 } 350 351 TERM = { 352 TokenType.DASH: exp.Sub, 353 TokenType.PLUS: exp.Add, 354 TokenType.MOD: exp.Mod, 355 TokenType.COLLATE: exp.Collate, 356 } 357 358 FACTOR = { 359 TokenType.DIV: exp.IntDiv, 360 TokenType.LR_ARROW: exp.Distance, 361 TokenType.SLASH: exp.Div, 362 TokenType.STAR: exp.Mul, 363 } 364 365 TIMESTAMPS = { 366 TokenType.TIME, 367 TokenType.TIMESTAMP, 368 TokenType.TIMESTAMPTZ, 369 TokenType.TIMESTAMPLTZ, 370 } 371 372 SET_OPERATIONS = { 373 TokenType.UNION, 374 TokenType.INTERSECT, 375 TokenType.EXCEPT, 376 } 377 378 JOIN_SIDES = { 379 TokenType.LEFT, 380 TokenType.RIGHT, 381 TokenType.FULL, 382 } 383 384 JOIN_KINDS = { 385 TokenType.INNER, 386 TokenType.OUTER, 387 TokenType.CROSS, 388 
TokenType.SEMI, 389 TokenType.ANTI, 390 } 391 392 LAMBDAS = { 393 TokenType.ARROW: lambda self, expressions: self.expression( 394 exp.Lambda, 395 this=self._replace_lambda( 396 self._parse_conjunction(), 397 {node.name for node in expressions}, 398 ), 399 expressions=expressions, 400 ), 401 TokenType.FARROW: lambda self, expressions: self.expression( 402 exp.Kwarg, 403 this=exp.Var(this=expressions[0].name), 404 expression=self._parse_conjunction(), 405 ), 406 } 407 408 COLUMN_OPERATORS = { 409 TokenType.DOT: None, 410 TokenType.DCOLON: lambda self, this, to: self.expression( 411 exp.Cast if self.STRICT_CAST else exp.TryCast, 412 this=this, 413 to=to, 414 ), 415 TokenType.ARROW: lambda self, this, path: self.expression( 416 exp.JSONExtract, 417 this=this, 418 expression=path, 419 ), 420 TokenType.DARROW: lambda self, this, path: self.expression( 421 exp.JSONExtractScalar, 422 this=this, 423 expression=path, 424 ), 425 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 426 exp.JSONBExtract, 427 this=this, 428 expression=path, 429 ), 430 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 431 exp.JSONBExtractScalar, 432 this=this, 433 expression=path, 434 ), 435 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 436 exp.JSONBContains, 437 this=this, 438 expression=key, 439 ), 440 } 441 442 EXPRESSION_PARSERS = { 443 exp.Column: lambda self: self._parse_column(), 444 exp.DataType: lambda self: self._parse_types(), 445 exp.From: lambda self: self._parse_from(), 446 exp.Group: lambda self: self._parse_group(), 447 exp.Identifier: lambda self: self._parse_id_var(), 448 exp.Lateral: lambda self: self._parse_lateral(), 449 exp.Join: lambda self: self._parse_join(), 450 exp.Order: lambda self: self._parse_order(), 451 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 452 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 453 exp.Lambda: lambda self: self._parse_lambda(), 454 exp.Limit: lambda self: 
self._parse_limit(), 455 exp.Offset: lambda self: self._parse_offset(), 456 exp.TableAlias: lambda self: self._parse_table_alias(), 457 exp.Table: lambda self: self._parse_table_parts(), 458 exp.Condition: lambda self: self._parse_conjunction(), 459 exp.Expression: lambda self: self._parse_statement(), 460 exp.Properties: lambda self: self._parse_properties(), 461 exp.Where: lambda self: self._parse_where(), 462 exp.Ordered: lambda self: self._parse_ordered(), 463 exp.Having: lambda self: self._parse_having(), 464 exp.With: lambda self: self._parse_with(), 465 exp.Window: lambda self: self._parse_named_window(), 466 exp.Qualify: lambda self: self._parse_qualify(), 467 exp.Returning: lambda self: self._parse_returning(), 468 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 469 } 470 471 STATEMENT_PARSERS = { 472 TokenType.ALTER: lambda self: self._parse_alter(), 473 TokenType.BEGIN: lambda self: self._parse_transaction(), 474 TokenType.CACHE: lambda self: self._parse_cache(), 475 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 476 TokenType.COMMENT: lambda self: self._parse_comment(), 477 TokenType.CREATE: lambda self: self._parse_create(), 478 TokenType.DELETE: lambda self: self._parse_delete(), 479 TokenType.DESC: lambda self: self._parse_describe(), 480 TokenType.DESCRIBE: lambda self: self._parse_describe(), 481 TokenType.DROP: lambda self: self._parse_drop(), 482 TokenType.END: lambda self: self._parse_commit_or_rollback(), 483 TokenType.INSERT: lambda self: self._parse_insert(), 484 TokenType.LOAD: lambda self: self._parse_load(), 485 TokenType.MERGE: lambda self: self._parse_merge(), 486 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 487 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 488 TokenType.SET: lambda self: self._parse_set(), 489 TokenType.UNCACHE: lambda self: self._parse_uncache(), 490 TokenType.UPDATE: lambda self: self._parse_update(), 491 TokenType.USE: 
lambda self: self.expression( 492 exp.Use, 493 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 494 and exp.Var(this=self._prev.text), 495 this=self._parse_table(schema=False), 496 ), 497 } 498 499 UNARY_PARSERS = { 500 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 501 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 502 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 503 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 504 } 505 506 PRIMARY_PARSERS = { 507 TokenType.STRING: lambda self, token: self.expression( 508 exp.Literal, this=token.text, is_string=True 509 ), 510 TokenType.NUMBER: lambda self, token: self.expression( 511 exp.Literal, this=token.text, is_string=False 512 ), 513 TokenType.STAR: lambda self, _: self.expression( 514 exp.Star, 515 **{"except": self._parse_except(), "replace": self._parse_replace()}, 516 ), 517 TokenType.NULL: lambda self, _: self.expression(exp.Null), 518 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 519 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 520 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 521 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 522 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 523 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 524 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 525 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 526 } 527 528 PLACEHOLDER_PARSERS = { 529 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 530 TokenType.PARAMETER: lambda self: self._parse_parameter(), 531 TokenType.COLON: lambda self: self.expression(exp.Placeholder, 
this=self._prev.text) 532 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 533 else None, 534 } 535 536 RANGE_PARSERS = { 537 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 538 TokenType.GLOB: binary_range_parser(exp.Glob), 539 TokenType.ILIKE: binary_range_parser(exp.ILike), 540 TokenType.IN: lambda self, this: self._parse_in(this), 541 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 542 TokenType.IS: lambda self, this: self._parse_is(this), 543 TokenType.LIKE: binary_range_parser(exp.Like), 544 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 545 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 546 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 547 } 548 549 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 550 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 551 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 552 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 553 "CHARACTER SET": lambda self: self._parse_character_set(), 554 "CHECKSUM": lambda self: self._parse_checksum(), 555 "CLUSTER": lambda self: self._parse_cluster(), 556 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 557 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 558 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 559 "DEFINER": lambda self: self._parse_definer(), 560 "DETERMINISTIC": lambda self: self.expression( 561 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 562 ), 563 "DISTKEY": lambda self: self._parse_distkey(), 564 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 565 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 566 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 567 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 568 "FALLBACK": 
        lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        # Some entries take **kwargs so _parse_property_before can forward
        # modifier flags such as no/dual/before/default (Teradata-style).
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Parsers for column/table constraints, keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self:
        self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # "ON" is only a constraint when followed by UPDATE (e.g. ON UPDATE CURRENT_TIMESTAMP).
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Parsers for ALTER TABLE actions, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions with special argument syntax that need dedicated parsing logic.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "STRUCT": lambda self: self._parse_struct(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Clauses that can modify a query, parsed in this (dict insertion) order.
    # The join/lateral entries use two-arg iter() to collect until None is returned.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Parsers for SET statement scopes/kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks: empty here, populated by subclasses (see _Parser metaclass tries).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
"ISOLATION LEVEL READ UNCOMMITTED", 731 "ISOLATION LEVEL SERIALIZABLE", 732 "READ WRITE", 733 "READ ONLY", 734 } 735 736 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 737 738 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 739 740 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 741 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 742 743 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 744 745 STRICT_CAST = True 746 747 CONVERT_TYPE_FIRST = False 748 749 PREFIXED_PIVOT_COLUMNS = False 750 IDENTIFY_PIVOT_STRINGS = False 751 752 LOG_BASE_FIRST = True 753 LOG_DEFAULTS_TO_LN = False 754 755 __slots__ = ( 756 "error_level", 757 "error_message_context", 758 "sql", 759 "errors", 760 "index_offset", 761 "unnest_column_only", 762 "alias_post_tablesample", 763 "max_errors", 764 "null_ordering", 765 "_tokens", 766 "_index", 767 "_curr", 768 "_next", 769 "_prev", 770 "_prev_comments", 771 "_show_trie", 772 "_set_trie", 773 ) 774 775 def __init__( 776 self, 777 error_level: t.Optional[ErrorLevel] = None, 778 error_message_context: int = 100, 779 index_offset: int = 0, 780 unnest_column_only: bool = False, 781 alias_post_tablesample: bool = False, 782 max_errors: int = 3, 783 null_ordering: t.Optional[str] = None, 784 ): 785 self.error_level = error_level or ErrorLevel.IMMEDIATE 786 self.error_message_context = error_message_context 787 self.index_offset = index_offset 788 self.unnest_column_only = unnest_column_only 789 self.alias_post_tablesample = alias_post_tablesample 790 self.max_errors = max_errors 791 self.null_ordering = null_ordering 792 self.reset() 793 794 def reset(self): 795 self.sql = "" 796 self.errors = [] 797 self._tokens = [] 798 self._index = 0 799 self._curr = None 800 self._next = None 801 self._prev = None 802 self._prev_comments = None 803 804 def parse( 805 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 806 ) -> t.List[t.Optional[exp.Expression]]: 807 """ 808 
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the expression type we were attempting,
                # then fall through to try the next candidate type.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # Every candidate type failed; surface all accumulated errors, chained
        # to the last failure.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core parse driver: split the token stream on semicolons into
        # per-statement chunks, then run parse_method over each chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open an empty chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
907 """ 908 token = token or self._curr or self._prev or Token.string("") 909 start = token.start 910 end = token.end + 1 911 start_context = self.sql[max(start - self.error_message_context, 0) : start] 912 highlight = self.sql[start:end] 913 end_context = self.sql[end : end + self.error_message_context] 914 915 error = ParseError.new( 916 f"{message}. Line {token.line}, Col: {token.col}.\n" 917 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 918 description=message, 919 line=token.line, 920 col=token.col, 921 start_context=start_context, 922 highlight=highlight, 923 end_context=end_context, 924 ) 925 926 if self.error_level == ErrorLevel.IMMEDIATE: 927 raise error 928 929 self.errors.append(error) 930 931 def expression( 932 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 933 ) -> E: 934 """ 935 Creates a new, validated Expression. 936 937 Args: 938 exp_class: the expression class to instantiate. 939 comments: an optional list of comments to attach to the expression. 940 kwargs: the arguments to set for the expression along with their respective values. 941 942 Returns: 943 The target expression. 944 """ 945 instance = exp_class(**kwargs) 946 instance.add_comments(comments) if comments else self._add_comments(instance) 947 self.validate_expression(instance) 948 return instance 949 950 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 951 if expression and self._prev_comments: 952 expression.add_comments(self._prev_comments) 953 self._prev_comments = None 954 955 def validate_expression( 956 self, expression: exp.Expression, args: t.Optional[t.List] = None 957 ) -> None: 958 """ 959 Validates an already instantiated expression, making sure that all its mandatory arguments 960 are set. 961 962 Args: 963 expression: the expression to validate. 964 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
965 """ 966 if self.error_level == ErrorLevel.IGNORE: 967 return 968 969 for error_message in expression.error_messages(args): 970 self.raise_error(error_message) 971 972 def _find_sql(self, start: Token, end: Token) -> str: 973 return self.sql[start.start : end.end + 1] 974 975 def _advance(self, times: int = 1) -> None: 976 self._index += times 977 self._curr = seq_get(self._tokens, self._index) 978 self._next = seq_get(self._tokens, self._index + 1) 979 if self._index > 0: 980 self._prev = self._tokens[self._index - 1] 981 self._prev_comments = self._prev.comments 982 else: 983 self._prev = None 984 self._prev_comments = None 985 986 def _retreat(self, index: int) -> None: 987 if index != self._index: 988 self._advance(index - self._index) 989 990 def _parse_command(self) -> exp.Command: 991 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 992 993 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 994 start = self._prev 995 exists = self._parse_exists() if allow_exists else None 996 997 self._match(TokenType.ON) 998 999 kind = self._match_set(self.CREATABLES) and self._prev 1000 1001 if not kind: 1002 return self._parse_as_command(start) 1003 1004 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1005 this = self._parse_user_defined_function(kind=kind.token_type) 1006 elif kind.token_type == TokenType.TABLE: 1007 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1008 elif kind.token_type == TokenType.COLUMN: 1009 this = self._parse_column() 1010 else: 1011 this = self._parse_id_var() 1012 1013 self._match(TokenType.IS) 1014 1015 return self.expression( 1016 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1017 ) 1018 1019 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1020 def _parse_ttl(self) -> exp.Expression: 1021 def _parse_ttl_action() -> t.Optional[exp.Expression]: 
            # One TTL entry: an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregations> (ClickHouse TTL rollup).
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Top-level statement dispatch: registered statement parsers first,
        # then opaque commands, then a bare expression or SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        # DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <table> [CASCADE|...]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown target kind: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; the NOT is required only when not_ is True.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        start = self._prev
        # CREATE OR REPLACE: either we arrived here via REPLACE itself, or an
        # OR REPLACE pair follows CREATE.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...): consume TABLE so FUNCTION drives the parse.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            # Merge newly found properties into any already collected.
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return,
                this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            # Only look for more properties when the DDL body hasn't started yet.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        # Snowflake-style CLONE [AT|BEFORE (KIND => expr)] clause.
        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords preceding the property name; the
        # truthy ones are forwarded as kwargs to the property's parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser doesn't accept the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if
        self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic key = value property (key can be a VAR or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        # STORED AS <format> | STORED AS INPUTFORMAT '...' OUTPUTFORMAT '...'
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Parses [= | AS] <field> into the given property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collect consecutive properties until one fails to parse; `before`
        # selects the Teradata-style pre-name property grammar.
        properties = []

        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE directly after CREATE [OR REPLACE] [UNIQUE] is a table
        # property; anywhere else it denotes function volatility.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # Dispatch the various WITH ... property forms.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        # DEFINER = user@host (host may be a bare identifier or follow a %).
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return
        self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.Expression:
        # kwargs carry the modifier flags gathered by _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF [DEFAULT]; `on` stays None when neither matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        # CLUSTER BY <ordered exprs>; rewind past CLUSTER if BY doesn't follow.
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        # With "=", a numeric ratio follows; otherwise only the NO/DEFAULT flags apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        return self.expression(
            exp.MergeBlockRatioProperty,
            no=no,
            default=default,
        )

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.Expression:
        # [MIN|MAX|DEFAULT] DATABLOCKSIZE [= <number> [BYTES|KBYTES|KILOBYTES]]
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        # LOCKING <kind> [<target>] [FOR|IN] <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
1510 elif self._match_text_seq("READ"): 1511 lock_type = "READ" 1512 elif self._match_text_seq("WRITE"): 1513 lock_type = "WRITE" 1514 elif self._match_text_seq("CHECKSUM"): 1515 lock_type = "CHECKSUM" 1516 else: 1517 lock_type = None 1518 1519 override = self._match_text_seq("OVERRIDE") 1520 1521 return self.expression( 1522 exp.LockingProperty, 1523 this=this, 1524 kind=kind, 1525 for_or_in=for_or_in, 1526 lock_type=lock_type, 1527 override=override, 1528 ) 1529 1530 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1531 if self._match(TokenType.PARTITION_BY): 1532 return self._parse_csv(self._parse_conjunction) 1533 return [] 1534 1535 def _parse_partitioned_by(self) -> exp.Expression: 1536 self._match(TokenType.EQ) 1537 return self.expression( 1538 exp.PartitionedByProperty, 1539 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1540 ) 1541 1542 def _parse_withdata(self, no=False) -> exp.Expression: 1543 if self._match_text_seq("AND", "STATISTICS"): 1544 statistics = True 1545 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1546 statistics = False 1547 else: 1548 statistics = None 1549 1550 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1551 1552 def _parse_no_property(self) -> t.Optional[exp.Property]: 1553 if self._match_text_seq("PRIMARY", "INDEX"): 1554 return exp.NoPrimaryIndexProperty() 1555 return None 1556 1557 def _parse_on_property(self) -> t.Optional[exp.Property]: 1558 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1559 return exp.OnCommitProperty() 1560 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1561 return exp.OnCommitProperty(delete=True) 1562 return None 1563 1564 def _parse_distkey(self) -> exp.Expression: 1565 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1566 1567 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1568 table = self._parse_table(schema=True) 1569 options = [] 1570 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 1571 this = self._prev.text.upper() 1572 id_var = self._parse_id_var() 1573 1574 if not id_var: 1575 return None 1576 1577 options.append( 1578 self.expression( 1579 exp.Property, 1580 this=this, 1581 value=exp.Var(this=id_var.this.upper()), 1582 ) 1583 ) 1584 return self.expression(exp.LikeProperty, this=table, expressions=options) 1585 1586 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1587 return self.expression( 1588 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1589 ) 1590 1591 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1592 self._match(TokenType.EQ) 1593 return self.expression( 1594 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1595 ) 1596 1597 def _parse_returns(self) -> exp.Expression: 1598 value: t.Optional[exp.Expression] 1599 is_table = self._match(TokenType.TABLE) 1600 1601 if is_table: 1602 if self._match(TokenType.LT): 1603 value = self.expression( 1604 exp.Schema, 1605 this="TABLE", 1606 expressions=self._parse_csv(self._parse_struct_types), 1607 ) 1608 if not self._match(TokenType.GT): 1609 self.raise_error("Expecting >") 1610 else: 1611 value = self._parse_schema(exp.Var(this="TABLE")) 1612 else: 1613 value = self._parse_types() 1614 1615 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1616 1617 def _parse_describe(self) -> exp.Expression: 1618 kind = self._match_set(self.CREATABLES) and self._prev.text 1619 this = self._parse_table() 1620 1621 return self.expression(exp.Describe, this=this, kind=kind) 1622 1623 def _parse_insert(self) -> exp.Expression: 1624 overwrite = self._match(TokenType.OVERWRITE) 1625 local = self._match_text_seq("LOCAL") 1626 alternative = None 1627 1628 if self._match_text_seq("DIRECTORY"): 1629 this: t.Optional[exp.Expression] = self.expression( 1630 exp.Directory, 1631 this=self._parse_var_or_string(), 1632 local=local, 1633 
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an INSERT conflict clause: ON CONFLICT ... or ON DUPLICATE KEY ...."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <column>, ... clause."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse ROW FORMAT ... assuming the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive-style ROW FORMAT SERDE/DELIMITED clause.

        Args:
            match_row: require (and consume) the leading ROW FORMAT token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; anything else after
        LOAD falls back to an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE [FROM] <table> [USING ...] [WHERE ...] [RETURNING ...]."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a flat [key, value] pair.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse a PARTITION (<expr>, ...) clause."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row: a parenthesized tuple, or a single expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, SELECT, a
        parenthesized nested/table query, or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference.
            parse_subquery_alias: parse a trailing alias on a parenthesized subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the WITH clause to the statement it prefixes.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause holding one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: <alias> [(<cols>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> [(<column aliases>)]; returns None if neither part
        is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Back out if the paren didn't actually open a column-alias list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
self._retreat(index) 1946 else: 1947 columns = None 1948 1949 if not alias and not columns: 1950 return None 1951 1952 return self.expression(exp.TableAlias, this=alias, columns=columns) 1953 1954 def _parse_subquery( 1955 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1956 ) -> exp.Expression: 1957 return self.expression( 1958 exp.Subquery, 1959 this=this, 1960 pivots=self._parse_pivots(), 1961 alias=self._parse_table_alias() if parse_alias else None, 1962 ) 1963 1964 def _parse_query_modifiers( 1965 self, this: t.Optional[exp.Expression] 1966 ) -> t.Optional[exp.Expression]: 1967 if isinstance(this, self.MODIFIABLES): 1968 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1969 expression = parser(self) 1970 1971 if expression: 1972 this.set(key, expression) 1973 return this 1974 1975 def _parse_hint(self) -> t.Optional[exp.Expression]: 1976 if self._match(TokenType.HINT): 1977 hints = self._parse_csv(self._parse_function) 1978 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1979 self.raise_error("Expected */ after HINT") 1980 return self.expression(exp.Hint, expressions=hints) 1981 1982 return None 1983 1984 def _parse_into(self) -> t.Optional[exp.Expression]: 1985 if not self._match(TokenType.INTO): 1986 return None 1987 1988 temp = self._match(TokenType.TEMPORARY) 1989 unlogged = self._match_text_seq("UNLOGGED") 1990 self._match(TokenType.TABLE) 1991 1992 return self.expression( 1993 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1994 ) 1995 1996 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 1997 if not self._match(TokenType.FROM): 1998 return None 1999 2000 comments = self._prev_comments 2001 this = self._parse_table() 2002 2003 return self.expression( 2004 exp.From, 2005 comments=comments, 2006 this=self._parse_query_modifiers(this) if modifiers else this, 2007 ) 2008 2009 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2010 if not 
self._match(TokenType.MATCH_RECOGNIZE): 2011 return None 2012 2013 self._match_l_paren() 2014 2015 partition = self._parse_partition_by() 2016 order = self._parse_order() 2017 measures = ( 2018 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2019 ) 2020 2021 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2022 rows = exp.Var(this="ONE ROW PER MATCH") 2023 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2024 text = "ALL ROWS PER MATCH" 2025 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2026 text += f" SHOW EMPTY MATCHES" 2027 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2028 text += f" OMIT EMPTY MATCHES" 2029 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2030 text += f" WITH UNMATCHED ROWS" 2031 rows = exp.Var(this=text) 2032 else: 2033 rows = None 2034 2035 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2036 text = "AFTER MATCH SKIP" 2037 if self._match_text_seq("PAST", "LAST", "ROW"): 2038 text += f" PAST LAST ROW" 2039 elif self._match_text_seq("TO", "NEXT", "ROW"): 2040 text += f" TO NEXT ROW" 2041 elif self._match_text_seq("TO", "FIRST"): 2042 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2043 elif self._match_text_seq("TO", "LAST"): 2044 text += f" TO LAST {self._advance_any().text}" # type: ignore 2045 after = exp.Var(this=text) 2046 else: 2047 after = None 2048 2049 if self._match_text_seq("PATTERN"): 2050 self._match_l_paren() 2051 2052 if not self._curr: 2053 self.raise_error("Expecting )", self._curr) 2054 2055 paren = 1 2056 start = self._curr 2057 2058 while self._curr and paren > 0: 2059 if self._curr.token_type == TokenType.L_PAREN: 2060 paren += 1 2061 if self._curr.token_type == TokenType.R_PAREN: 2062 paren -= 1 2063 end = self._prev 2064 self._advance() 2065 if paren > 0: 2066 self.raise_error("Expecting )", self._curr) 2067 pattern = exp.Var(this=self._find_sql(start, end)) 2068 else: 2069 pattern = None 2070 2071 define = ( 2072 
self._parse_csv( 2073 lambda: self.expression( 2074 exp.Alias, 2075 alias=self._parse_id_var(any_token=True), 2076 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2077 ) 2078 ) 2079 if self._match_text_seq("DEFINE") 2080 else None 2081 ) 2082 2083 self._match_r_paren() 2084 2085 return self.expression( 2086 exp.MatchRecognize, 2087 partition_by=partition, 2088 order=order, 2089 measures=measures, 2090 rows=rows, 2091 after=after, 2092 pattern=pattern, 2093 define=define, 2094 alias=self._parse_table_alias(), 2095 ) 2096 2097 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2098 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2099 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2100 2101 if outer_apply or cross_apply: 2102 this = self._parse_select(table=True) 2103 view = None 2104 outer = not cross_apply 2105 elif self._match(TokenType.LATERAL): 2106 this = self._parse_select(table=True) 2107 view = self._match(TokenType.VIEW) 2108 outer = self._match(TokenType.OUTER) 2109 else: 2110 return None 2111 2112 if not this: 2113 this = self._parse_function() or self._parse_id_var(any_token=False) 2114 while self._match(TokenType.DOT): 2115 this = exp.Dot( 2116 this=this, 2117 expression=self._parse_function() or self._parse_id_var(any_token=False), 2118 ) 2119 2120 table_alias: t.Optional[exp.Expression] 2121 2122 if view: 2123 table = self._parse_id_var(any_token=False) 2124 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2125 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2126 else: 2127 table_alias = self._parse_table_alias() 2128 2129 expression = self.expression( 2130 exp.Lateral, 2131 this=this, 2132 view=view, 2133 outer=outer, 2134 alias=table_alias, 2135 ) 2136 2137 return expression 2138 2139 def _parse_join_side_and_kind( 2140 self, 2141 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2142 return ( 2143 
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse one join clause (comma join, NATURAL/side/kind JOIN, or
        OUTER/CROSS APPLY); returns None if no join follows."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: undo the speculative side/kind matching.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse <index name> ON [TABLE] <table> (<expression>)."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index definition in CREATE TABLE:
        [UNIQUE] [PRIMARY] [AMP] INDEX <name> [(cols)]."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name: [catalog.][db.]table[.more]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a named
        table with optional alias, pivots, hints and TABLESAMPLE."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent: TABLESAMPLE may precede or follow the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] x]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects like BigQuery treat the alias as a column, not a table, name.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse a VALUES table factor, either bare or wrapped as (VALUES ...)."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2335 if not is_derived and not self._match(TokenType.VALUES): 2336 return None 2337 2338 expressions = self._parse_csv(self._parse_value) 2339 2340 if is_derived: 2341 self._match_r_paren() 2342 2343 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2344 2345 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2346 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2347 as_modifier and self._match_text_seq("USING", "SAMPLE") 2348 ): 2349 return None 2350 2351 bucket_numerator = None 2352 bucket_denominator = None 2353 bucket_field = None 2354 percent = None 2355 rows = None 2356 size = None 2357 seed = None 2358 2359 kind = ( 2360 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2361 ) 2362 method = self._parse_var(tokens=(TokenType.ROW,)) 2363 2364 self._match(TokenType.L_PAREN) 2365 2366 num = self._parse_number() 2367 2368 if self._match_text_seq("BUCKET"): 2369 bucket_numerator = self._parse_number() 2370 self._match_text_seq("OUT", "OF") 2371 bucket_denominator = bucket_denominator = self._parse_number() 2372 self._match(TokenType.ON) 2373 bucket_field = self._parse_field() 2374 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2375 percent = num 2376 elif self._match(TokenType.ROWS): 2377 rows = num 2378 else: 2379 size = num 2380 2381 self._match(TokenType.R_PAREN) 2382 2383 if self._match(TokenType.L_PAREN): 2384 method = self._parse_var() 2385 seed = self._match(TokenType.COMMA) and self._parse_number() 2386 self._match_r_paren() 2387 elif self._match_texts(("SEED", "REPEATABLE")): 2388 seed = self._parse_wrapped(self._parse_number) 2389 2390 return self.expression( 2391 exp.TableSample, 2392 method=method, 2393 bucket_numerator=bucket_numerator, 2394 bucket_denominator=bucket_denominator, 2395 bucket_field=bucket_field, 2396 percent=percent, 2397 rows=rows, 2398 size=size, 2399 seed=seed, 2400 kind=kind, 2401 ) 2402 2403 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2404 return list(iter(self._parse_pivot, None)) 2405 2406 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2407 index = self._index 2408 2409 if self._match(TokenType.PIVOT): 2410 unpivot = False 2411 elif self._match(TokenType.UNPIVOT): 2412 unpivot = True 2413 else: 2414 return None 2415 2416 expressions = [] 2417 field = None 2418 2419 if not self._match(TokenType.L_PAREN): 2420 self._retreat(index) 2421 return None 2422 2423 if unpivot: 2424 expressions = self._parse_csv(self._parse_column) 2425 else: 2426 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2427 2428 if not expressions: 2429 self.raise_error("Failed to parse PIVOT's aggregation list") 2430 2431 if not self._match(TokenType.FOR): 2432 self.raise_error("Expecting FOR") 2433 2434 value = self._parse_column() 2435 2436 if not self._match(TokenType.IN): 2437 self.raise_error("Expecting IN") 2438 2439 field = self._parse_in(value, alias=True) 2440 2441 self._match_r_paren() 2442 2443 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2444 2445 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2446 pivot.set("alias", self._parse_table_alias()) 2447 2448 if not unpivot: 2449 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2450 2451 columns: t.List[exp.Expression] = [] 2452 for fld in pivot.args["field"].expressions: 2453 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2454 for name in names: 2455 if self.PREFIXED_PIVOT_COLUMNS: 2456 name = f"{name}_{field_name}" if name else field_name 2457 else: 2458 name = f"{field_name}_{name}" if name else field_name 2459 2460 columns.append(exp.to_identifier(name)) 2461 2462 pivot.set("columns", columns) 2463 2464 return pivot 2465 2466 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2467 return [agg.alias for agg 
in aggregations] 2468 2469 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2470 if not skip_where_token and not self._match(TokenType.WHERE): 2471 return None 2472 2473 return self.expression( 2474 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2475 ) 2476 2477 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2478 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2479 return None 2480 2481 elements = defaultdict(list) 2482 2483 while True: 2484 expressions = self._parse_csv(self._parse_conjunction) 2485 if expressions: 2486 elements["expressions"].extend(expressions) 2487 2488 grouping_sets = self._parse_grouping_sets() 2489 if grouping_sets: 2490 elements["grouping_sets"].extend(grouping_sets) 2491 2492 rollup = None 2493 cube = None 2494 totals = None 2495 2496 with_ = self._match(TokenType.WITH) 2497 if self._match(TokenType.ROLLUP): 2498 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2499 elements["rollup"].extend(ensure_list(rollup)) 2500 2501 if self._match(TokenType.CUBE): 2502 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2503 elements["cube"].extend(ensure_list(cube)) 2504 2505 if self._match_text_seq("TOTALS"): 2506 totals = True 2507 elements["totals"] = True # type: ignore 2508 2509 if not (grouping_sets or rollup or cube or totals): 2510 break 2511 2512 return self.expression(exp.Group, **elements) # type: ignore 2513 2514 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2515 if not self._match(TokenType.GROUPING_SETS): 2516 return None 2517 2518 return self._parse_wrapped_csv(self._parse_grouping_set) 2519 2520 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2521 if self._match(TokenType.L_PAREN): 2522 grouping_set = self._parse_csv(self._parse_column) 2523 self._match_r_paren() 2524 return self.expression(exp.Tuple, expressions=grouping_set) 2525 2526 return 
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, exp_class: t.Type[exp.Expression], *texts: str
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-like clause introduced by `texts` (e.g. SORT BY) into
        an `exp_class` node."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|LAST],
        applying the dialect's default null ordering when not explicit."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Derive implicit NULLS FIRST from the dialect's null_ordering setting.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or a FETCH FIRST|NEXT clause; returns `this` if absent.

        Args:
            this: expression the limit applies to.
            top: parse TOP n (T-SQL) instead of LIMIT n.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (or MySQL's LIMIT x, y comma form)."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression]
= None 2643 if self._match_text_seq("NOWAIT"): 2644 wait = True 2645 elif self._match_text_seq("WAIT"): 2646 wait = self._parse_primary() 2647 elif self._match_text_seq("SKIP", "LOCKED"): 2648 wait = False 2649 2650 locks.append( 2651 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2652 ) 2653 2654 return locks 2655 2656 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2657 if not self._match_set(self.SET_OPERATIONS): 2658 return this 2659 2660 token_type = self._prev.token_type 2661 2662 if token_type == TokenType.UNION: 2663 expression = exp.Union 2664 elif token_type == TokenType.EXCEPT: 2665 expression = exp.Except 2666 else: 2667 expression = exp.Intersect 2668 2669 return self.expression( 2670 expression, 2671 this=this, 2672 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2673 expression=self._parse_set_operations(self._parse_select(nested=True)), 2674 ) 2675 2676 def _parse_expression(self) -> t.Optional[exp.Expression]: 2677 return self._parse_alias(self._parse_conjunction()) 2678 2679 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2680 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2681 2682 def _parse_equality(self) -> t.Optional[exp.Expression]: 2683 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2684 2685 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2686 return self._parse_tokens(self._parse_range, self.COMPARISON) 2687 2688 def _parse_range(self) -> t.Optional[exp.Expression]: 2689 this = self._parse_bitwise() 2690 negate = self._match(TokenType.NOT) 2691 2692 if self._match_set(self.RANGE_PARSERS): 2693 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2694 if not expression: 2695 return this 2696 2697 this = expression 2698 elif self._match(TokenType.ISNULL): 2699 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2700 2701 # Postgres supports ISNULL 
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] (DISTINCT FROM ... | NULL | TRUE/FALSE)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all - rewind so IS can be re-examined.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse an IN predicate: UNNEST(...), a subquery, an expression list, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query=`; anything else is an expression list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `BETWEEN low AND high` into an exp.Between node."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Escape if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<value>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> arriving as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed / AT TIME ZONE expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse intervals and `<type> <literal>` casts (e.g. DATE '2020-01-01')."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type size argument, e.g. the `10` in VARCHAR(10), with optional unit."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type into an exp.DataType.

        With `check_func=True`, a parenthesized type that could also be a function
        call is only accepted when a string literal follows; otherwise None is
        returned so the caller can re-parse it as a function.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            # Each additional [] wraps the type in another ARRAY level.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT [LOCAL] TIME ZONE variants onto canonical types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal - this is a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name[:] type [constraints]`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if the clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted, cast, or bracket-subscripted) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `::` is a cast - the next token(s) must form a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: what was parsed as a column is actually
                # the table/db/catalog prefix of the new field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse primary expressions: literals, adjacent-string concatenation,
        `.N` number shorthand, and parenthesized expressions/subqueries/tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers when available.

        Unknown names become exp.Anonymous; `anonymous=True` forces that path.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter (identifier plus optional type/constraints)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF signature, e.g. db.fn(a INT, b TEXT)."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ...), a DISTINCT argument list, or a plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - rewind and parse as a regular expression instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints), if present."""
        index = self._index

        try:
            # A parenthesized subquery is not a schema; probe, then always rewind.
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and constraint list."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT options: (start, increment) or START ... INCREMENT ..."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint, with either one value or a wrapped list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) and its options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) without IDENTITY: a computed expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse constraints introduced by NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint (CONSTRAINT name ...), or an unnamed one."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint via the registered CONSTRAINT_PARSERS."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or with a column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key constraint options into raw strings (e.g. 'ON DELETE CASCADE')."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with its ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/array literals and {...} struct literals, recursively."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts against the dialect's array index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Slice if a colon follows (e.g. arr[1:2])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(...) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict=False` yields exp.TryCast instead."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type') variant - the target type is a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) or CONVERT(expr USING charset) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3617 """ 3618 args = self._parse_csv(self._parse_conjunction) 3619 3620 if len(args) < 3: 3621 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3622 3623 expression, *expressions = args 3624 if not expression: 3625 return None 3626 3627 ifs = [] 3628 for search, result in zip(expressions[::2], expressions[1::2]): 3629 if not search or not result: 3630 return None 3631 3632 if isinstance(search, exp.Literal): 3633 ifs.append( 3634 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3635 ) 3636 elif isinstance(search, exp.Null): 3637 ifs.append( 3638 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3639 ) 3640 else: 3641 cond = exp.or_( 3642 exp.EQ(this=expression.copy(), expression=search), 3643 exp.and_( 3644 exp.Is(this=expression.copy(), expression=exp.Null()), 3645 exp.Is(this=search.copy(), expression=exp.Null()), 3646 copy=False, 3647 ), 3648 copy=False, 3649 ) 3650 ifs.append(exp.If(this=cond, true=result)) 3651 3652 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3653 3654 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3655 self._match_text_seq("KEY") 3656 key = self._parse_field() 3657 self._match(TokenType.COLON) 3658 self._match_text_seq("VALUE") 3659 value = self._parse_field() 3660 if not key and not value: 3661 return None 3662 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3663 3664 def _parse_json_object(self) -> exp.Expression: 3665 expressions = self._parse_csv(self._parse_json_key_value) 3666 3667 null_handling = None 3668 if self._match_text_seq("NULL", "ON", "NULL"): 3669 null_handling = "NULL ON NULL" 3670 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3671 null_handling = "ABSENT ON NULL" 3672 3673 unique_keys = None 3674 if self._match_text_seq("WITH", "UNIQUE"): 3675 unique_keys = True 3676 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3677 unique_keys = False 3678 3679 
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single-argument LOG: some dialects treat it as natural log (LN).
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL-style MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One column definition inside the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION / STRPOS-style calls; argument order varies by dialect."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(needle IN haystack).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_struct(self) -> exp.Struct:
        # STRUCT(expr [AS alias], ...) -- aliases are allowed per element.
        return exp.Struct.from_arg_list(self._parse_csv(lambda:
            self._parse_lambda(alias=True)))

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        # Optional LEADING / TRAILING / BOTH qualifier.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): what was parsed first is the trim set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # WINDOW w AS (...), w2 AS (...) at the end of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of an expression: FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and the OVER (...) specification itself.

        Args:
            this: the expression the window applies to.
            alias: True when parsing a named window (WINDOW w AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: [ROWS|RANGE] BETWEEN start AND end.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame spec: UNBOUNDED / CURRENT ROW / <expr>,
        # plus an optional PRECEDING / FOLLOWING qualifier.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias or parenthesized alias list.

        Args:
            this: the expression being aliased.
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if
            self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like variable name."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return any non-reserved token, else None.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        # Parses a parameter reference; the surrounding braces are optional.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Not actually a placeholder -- give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * EXCEPT (...) column exclusion list.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * REPLACE (...) column replacement list.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a separator-delimited list using parse_method for each item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Parse a left-associative chain of binary operators drawn from `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse parse_method's production, optionally requiring parentheses.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START TRANSACTION together with its mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode can span several VAR tokens, e.g. READ WRITE.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was already consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # COMMIT AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        this = None
        # The constraint-kind token was consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as ADD COLUMN instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        # ALTER TABLE ... ALTER [COLUMN] col ...
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; falls back to an opaque Command when unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node when all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * form.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * form.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        # Unknown SHOW target: record it as a generic Show node.
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET assignment such as SET x = 1 or SET x TO 1."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Leftover tokens: treat the whole statement as an opaque command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        # Match one of several (possibly multi-word) options as a Var.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume all remaining tokens and wrap them in an opaque Command node.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a (multi-word) parser key."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        # No parser key matched: rewind to where we started.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns truthy (and advances) when the current token matches token_type.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression=None):
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive single-token text match.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive multi-token text sequence match; rewinds on failure.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        # Rewrite Column/Identifier nodes into Dot/Var chains.
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        # Replace column references to lambda parameters with plain identifiers.
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
775 def __init__( 776 self, 777 error_level: t.Optional[ErrorLevel] = None, 778 error_message_context: int = 100, 779 index_offset: int = 0, 780 unnest_column_only: bool = False, 781 alias_post_tablesample: bool = False, 782 max_errors: int = 3, 783 null_ordering: t.Optional[str] = None, 784 ): 785 self.error_level = error_level or ErrorLevel.IMMEDIATE 786 self.error_message_context = error_message_context 787 self.index_offset = index_offset 788 self.unnest_column_only = unnest_column_only 789 self.alias_post_tablesample = alias_post_tablesample 790 self.max_errors = max_errors 791 self.null_ordering = null_ordering 792 self.reset()
804 def parse( 805 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 806 ) -> t.List[t.Optional[exp.Expression]]: 807 """ 808 Parses a list of tokens and returns a list of syntax trees, one tree 809 per parsed SQL statement. 810 811 Args: 812 raw_tokens: the list of tokens. 813 sql: the original SQL string, used to produce helpful debug messages. 814 815 Returns: 816 The list of syntax trees. 817 """ 818 return self._parse( 819 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 820 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
822 def parse_into( 823 self, 824 expression_types: exp.IntoType, 825 raw_tokens: t.List[Token], 826 sql: t.Optional[str] = None, 827 ) -> t.List[t.Optional[exp.Expression]]: 828 """ 829 Parses a list of tokens into a given Expression type. If a collection of Expression 830 types is given instead, this method will try to parse the token list into each one 831 of them, stopping at the first for which the parsing succeeds. 832 833 Args: 834 expression_types: the expression type(s) to try and parse the token list into. 835 raw_tokens: the list of tokens. 836 sql: the original SQL string, used to produce helpful debug messages. 837 838 Returns: 839 The target Expression. 840 """ 841 errors = [] 842 for expression_type in ensure_collection(expression_types): 843 parser = self.EXPRESSION_PARSERS.get(expression_type) 844 if not parser: 845 raise TypeError(f"No parser registered for {expression_type}") 846 try: 847 return self._parse(parser, raw_tokens, sql) 848 except ParseError as e: 849 e.errors[0]["into_expression"] = expression_type 850 errors.append(e) 851 raise ParseError( 852 f"Failed to parse into {expression_types}", 853 errors=merge_errors(errors), 854 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
890 def check_errors(self) -> None: 891 """ 892 Logs or raises any found errors, depending on the chosen error level setting. 893 """ 894 if self.error_level == ErrorLevel.WARN: 895 for error in self.errors: 896 logger.error(str(error)) 897 elif self.error_level == ErrorLevel.RAISE and self.errors: 898 raise ParseError( 899 concat_messages(self.errors, self.max_errors), 900 errors=merge_errors(self.errors), 901 )
Logs or raises any found errors, depending on the chosen error level setting.
903 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 904 """ 905 Appends an error in the list of recorded errors or raises it, depending on the chosen 906 error level setting. 907 """ 908 token = token or self._curr or self._prev or Token.string("") 909 start = token.start 910 end = token.end + 1 911 start_context = self.sql[max(start - self.error_message_context, 0) : start] 912 highlight = self.sql[start:end] 913 end_context = self.sql[end : end + self.error_message_context] 914 915 error = ParseError.new( 916 f"{message}. Line {token.line}, Col: {token.col}.\n" 917 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 918 description=message, 919 line=token.line, 920 col=token.col, 921 start_context=start_context, 922 highlight=highlight, 923 end_context=end_context, 924 ) 925 926 if self.error_level == ErrorLevel.IMMEDIATE: 927 raise error 928 929 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
931 def expression( 932 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 933 ) -> E: 934 """ 935 Creates a new, validated Expression. 936 937 Args: 938 exp_class: the expression class to instantiate. 939 comments: an optional list of comments to attach to the expression. 940 kwargs: the arguments to set for the expression along with their respective values. 941 942 Returns: 943 The target expression. 944 """ 945 instance = exp_class(**kwargs) 946 instance.add_comments(comments) if comments else self._add_comments(instance) 947 self.validate_expression(instance) 948 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
955 def validate_expression( 956 self, expression: exp.Expression, args: t.Optional[t.List] = None 957 ) -> None: 958 """ 959 Validates an already instantiated expression, making sure that all its mandatory arguments 960 are set. 961 962 Args: 963 expression: the expression to validate. 964 args: an optional list of items that was used to instantiate the expression, if it's a Func. 965 """ 966 if self.error_level == ErrorLevel.IGNORE: 967 return 968 969 for error_message in expression.error_messages(args): 970 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.