# sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")

# Type variable bound to Expression so Parser.expression() can return the exact class it builds.
E = t.TypeVar("E", bound=exp.Expression)


def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap (or StarMap for a lone `*`) from a flat [k1, v1, k2, v2, ...] argument list."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key/value; assumes an even number of args — TODO confirm callers guarantee this.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args):
    """Build a LIKE expression, wrapped in an Escape node when a third (escape) argument is given.

    Note the argument order: args[1] is the subject (`this`) and args[0] is the pattern.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a RANGE_PARSERS-compatible callable parsing `this <op> <bitwise expr>` into expr_type."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes word tries for the multi-word SHOW/SET parser keywords."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Keys such as "ISOLATION LEVEL ..." are split into word sequences for trie matching.
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Maps upper-cased SQL function names to callables that build the matching Expression
    # from a parsed argument list. Seeded from every registered Func's sql_names().
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's subject/pattern arrive reversed, like LIKE above.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT then keep the first 10 chars (the "YYYY-MM-DD" date prefix).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to CurrentDate (not a CurrentDatetime) — confirm intended.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Dialects may add supported join hint names; empty means no hints recognized.
    JOIN_HINTS: t.Set[str] = set()

    # Type tokens that can parameterize other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    # Every token that can start a data type in a CAST / column definition.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }
    # Quantifiers that may prefix a subquery, e.g. `= ANY (SELECT ...)`.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # All object kinds accepted after CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens (mostly soft keywords) that may be used as identifiers / variable names.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END must stay a keyword inside INTERVAL parsing contexts.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens valid as a table alias; excludes words that would be
    # ambiguous right after a table reference (join sides, OFFSET, WINDOW, ...).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Inside COMMENT ON ... IS, `IS` must not be consumed as an alias.
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # Inside UPDATE, `SET` must not be consumed as an alias.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may appear where a function name is expected.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # --- Operator precedence tables: token -> binary Expression class. ---

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `x -> expr` builds a Lambda, `x => expr` a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that chain onto a column: casts and JSON extraction arrows.
    # DOT is handled specially by the column parser, hence None.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target Expression types to the parse method used by parse_into().
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }
    # Maps a statement's leading token to its parse method.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            # Optional kind keyword, e.g. USE WAREHOUSE x; `and` yields the Var only on a match.
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Terminal/primary expressions: literals, star, NULL, booleans, typed strings.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes: `?`, `@param`, and `:name` / `:1`.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Infix predicates at range precedence (BETWEEN, IN, IS, LIKE-family).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # CREATE-statement property keywords -> parse method. Keys are matched via
    # word sequences, so multi-word keys like "CHARACTER SET" are valid.
    # Several lambdas inspect self._prev for a NO/DUAL/DEFAULT modifier already consumed.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keywords -> parse method (CREATE TABLE column definitions).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE sub-command keyword -> parse method.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that don't use call parentheses (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions with bespoke argument grammars (e.g. CAST(x AS type), EXTRACT(part FROM x)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier arg name -> parse method; joins/laterals repeat until exhausted
    # via iter(callable, None) sentinel iteration.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects that support SHOW statements.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Populated by dialects with special literal syntax per data type.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept trailing query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict handling keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # --- Dialect behavior flags (overridden by subclasses). ---

    # When True, `::` produces Cast; otherwise TryCast.
    STRICT_CAST = True

    # Whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(a, b) means log base a of b (vs. log of a in base b).
    LOG_BASE_FIRST = True
    # Whether single-arg LOG defaults to the natural logarithm.
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self) -> None:
        """Clear all per-parse state so the instance can be reused for another token list."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # All candidate types failed; surface every attempt's errors, chained to the last one.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the token stream on semicolons and run parse_method once per statement chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open an empty chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start at -1 so the first _advance() lands on index 0.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over mean the chunk wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The offending span is underlined with ANSI escape codes.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Either attach the explicit comments, or fall back to any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        self.validate_expression(instance)
        return instance

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attach comments buffered from the previous token, consuming them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL text spanned by the two tokens."""
        return self.sql[start.start : end.end]

    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward, refreshing _curr/_next/_prev and pending comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewind (or fast-forward) the cursor to an absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fallback: wrap the previous keyword and the rest of the input as an opaque Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse `COMMENT [IF EXISTS] ON <kind> <target> IS <string>`."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        # Unknown target kind: treat the whole statement as an opaque command.
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    # NOTE(review): this method is truncated at the end of the visible chunk; the text
    # below reproduces it verbatim up to the truncation point and continues elsewhere.
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if
self._match_set(self.STATEMENT_PARSERS): 1095 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1096 1097 if self._match_set(Tokenizer.COMMANDS): 1098 return self._parse_command() 1099 1100 expression = self._parse_expression() 1101 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1102 return self._parse_query_modifiers(expression) 1103 1104 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1105 start = self._prev 1106 temporary = self._match(TokenType.TEMPORARY) 1107 materialized = self._match(TokenType.MATERIALIZED) 1108 kind = self._match_set(self.CREATABLES) and self._prev.text 1109 if not kind: 1110 return self._parse_as_command(start) 1111 1112 return self.expression( 1113 exp.Drop, 1114 exists=self._parse_exists(), 1115 this=self._parse_table(schema=True), 1116 kind=kind, 1117 temporary=temporary, 1118 materialized=materialized, 1119 cascade=self._match(TokenType.CASCADE), 1120 constraints=self._match_text_seq("CONSTRAINTS"), 1121 purge=self._match_text_seq("PURGE"), 1122 ) 1123 1124 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1125 return ( 1126 self._match(TokenType.IF) 1127 and (not not_ or self._match(TokenType.NOT)) 1128 and self._match(TokenType.EXISTS) 1129 ) 1130 1131 def _parse_create(self) -> t.Optional[exp.Expression]: 1132 start = self._prev 1133 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1134 TokenType.OR, TokenType.REPLACE 1135 ) 1136 unique = self._match(TokenType.UNIQUE) 1137 1138 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1139 self._match(TokenType.TABLE) 1140 1141 properties = None 1142 create_token = self._match_set(self.CREATABLES) and self._prev 1143 1144 if not create_token: 1145 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1146 create_token = self._match_set(self.CREATABLES) and self._prev 1147 1148 if not properties or not create_token: 1149 return 
self._parse_as_command(start) 1150 1151 exists = self._parse_exists(not_=True) 1152 this = None 1153 expression = None 1154 indexes = None 1155 no_schema_binding = None 1156 begin = None 1157 clone = None 1158 1159 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1160 this = self._parse_user_defined_function(kind=create_token.token_type) 1161 temp_properties = self._parse_properties() 1162 if properties and temp_properties: 1163 properties.expressions.extend(temp_properties.expressions) 1164 elif temp_properties: 1165 properties = temp_properties 1166 1167 self._match(TokenType.ALIAS) 1168 begin = self._match(TokenType.BEGIN) 1169 return_ = self._match_text_seq("RETURN") 1170 expression = self._parse_statement() 1171 1172 if return_: 1173 expression = self.expression(exp.Return, this=expression) 1174 elif create_token.token_type == TokenType.INDEX: 1175 this = self._parse_index() 1176 elif create_token.token_type in self.DB_CREATABLES: 1177 table_parts = self._parse_table_parts(schema=True) 1178 1179 # exp.Properties.Location.POST_NAME 1180 if self._match(TokenType.COMMA): 1181 temp_properties = self._parse_properties(before=True) 1182 if properties and temp_properties: 1183 properties.expressions.extend(temp_properties.expressions) 1184 elif temp_properties: 1185 properties = temp_properties 1186 1187 this = self._parse_schema(this=table_parts) 1188 1189 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1190 temp_properties = self._parse_properties() 1191 if properties and temp_properties: 1192 properties.expressions.extend(temp_properties.expressions) 1193 elif temp_properties: 1194 properties = temp_properties 1195 1196 self._match(TokenType.ALIAS) 1197 1198 # exp.Properties.Location.POST_ALIAS 1199 if not ( 1200 self._match(TokenType.SELECT, advance=False) 1201 or self._match(TokenType.WITH, advance=False) 1202 or self._match(TokenType.L_PAREN, advance=False) 1203 ): 1204 temp_properties = self._parse_properties() 1205 if properties 
and temp_properties: 1206 properties.expressions.extend(temp_properties.expressions) 1207 elif temp_properties: 1208 properties = temp_properties 1209 1210 expression = self._parse_ddl_select() 1211 1212 if create_token.token_type == TokenType.TABLE: 1213 # exp.Properties.Location.POST_EXPRESSION 1214 temp_properties = self._parse_properties() 1215 if properties and temp_properties: 1216 properties.expressions.extend(temp_properties.expressions) 1217 elif temp_properties: 1218 properties = temp_properties 1219 1220 indexes = [] 1221 while True: 1222 index = self._parse_create_table_index() 1223 1224 # exp.Properties.Location.POST_INDEX 1225 if self._match(TokenType.PARTITION_BY, advance=False): 1226 temp_properties = self._parse_properties() 1227 if properties and temp_properties: 1228 properties.expressions.extend(temp_properties.expressions) 1229 elif temp_properties: 1230 properties = temp_properties 1231 1232 if not index: 1233 break 1234 else: 1235 indexes.append(index) 1236 elif create_token.token_type == TokenType.VIEW: 1237 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1238 no_schema_binding = True 1239 1240 if self._match_text_seq("CLONE"): 1241 clone = self._parse_table(schema=True) 1242 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1243 clone_kind = ( 1244 self._match(TokenType.L_PAREN) 1245 and self._match_texts(self.CLONE_KINDS) 1246 and self._prev.text.upper() 1247 ) 1248 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1249 self._match(TokenType.R_PAREN) 1250 clone = self.expression( 1251 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1252 ) 1253 1254 return self.expression( 1255 exp.Create, 1256 this=this, 1257 kind=create_token.text, 1258 replace=replace, 1259 unique=unique, 1260 expression=expression, 1261 exists=exists, 1262 properties=properties, 1263 indexes=indexes, 1264 no_schema_binding=no_schema_binding, 1265 begin=begin, 1266 clone=clone, 1267 
) 1268 1269 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1270 self._match(TokenType.COMMA) 1271 1272 # parsers look to _prev for no/dual/default, so need to consume first 1273 self._match_text_seq("NO") 1274 self._match_text_seq("DUAL") 1275 self._match_text_seq("DEFAULT") 1276 1277 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1278 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1279 1280 return None 1281 1282 def _parse_property(self) -> t.Optional[exp.Expression]: 1283 if self._match_texts(self.PROPERTY_PARSERS): 1284 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1285 1286 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1287 return self._parse_character_set(default=True) 1288 1289 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1290 return self._parse_sortkey(compound=True) 1291 1292 if self._match_text_seq("SQL", "SECURITY"): 1293 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1294 1295 assignment = self._match_pair( 1296 TokenType.VAR, TokenType.EQ, advance=False 1297 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1298 1299 if assignment: 1300 key = self._parse_var_or_string() 1301 self._match(TokenType.EQ) 1302 return self.expression(exp.Property, this=key, value=self._parse_column()) 1303 1304 return None 1305 1306 def _parse_stored(self) -> exp.Expression: 1307 self._match(TokenType.ALIAS) 1308 1309 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1310 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1311 1312 return self.expression( 1313 exp.FileFormatProperty, 1314 this=self.expression( 1315 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1316 ) 1317 if input_format or output_format 1318 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1319 ) 1320 1321 def 
_parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1322 self._match(TokenType.EQ) 1323 self._match(TokenType.ALIAS) 1324 return self.expression(exp_class, this=self._parse_field()) 1325 1326 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1327 properties = [] 1328 1329 while True: 1330 if before: 1331 identified_property = self._parse_property_before() 1332 else: 1333 identified_property = self._parse_property() 1334 1335 if not identified_property: 1336 break 1337 for p in ensure_list(identified_property): 1338 properties.append(p) 1339 1340 if properties: 1341 return self.expression(exp.Properties, expressions=properties) 1342 1343 return None 1344 1345 def _parse_fallback(self, no=False) -> exp.Expression: 1346 self._match_text_seq("FALLBACK") 1347 return self.expression( 1348 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1349 ) 1350 1351 def _parse_volatile_property(self) -> exp.Expression: 1352 if self._index >= 2: 1353 pre_volatile_token = self._tokens[self._index - 2] 1354 else: 1355 pre_volatile_token = None 1356 1357 if pre_volatile_token and pre_volatile_token.token_type in ( 1358 TokenType.CREATE, 1359 TokenType.REPLACE, 1360 TokenType.UNIQUE, 1361 ): 1362 return exp.VolatileProperty() 1363 1364 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1365 1366 def _parse_with_property( 1367 self, 1368 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1369 self._match(TokenType.WITH) 1370 if self._match(TokenType.L_PAREN, advance=False): 1371 return self._parse_wrapped_csv(self._parse_property) 1372 1373 if self._match_text_seq("JOURNAL"): 1374 return self._parse_withjournaltable() 1375 1376 if self._match_text_seq("DATA"): 1377 return self._parse_withdata(no=False) 1378 elif self._match_text_seq("NO", "DATA"): 1379 return self._parse_withdata(no=True) 1380 1381 if not self._next: 1382 return None 1383 1384 return 
self._parse_withisolatedloading() 1385 1386 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1387 def _parse_definer(self) -> t.Optional[exp.Expression]: 1388 self._match(TokenType.EQ) 1389 1390 user = self._parse_id_var() 1391 self._match(TokenType.PARAMETER) 1392 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1393 1394 if not user or not host: 1395 return None 1396 1397 return exp.DefinerProperty(this=f"{user}@{host}") 1398 1399 def _parse_withjournaltable(self) -> exp.Expression: 1400 self._match(TokenType.TABLE) 1401 self._match(TokenType.EQ) 1402 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1403 1404 def _parse_log(self, no=False) -> exp.Expression: 1405 self._match_text_seq("LOG") 1406 return self.expression(exp.LogProperty, no=no) 1407 1408 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1409 before = self._match_text_seq("BEFORE") 1410 self._match_text_seq("JOURNAL") 1411 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1412 1413 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1414 self._match_text_seq("NOT") 1415 self._match_text_seq("LOCAL") 1416 self._match_text_seq("AFTER", "JOURNAL") 1417 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1418 1419 def _parse_checksum(self) -> exp.Expression: 1420 self._match_text_seq("CHECKSUM") 1421 self._match(TokenType.EQ) 1422 1423 on = None 1424 if self._match(TokenType.ON): 1425 on = True 1426 elif self._match_text_seq("OFF"): 1427 on = False 1428 default = self._match(TokenType.DEFAULT) 1429 1430 return self.expression( 1431 exp.ChecksumProperty, 1432 on=on, 1433 default=default, 1434 ) 1435 1436 def _parse_freespace(self) -> exp.Expression: 1437 self._match_text_seq("FREESPACE") 1438 self._match(TokenType.EQ) 1439 return self.expression( 1440 exp.FreespaceProperty, this=self._parse_number(), 
percent=self._match(TokenType.PERCENT) 1441 ) 1442 1443 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1444 self._match_text_seq("MERGEBLOCKRATIO") 1445 if self._match(TokenType.EQ): 1446 return self.expression( 1447 exp.MergeBlockRatioProperty, 1448 this=self._parse_number(), 1449 percent=self._match(TokenType.PERCENT), 1450 ) 1451 else: 1452 return self.expression( 1453 exp.MergeBlockRatioProperty, 1454 no=no, 1455 default=default, 1456 ) 1457 1458 def _parse_datablocksize(self, default=None) -> exp.Expression: 1459 if default: 1460 self._match_text_seq("DATABLOCKSIZE") 1461 return self.expression(exp.DataBlocksizeProperty, default=True) 1462 elif self._match_texts(("MIN", "MINIMUM")): 1463 self._match_text_seq("DATABLOCKSIZE") 1464 return self.expression(exp.DataBlocksizeProperty, min=True) 1465 elif self._match_texts(("MAX", "MAXIMUM")): 1466 self._match_text_seq("DATABLOCKSIZE") 1467 return self.expression(exp.DataBlocksizeProperty, min=False) 1468 1469 self._match_text_seq("DATABLOCKSIZE") 1470 self._match(TokenType.EQ) 1471 size = self._parse_number() 1472 units = None 1473 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1474 units = self._prev.text 1475 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1476 1477 def _parse_blockcompression(self) -> exp.Expression: 1478 self._match_text_seq("BLOCKCOMPRESSION") 1479 self._match(TokenType.EQ) 1480 always = self._match_text_seq("ALWAYS") 1481 manual = self._match_text_seq("MANUAL") 1482 never = self._match_text_seq("NEVER") 1483 default = self._match_text_seq("DEFAULT") 1484 autotemp = None 1485 if self._match_text_seq("AUTOTEMP"): 1486 autotemp = self._parse_schema() 1487 1488 return self.expression( 1489 exp.BlockCompressionProperty, 1490 always=always, 1491 manual=manual, 1492 never=never, 1493 default=default, 1494 autotemp=autotemp, 1495 ) 1496 1497 def _parse_withisolatedloading(self) -> exp.Expression: 1498 no = self._match_text_seq("NO") 1499 
concurrent = self._match_text_seq("CONCURRENT") 1500 self._match_text_seq("ISOLATED", "LOADING") 1501 for_all = self._match_text_seq("FOR", "ALL") 1502 for_insert = self._match_text_seq("FOR", "INSERT") 1503 for_none = self._match_text_seq("FOR", "NONE") 1504 return self.expression( 1505 exp.IsolatedLoadingProperty, 1506 no=no, 1507 concurrent=concurrent, 1508 for_all=for_all, 1509 for_insert=for_insert, 1510 for_none=for_none, 1511 ) 1512 1513 def _parse_locking(self) -> exp.Expression: 1514 if self._match(TokenType.TABLE): 1515 kind = "TABLE" 1516 elif self._match(TokenType.VIEW): 1517 kind = "VIEW" 1518 elif self._match(TokenType.ROW): 1519 kind = "ROW" 1520 elif self._match_text_seq("DATABASE"): 1521 kind = "DATABASE" 1522 else: 1523 kind = None 1524 1525 if kind in ("DATABASE", "TABLE", "VIEW"): 1526 this = self._parse_table_parts() 1527 else: 1528 this = None 1529 1530 if self._match(TokenType.FOR): 1531 for_or_in = "FOR" 1532 elif self._match(TokenType.IN): 1533 for_or_in = "IN" 1534 else: 1535 for_or_in = None 1536 1537 if self._match_text_seq("ACCESS"): 1538 lock_type = "ACCESS" 1539 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1540 lock_type = "EXCLUSIVE" 1541 elif self._match_text_seq("SHARE"): 1542 lock_type = "SHARE" 1543 elif self._match_text_seq("READ"): 1544 lock_type = "READ" 1545 elif self._match_text_seq("WRITE"): 1546 lock_type = "WRITE" 1547 elif self._match_text_seq("CHECKSUM"): 1548 lock_type = "CHECKSUM" 1549 else: 1550 lock_type = None 1551 1552 override = self._match_text_seq("OVERRIDE") 1553 1554 return self.expression( 1555 exp.LockingProperty, 1556 this=this, 1557 kind=kind, 1558 for_or_in=for_or_in, 1559 lock_type=lock_type, 1560 override=override, 1561 ) 1562 1563 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1564 if self._match(TokenType.PARTITION_BY): 1565 return self._parse_csv(self._parse_conjunction) 1566 return [] 1567 1568 def _parse_partitioned_by(self) -> exp.Expression: 1569 self._match(TokenType.EQ) 
1570 return self.expression( 1571 exp.PartitionedByProperty, 1572 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1573 ) 1574 1575 def _parse_withdata(self, no=False) -> exp.Expression: 1576 if self._match_text_seq("AND", "STATISTICS"): 1577 statistics = True 1578 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1579 statistics = False 1580 else: 1581 statistics = None 1582 1583 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1584 1585 def _parse_noprimaryindex(self) -> exp.Expression: 1586 self._match_text_seq("PRIMARY", "INDEX") 1587 return exp.NoPrimaryIndexProperty() 1588 1589 def _parse_oncommit(self) -> exp.Expression: 1590 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1591 return exp.OnCommitProperty() 1592 1593 def _parse_distkey(self) -> exp.Expression: 1594 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1595 1596 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1597 table = self._parse_table(schema=True) 1598 options = [] 1599 while self._match_texts(("INCLUDING", "EXCLUDING")): 1600 this = self._prev.text.upper() 1601 id_var = self._parse_id_var() 1602 1603 if not id_var: 1604 return None 1605 1606 options.append( 1607 self.expression( 1608 exp.Property, 1609 this=this, 1610 value=exp.Var(this=id_var.this.upper()), 1611 ) 1612 ) 1613 return self.expression(exp.LikeProperty, this=table, expressions=options) 1614 1615 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1616 return self.expression( 1617 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1618 ) 1619 1620 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1621 self._match(TokenType.EQ) 1622 return self.expression( 1623 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1624 ) 1625 1626 def _parse_returns(self) -> exp.Expression: 1627 value: t.Optional[exp.Expression] 1628 is_table 
= self._match(TokenType.TABLE) 1629 1630 if is_table: 1631 if self._match(TokenType.LT): 1632 value = self.expression( 1633 exp.Schema, 1634 this="TABLE", 1635 expressions=self._parse_csv(self._parse_struct_types), 1636 ) 1637 if not self._match(TokenType.GT): 1638 self.raise_error("Expecting >") 1639 else: 1640 value = self._parse_schema(exp.Var(this="TABLE")) 1641 else: 1642 value = self._parse_types() 1643 1644 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1645 1646 def _parse_temporary(self, global_=False) -> exp.Expression: 1647 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1648 return self.expression(exp.TemporaryProperty, global_=global_) 1649 1650 def _parse_describe(self) -> exp.Expression: 1651 kind = self._match_set(self.CREATABLES) and self._prev.text 1652 this = self._parse_table() 1653 1654 return self.expression(exp.Describe, this=this, kind=kind) 1655 1656 def _parse_insert(self) -> exp.Expression: 1657 overwrite = self._match(TokenType.OVERWRITE) 1658 local = self._match(TokenType.LOCAL) 1659 alternative = None 1660 1661 if self._match_text_seq("DIRECTORY"): 1662 this: t.Optional[exp.Expression] = self.expression( 1663 exp.Directory, 1664 this=self._parse_var_or_string(), 1665 local=local, 1666 row_format=self._parse_row_format(match_row=True), 1667 ) 1668 else: 1669 if self._match(TokenType.OR): 1670 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1671 1672 self._match(TokenType.INTO) 1673 self._match(TokenType.TABLE) 1674 this = self._parse_table(schema=True) 1675 1676 return self.expression( 1677 exp.Insert, 1678 this=this, 1679 exists=self._parse_exists(), 1680 partition=self._parse_partition(), 1681 expression=self._parse_ddl_select(), 1682 conflict=self._parse_on_conflict(), 1683 returning=self._parse_returning(), 1684 overwrite=overwrite, 1685 alternative=alternative, 1686 ) 1687 1688 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1689 conflict = 
self._match_text_seq("ON", "CONFLICT") 1690 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1691 1692 if not (conflict or duplicate): 1693 return None 1694 1695 nothing = None 1696 expressions = None 1697 key = None 1698 constraint = None 1699 1700 if conflict: 1701 if self._match_text_seq("ON", "CONSTRAINT"): 1702 constraint = self._parse_id_var() 1703 else: 1704 key = self._parse_csv(self._parse_value) 1705 1706 self._match_text_seq("DO") 1707 if self._match_text_seq("NOTHING"): 1708 nothing = True 1709 else: 1710 self._match(TokenType.UPDATE) 1711 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1712 1713 return self.expression( 1714 exp.OnConflict, 1715 duplicate=duplicate, 1716 expressions=expressions, 1717 nothing=nothing, 1718 key=key, 1719 constraint=constraint, 1720 ) 1721 1722 def _parse_returning(self) -> t.Optional[exp.Expression]: 1723 if not self._match(TokenType.RETURNING): 1724 return None 1725 1726 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1727 1728 def _parse_row(self) -> t.Optional[exp.Expression]: 1729 if not self._match(TokenType.FORMAT): 1730 return None 1731 return self._parse_row_format() 1732 1733 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1734 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1735 return None 1736 1737 if self._match_text_seq("SERDE"): 1738 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1739 1740 self._match_text_seq("DELIMITED") 1741 1742 kwargs = {} 1743 1744 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1745 kwargs["fields"] = self._parse_string() 1746 if self._match_text_seq("ESCAPED", "BY"): 1747 kwargs["escaped"] = self._parse_string() 1748 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1749 kwargs["collection_items"] = self._parse_string() 1750 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 
1751 kwargs["map_keys"] = self._parse_string() 1752 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1753 kwargs["lines"] = self._parse_string() 1754 if self._match_text_seq("NULL", "DEFINED", "AS"): 1755 kwargs["null"] = self._parse_string() 1756 1757 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1758 1759 def _parse_load_data(self) -> exp.Expression: 1760 local = self._match(TokenType.LOCAL) 1761 self._match_text_seq("INPATH") 1762 inpath = self._parse_string() 1763 overwrite = self._match(TokenType.OVERWRITE) 1764 self._match_pair(TokenType.INTO, TokenType.TABLE) 1765 1766 return self.expression( 1767 exp.LoadData, 1768 this=self._parse_table(schema=True), 1769 local=local, 1770 overwrite=overwrite, 1771 inpath=inpath, 1772 partition=self._parse_partition(), 1773 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1774 serde=self._match_text_seq("SERDE") and self._parse_string(), 1775 ) 1776 1777 def _parse_delete(self) -> exp.Expression: 1778 self._match(TokenType.FROM) 1779 1780 return self.expression( 1781 exp.Delete, 1782 this=self._parse_table(), 1783 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1784 where=self._parse_where(), 1785 returning=self._parse_returning(), 1786 ) 1787 1788 def _parse_update(self) -> exp.Expression: 1789 return self.expression( 1790 exp.Update, 1791 **{ # type: ignore 1792 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1793 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1794 "from": self._parse_from(modifiers=True), 1795 "where": self._parse_where(), 1796 "returning": self._parse_returning(), 1797 }, 1798 ) 1799 1800 def _parse_uncache(self) -> exp.Expression: 1801 if not self._match(TokenType.TABLE): 1802 self.raise_error("Expecting TABLE after UNCACHE") 1803 1804 return self.expression( 1805 exp.Uncache, 1806 exists=self._parse_exists(), 1807 
this=self._parse_table(schema=True), 1808 ) 1809 1810 def _parse_cache(self) -> exp.Expression: 1811 lazy = self._match(TokenType.LAZY) 1812 self._match(TokenType.TABLE) 1813 table = self._parse_table(schema=True) 1814 options = [] 1815 1816 if self._match(TokenType.OPTIONS): 1817 self._match_l_paren() 1818 k = self._parse_string() 1819 self._match(TokenType.EQ) 1820 v = self._parse_string() 1821 options = [k, v] 1822 self._match_r_paren() 1823 1824 self._match(TokenType.ALIAS) 1825 return self.expression( 1826 exp.Cache, 1827 this=table, 1828 lazy=lazy, 1829 options=options, 1830 expression=self._parse_select(nested=True), 1831 ) 1832 1833 def _parse_partition(self) -> t.Optional[exp.Expression]: 1834 if not self._match(TokenType.PARTITION): 1835 return None 1836 1837 return self.expression( 1838 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1839 ) 1840 1841 def _parse_value(self) -> exp.Expression: 1842 if self._match(TokenType.L_PAREN): 1843 expressions = self._parse_csv(self._parse_conjunction) 1844 self._match_r_paren() 1845 return self.expression(exp.Tuple, expressions=expressions) 1846 1847 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1848 # Source: https://prestodb.io/docs/current/sql/values.html 1849 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1850 1851 def _parse_select( 1852 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1853 ) -> t.Optional[exp.Expression]: 1854 cte = self._parse_with() 1855 if cte: 1856 this = self._parse_statement() 1857 1858 if not this: 1859 self.raise_error("Failed to parse any statement following CTE") 1860 return cte 1861 1862 if "with" in this.arg_types: 1863 this.set("with", cte) 1864 else: 1865 self.raise_error(f"{this.key} does not support CTE") 1866 this = cte 1867 elif self._match(TokenType.SELECT): 1868 comments = self._prev_comments 1869 1870 hint = self._parse_hint() 1871 all_ = self._match(TokenType.ALL) 1872 distinct = self._match(TokenType.DISTINCT) 1873 1874 kind = ( 1875 self._match(TokenType.ALIAS) 1876 and self._match_texts(("STRUCT", "VALUE")) 1877 and self._prev.text 1878 ) 1879 1880 if distinct: 1881 distinct = self.expression( 1882 exp.Distinct, 1883 on=self._parse_value() if self._match(TokenType.ON) else None, 1884 ) 1885 1886 if all_ and distinct: 1887 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1888 1889 limit = self._parse_limit(top=True) 1890 expressions = self._parse_csv(self._parse_expression) 1891 1892 this = self.expression( 1893 exp.Select, 1894 kind=kind, 1895 hint=hint, 1896 distinct=distinct, 1897 expressions=expressions, 1898 limit=limit, 1899 ) 1900 this.comments = comments 1901 1902 into = self._parse_into() 1903 if into: 1904 this.set("into", into) 1905 1906 from_ = self._parse_from() 1907 if from_: 1908 this.set("from", from_) 1909 1910 this = self._parse_query_modifiers(this) 1911 elif (table or nested) and self._match(TokenType.L_PAREN): 1912 this = self._parse_table() if table else self._parse_select(nested=True) 1913 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1914 self._match_r_paren() 1915 1916 # early 
return so that subquery unions aren't parsed again 1917 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1918 # Union ALL should be a property of the top select node, not the subquery 1919 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1920 elif self._match(TokenType.VALUES): 1921 this = self.expression( 1922 exp.Values, 1923 expressions=self._parse_csv(self._parse_value), 1924 alias=self._parse_table_alias(), 1925 ) 1926 else: 1927 this = None 1928 1929 return self._parse_set_operations(this) 1930 1931 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1932 if not skip_with_token and not self._match(TokenType.WITH): 1933 return None 1934 1935 comments = self._prev_comments 1936 recursive = self._match(TokenType.RECURSIVE) 1937 1938 expressions = [] 1939 while True: 1940 expressions.append(self._parse_cte()) 1941 1942 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1943 break 1944 else: 1945 self._match(TokenType.WITH) 1946 1947 return self.expression( 1948 exp.With, comments=comments, expressions=expressions, recursive=recursive 1949 ) 1950 1951 def _parse_cte(self) -> exp.Expression: 1952 alias = self._parse_table_alias() 1953 if not alias or not alias.this: 1954 self.raise_error("Expected CTE to have alias") 1955 1956 self._match(TokenType.ALIAS) 1957 1958 return self.expression( 1959 exp.CTE, 1960 this=self._parse_wrapped(self._parse_statement), 1961 alias=alias, 1962 ) 1963 1964 def _parse_table_alias( 1965 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1966 ) -> t.Optional[exp.Expression]: 1967 any_token = self._match(TokenType.ALIAS) 1968 alias = ( 1969 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1970 or self._parse_string_as_identifier() 1971 ) 1972 1973 index = self._index 1974 if self._match(TokenType.L_PAREN): 1975 columns = self._parse_csv(self._parse_function_parameter) 1976 self._match_r_paren() if columns else 
self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap a parsed query in exp.Subquery, consuming pivots and (optionally) an alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Run every registered query-modifier parser and attach its result to `this`.

        Only applies to MODIFIABLES expressions; other inputs are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse a hint block (terminated by */) into exp.Hint, or return None."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] target."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        """Parse a FROM clause; with modifiers=True also consume trailing query modifiers."""
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        if not
self._match(TokenType.MATCH_RECOGNIZE): 2042 return None 2043 2044 self._match_l_paren() 2045 2046 partition = self._parse_partition_by() 2047 order = self._parse_order() 2048 measures = ( 2049 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2050 ) 2051 2052 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2053 rows = exp.Var(this="ONE ROW PER MATCH") 2054 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2055 text = "ALL ROWS PER MATCH" 2056 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2057 text += f" SHOW EMPTY MATCHES" 2058 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2059 text += f" OMIT EMPTY MATCHES" 2060 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2061 text += f" WITH UNMATCHED ROWS" 2062 rows = exp.Var(this=text) 2063 else: 2064 rows = None 2065 2066 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2067 text = "AFTER MATCH SKIP" 2068 if self._match_text_seq("PAST", "LAST", "ROW"): 2069 text += f" PAST LAST ROW" 2070 elif self._match_text_seq("TO", "NEXT", "ROW"): 2071 text += f" TO NEXT ROW" 2072 elif self._match_text_seq("TO", "FIRST"): 2073 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2074 elif self._match_text_seq("TO", "LAST"): 2075 text += f" TO LAST {self._advance_any().text}" # type: ignore 2076 after = exp.Var(this=text) 2077 else: 2078 after = None 2079 2080 if self._match_text_seq("PATTERN"): 2081 self._match_l_paren() 2082 2083 if not self._curr: 2084 self.raise_error("Expecting )", self._curr) 2085 2086 paren = 1 2087 start = self._curr 2088 2089 while self._curr and paren > 0: 2090 if self._curr.token_type == TokenType.L_PAREN: 2091 paren += 1 2092 if self._curr.token_type == TokenType.R_PAREN: 2093 paren -= 1 2094 end = self._prev 2095 self._advance() 2096 if paren > 0: 2097 self.raise_error("Expecting )", self._curr) 2098 pattern = exp.Var(this=self._find_sql(start, end)) 2099 else: 2100 pattern = None 2101 2102 define = ( 2103 
self._parse_csv( 2104 lambda: self.expression( 2105 exp.Alias, 2106 alias=self._parse_id_var(any_token=True), 2107 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2108 ) 2109 ) 2110 if self._match_text_seq("DEFINE") 2111 else None 2112 ) 2113 2114 self._match_r_paren() 2115 2116 return self.expression( 2117 exp.MatchRecognize, 2118 partition_by=partition, 2119 order=order, 2120 measures=measures, 2121 rows=rows, 2122 after=after, 2123 pattern=pattern, 2124 define=define, 2125 alias=self._parse_table_alias(), 2126 ) 2127 2128 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2129 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2130 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2131 2132 if outer_apply or cross_apply: 2133 this = self._parse_select(table=True) 2134 view = None 2135 outer = not cross_apply 2136 elif self._match(TokenType.LATERAL): 2137 this = self._parse_select(table=True) 2138 view = self._match(TokenType.VIEW) 2139 outer = self._match(TokenType.OUTER) 2140 else: 2141 return None 2142 2143 if not this: 2144 this = self._parse_function() or self._parse_id_var(any_token=False) 2145 while self._match(TokenType.DOT): 2146 this = exp.Dot( 2147 this=this, 2148 expression=self._parse_function() or self._parse_id_var(any_token=False), 2149 ) 2150 2151 table_alias: t.Optional[exp.Expression] 2152 2153 if view: 2154 table = self._parse_id_var(any_token=False) 2155 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2156 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2157 else: 2158 table_alias = self._parse_table_alias() 2159 2160 expression = self.expression( 2161 exp.Lateral, 2162 this=this, 2163 view=view, 2164 outer=outer, 2165 alias=table_alias, 2166 ) 2167 2168 return expression 2169 2170 def _parse_join_side_and_kind( 2171 self, 2172 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2173 return ( 2174 
self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and CROSS/OUTER APPLY."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed — undo the speculative side/kind consumption
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse an index target: name ON [TABLE] table (columns)."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse a [UNIQUE] [PRIMARY] [AMP] INDEX definition; None if INDEX is absent."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function calls disallowed in schemas)."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral/apply, UNNEST, derived VALUES, subquery, or table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if
subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place the alias after TABLESAMPLE; parse order depends on
        # the alias_post_tablesample flag
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects with unnest_column_only treat the alias as a column name
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # "offset" is the default name when no explicit identifier follows
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if
not is_derived and not self._match(TokenType.VALUES): 2367 return None 2368 2369 expressions = self._parse_csv(self._parse_value) 2370 2371 if is_derived: 2372 self._match_r_paren() 2373 2374 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2375 2376 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2377 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2378 as_modifier and self._match_text_seq("USING", "SAMPLE") 2379 ): 2380 return None 2381 2382 bucket_numerator = None 2383 bucket_denominator = None 2384 bucket_field = None 2385 percent = None 2386 rows = None 2387 size = None 2388 seed = None 2389 2390 kind = ( 2391 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2392 ) 2393 method = self._parse_var(tokens=(TokenType.ROW,)) 2394 2395 self._match(TokenType.L_PAREN) 2396 2397 num = self._parse_number() 2398 2399 if self._match(TokenType.BUCKET): 2400 bucket_numerator = self._parse_number() 2401 self._match(TokenType.OUT_OF) 2402 bucket_denominator = bucket_denominator = self._parse_number() 2403 self._match(TokenType.ON) 2404 bucket_field = self._parse_field() 2405 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2406 percent = num 2407 elif self._match(TokenType.ROWS): 2408 rows = num 2409 else: 2410 size = num 2411 2412 self._match(TokenType.R_PAREN) 2413 2414 if self._match(TokenType.L_PAREN): 2415 method = self._parse_var() 2416 seed = self._match(TokenType.COMMA) and self._parse_number() 2417 self._match_r_paren() 2418 elif self._match_texts(("SEED", "REPEATABLE")): 2419 seed = self._parse_wrapped(self._parse_number) 2420 2421 return self.expression( 2422 exp.TableSample, 2423 method=method, 2424 bucket_numerator=bucket_numerator, 2425 bucket_denominator=bucket_denominator, 2426 bucket_field=bucket_field, 2427 percent=percent, 2428 rows=rows, 2429 size=size, 2430 seed=seed, 2431 kind=kind, 2432 ) 2433 2434 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause (aggregations FOR column IN (...)) with alias."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the generated output column names from the IN values and
            # the aggregation aliases; naming scheme is controlled by
            # PREFIXED_PIVOT_COLUMNS / IDENTIFY_PIVOT_STRINGS.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each aggregation, used to name PIVOT output columns."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, including GROUPING SETS, [WITH] ROLLUP/CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP stores True; ROLLUP (...) stores the column list
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return
self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-style clause introduced by `token_type` into `exp_class`."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item, deriving nulls_first from `null_ordering` when
        NULLS FIRST/LAST is not explicit."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when top=True) or FETCH FIRST/NEXT; returns `this` if absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; a COMMA is accepted in place of OFFSET."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE locking clauses."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression]
= None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto `this` (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse conjunction-level (AND/OR) operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: registered RANGE_PARSERS (BETWEEN, IN, LIKE, ...),
        ISNULL/NOTNULL shorthands, optional NOT negation, and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of IS: [NOT] DISTINCT FROM, or NULL/boolean literals.

        Retreats and returns None if nothing parseable follows IS.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM is the null-safe equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: UNNEST, a parenthesized subquery/list, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `low AND high` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def
_parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE 'char' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, canonicalizing e.g. INTERVAL '5 day' to '5' day."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as LT/GT token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse TERM-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse FACTOR-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse intervals, `TYPE 'literal'` casts, bare data types, or plain columns."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — delegate to a type-literal parser if registered,
                # otherwise represent it as a Cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse one size argument of a parameterized type, with an optional trailing var."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        With check_func=True, a type name followed by a non-string argument list is
        treated as a function call instead (the parser retreats and returns None).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not
self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        # TYPE[] / TYPE[][]... — wrap into nested ARRAY data types
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a: INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto canonical timestamp types
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # Not followed by a string literal: it's a function call, not a type
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name: type` (or a bare type) as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference with dotted paths, :: casts, and bracket access."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the existing qualifiers up one level (table -> db -> catalog)
                # to make room for the newly parsed rightmost part.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a parenthesized expression/tuple, or a subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot numeric literal, e.g. `.5`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Dispatches to NO_PAREN_FUNCTION_PARSERS / FUNCTION_PARSERS / FUNCTIONS when
        registered; `anonymous=True` forces an exp.Anonymous node regardless.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows — only niladic functions like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the name and the opening parenthesis

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function/UDF definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national character string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression; fall back to DISTINCT / select-or-expression parsing."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Treat the LHS of `name = value` arguments as a variable, not a column.
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema: `(column_def | constraint, ...)` attached to `this`."""
        index = self._index

        try:
            # If a nested SELECT parses here, this is not a schema definition.
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint (single value or wrapped list)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY and its parenthesized options."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse NOT NULL / NOT CASESPECIFIC; None when neither keyword follows."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optionally CONSTRAINT-named column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a CONSTRAINT-named table constraint; fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint via the registered CONSTRAINT_PARSERS."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE as a column constraint, or UNIQUE (cols) as a table constraint."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ..., DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with optional ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse a PRIMARY KEY column or table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts / array literals and `{...}` struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts for the dialect's array index base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a Slice when a `:` follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma-separated form is also accepted."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast over exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to a type given as a string.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregation into a GroupConcat node."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
3603 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3604 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3605 if not self._match(TokenType.WITHIN_GROUP): 3606 self._retreat(index) 3607 this = exp.GroupConcat.from_arg_list(args) 3608 self.validate_expression(this, args) 3609 return this 3610 3611 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3612 order = self._parse_order(this=expression) 3613 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3614 3615 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3616 to: t.Optional[exp.Expression] 3617 this = self._parse_bitwise() 3618 3619 if self._match(TokenType.USING): 3620 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3621 elif self._match(TokenType.COMMA): 3622 to = self._parse_bitwise() 3623 else: 3624 to = None 3625 3626 # Swap the argument order if needed to produce the correct AST 3627 if self.CONVERT_TYPE_FIRST: 3628 this, to = to, this 3629 3630 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3631 3632 def _parse_decode(self) -> t.Optional[exp.Expression]: 3633 """ 3634 There are generally two variants of the DECODE function: 3635 3636 - DECODE(bin, charset) 3637 - DECODE(expression, search, result [, search, result] ... [, default]) 3638 3639 The second variant will always be parsed into a CASE expression. Note that NULL 3640 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3641 instead of relying on pattern matching. 
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be matched with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: NULL-safe comparison (equal, or both NULL).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of remaining args means the trailing one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse a `[KEY] key [: | VALUE] value` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL-handling and key-uniqueness options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")  # optional trailing KEYS keyword of WITH/WITHOUT UNIQUE

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG/LN; argument order and single-argument semantics are dialect flags."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH (cols) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/STRPOS-style calls into a StrPosition node."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table argument list of a join hint such as BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-operand form: what was parsed first is the characters to trim.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a CSV of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IGNORE NULLS / RESPECT NULLS when either clause follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / OVER clauses around `this`; `alias=True` parses a named window."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
3868 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3869 if self._match(TokenType.WITHIN_GROUP): 3870 order = self._parse_wrapped(self._parse_order) 3871 this = self.expression(exp.WithinGroup, this=this, expression=order) 3872 3873 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3874 # Some dialects choose to implement and some do not. 3875 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3876 3877 # There is some code above in _parse_lambda that handles 3878 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3879 3880 # The below changes handle 3881 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3882 3883 # Oracle allows both formats 3884 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3885 # and Snowflake chose to do the same for familiarity 3886 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3887 this = self._parse_respect_or_ignore_nulls(this) 3888 3889 # bigquery select from window x AS (partition by ...) 
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: [ROWS | RANGE] BETWEEN start AND end.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: its value and PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an alias or alias list to `this`; `explicit=True` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or a bare token usable as one (per ID_VAR_TOKENS)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, or a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`), or a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it is a reserved keyword; return it if consumed."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable or, failing that, a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder; retreat when the registered parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if
self._match(TokenType.L_PAREN, advance=False): 4074 return self._parse_wrapped_csv(self._parse_column) 4075 return self._parse_csv(self._parse_column) 4076 4077 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4078 if not self._match(TokenType.REPLACE): 4079 return None 4080 if self._match(TokenType.L_PAREN, advance=False): 4081 return self._parse_wrapped_csv(self._parse_expression) 4082 return self._parse_csv(self._parse_expression) 4083 4084 def _parse_csv( 4085 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4086 ) -> t.List[t.Optional[exp.Expression]]: 4087 parse_result = parse_method() 4088 items = [parse_result] if parse_result is not None else [] 4089 4090 while self._match(sep): 4091 self._add_comments(parse_result) 4092 parse_result = parse_method() 4093 if parse_result is not None: 4094 items.append(parse_result) 4095 4096 return items 4097 4098 def _parse_tokens( 4099 self, parse_method: t.Callable, expressions: t.Dict 4100 ) -> t.Optional[exp.Expression]: 4101 this = parse_method() 4102 4103 while self._match_set(expressions): 4104 this = self.expression( 4105 expressions[self._prev.token_type], 4106 this=this, 4107 comments=self._prev_comments, 4108 expression=parse_method(), 4109 ) 4110 4111 return this 4112 4113 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4114 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4115 4116 def _parse_wrapped_csv( 4117 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4118 ) -> t.List[t.Optional[exp.Expression]]: 4119 return self._parse_wrapped( 4120 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4121 ) 4122 4123 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4124 wrapped = self._match(TokenType.L_PAREN) 4125 if not wrapped and not optional: 4126 self.raise_error("Expecting (") 4127 parse_result = parse_method() 
4128 if wrapped: 4129 self._match_r_paren() 4130 return parse_result 4131 4132 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4133 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 4134 4135 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4136 return self._parse_set_operations( 4137 self._parse_select(nested=True, parse_subquery_alias=False) 4138 ) 4139 4140 def _parse_transaction(self) -> exp.Expression: 4141 this = None 4142 if self._match_texts(self.TRANSACTION_KIND): 4143 this = self._prev.text 4144 4145 self._match_texts({"TRANSACTION", "WORK"}) 4146 4147 modes = [] 4148 while True: 4149 mode = [] 4150 while self._match(TokenType.VAR): 4151 mode.append(self._prev.text) 4152 4153 if mode: 4154 modes.append(" ".join(mode)) 4155 if not self._match(TokenType.COMMA): 4156 break 4157 4158 return self.expression(exp.Transaction, this=this, modes=modes) 4159 4160 def _parse_commit_or_rollback(self) -> exp.Expression: 4161 chain = None 4162 savepoint = None 4163 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4164 4165 self._match_texts({"TRANSACTION", "WORK"}) 4166 4167 if self._match_text_seq("TO"): 4168 self._match_text_seq("SAVEPOINT") 4169 savepoint = self._parse_id_var() 4170 4171 if self._match(TokenType.AND): 4172 chain = not self._match_text_seq("NO") 4173 self._match_text_seq("CHAIN") 4174 4175 if is_rollback: 4176 return self.expression(exp.Rollback, savepoint=savepoint) 4177 return self.expression(exp.Commit, chain=chain) 4178 4179 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4180 if not self._match_text_seq("ADD"): 4181 return None 4182 4183 self._match(TokenType.COLUMN) 4184 exists_column = self._parse_exists(not_=True) 4185 expression = self._parse_column_def(self._parse_field(any_token=True)) 4186 4187 if expression: 4188 expression.set("exists", exists_column) 4189 4190 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
4191 if self._match_texts(("FIRST", "AFTER")): 4192 position = self._prev.text 4193 column_position = self.expression( 4194 exp.ColumnPosition, this=self._parse_column(), position=position 4195 ) 4196 expression.set("position", column_position) 4197 4198 return expression 4199 4200 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4201 drop = self._match(TokenType.DROP) and self._parse_drop() 4202 if drop and not isinstance(drop, exp.Command): 4203 drop.set("kind", drop.args.get("kind", "COLUMN")) 4204 return drop 4205 4206 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4207 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4208 return self.expression( 4209 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4210 ) 4211 4212 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4213 this = None 4214 kind = self._prev.token_type 4215 4216 if kind == TokenType.CONSTRAINT: 4217 this = self._parse_id_var() 4218 4219 if self._match_text_seq("CHECK"): 4220 expression = self._parse_wrapped(self._parse_conjunction) 4221 enforced = self._match_text_seq("ENFORCED") 4222 4223 return self.expression( 4224 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4225 ) 4226 4227 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4228 expression = self._parse_foreign_key() 4229 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4230 expression = self._parse_primary_key() 4231 else: 4232 expression = None 4233 4234 return self.expression(exp.AddConstraint, this=this, expression=expression) 4235 4236 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4237 index = self._index - 1 4238 4239 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4240 return self._parse_csv(self._parse_add_constraint) 4241 4242 self._retreat(index) 4243 return self._parse_csv(self._parse_add_column) 4244 4245 def 
_parse_alter_table_alter(self) -> exp.Expression: 4246 self._match(TokenType.COLUMN) 4247 column = self._parse_field(any_token=True) 4248 4249 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4250 return self.expression(exp.AlterColumn, this=column, drop=True) 4251 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4252 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4253 4254 self._match_text_seq("SET", "DATA") 4255 return self.expression( 4256 exp.AlterColumn, 4257 this=column, 4258 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4259 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4260 using=self._match(TokenType.USING) and self._parse_conjunction(), 4261 ) 4262 4263 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4264 index = self._index - 1 4265 4266 partition_exists = self._parse_exists() 4267 if self._match(TokenType.PARTITION, advance=False): 4268 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4269 4270 self._retreat(index) 4271 return self._parse_csv(self._parse_drop_column) 4272 4273 def _parse_alter_table_rename(self) -> exp.Expression: 4274 self._match_text_seq("TO") 4275 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4276 4277 def _parse_alter(self) -> t.Optional[exp.Expression]: 4278 start = self._prev 4279 4280 if not self._match(TokenType.TABLE): 4281 return self._parse_as_command(start) 4282 4283 exists = self._parse_exists() 4284 this = self._parse_table(schema=True) 4285 4286 if self._next: 4287 self._advance() 4288 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4289 4290 if parser: 4291 actions = ensure_list(parser(self)) 4292 4293 if not self._curr: 4294 return self.expression( 4295 exp.AlterTable, 4296 this=this, 4297 exists=exists, 4298 actions=actions, 4299 ) 4300 return self._parse_as_command(start) 4301 4302 def _parse_merge(self) -> 
exp.Expression: 4303 self._match(TokenType.INTO) 4304 target = self._parse_table() 4305 4306 self._match(TokenType.USING) 4307 using = self._parse_table() 4308 4309 self._match(TokenType.ON) 4310 on = self._parse_conjunction() 4311 4312 whens = [] 4313 while self._match(TokenType.WHEN): 4314 matched = not self._match(TokenType.NOT) 4315 self._match_text_seq("MATCHED") 4316 source = ( 4317 False 4318 if self._match_text_seq("BY", "TARGET") 4319 else self._match_text_seq("BY", "SOURCE") 4320 ) 4321 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4322 4323 self._match(TokenType.THEN) 4324 4325 if self._match(TokenType.INSERT): 4326 _this = self._parse_star() 4327 if _this: 4328 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4329 else: 4330 then = self.expression( 4331 exp.Insert, 4332 this=self._parse_value(), 4333 expression=self._match(TokenType.VALUES) and self._parse_value(), 4334 ) 4335 elif self._match(TokenType.UPDATE): 4336 expressions = self._parse_star() 4337 if expressions: 4338 then = self.expression(exp.Update, expressions=expressions) 4339 else: 4340 then = self.expression( 4341 exp.Update, 4342 expressions=self._match(TokenType.SET) 4343 and self._parse_csv(self._parse_equality), 4344 ) 4345 elif self._match(TokenType.DELETE): 4346 then = self.expression(exp.Var, this=self._prev.text) 4347 else: 4348 then = None 4349 4350 whens.append( 4351 self.expression( 4352 exp.When, 4353 matched=matched, 4354 source=source, 4355 condition=condition, 4356 then=then, 4357 ) 4358 ) 4359 4360 return self.expression( 4361 exp.Merge, 4362 this=target, 4363 using=using, 4364 on=on, 4365 expressions=whens, 4366 ) 4367 4368 def _parse_show(self) -> t.Optional[exp.Expression]: 4369 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4370 if parser: 4371 return parser(self) 4372 self._advance() 4373 return self.expression(exp.Show, this=self._prev.text.upper()) 4374 4375 def 
_parse_set_item_assignment( 4376 self, kind: t.Optional[str] = None 4377 ) -> t.Optional[exp.Expression]: 4378 index = self._index 4379 4380 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4381 return self._parse_set_transaction(global_=kind == "GLOBAL") 4382 4383 left = self._parse_primary() or self._parse_id_var() 4384 4385 if not self._match_texts(("=", "TO")): 4386 self._retreat(index) 4387 return None 4388 4389 right = self._parse_statement() or self._parse_id_var() 4390 this = self.expression( 4391 exp.EQ, 4392 this=left, 4393 expression=right, 4394 ) 4395 4396 return self.expression( 4397 exp.SetItem, 4398 this=this, 4399 kind=kind, 4400 ) 4401 4402 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4403 self._match_text_seq("TRANSACTION") 4404 characteristics = self._parse_csv( 4405 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4406 ) 4407 return self.expression( 4408 exp.SetItem, 4409 expressions=characteristics, 4410 kind="TRANSACTION", 4411 **{"global": global_}, # type: ignore 4412 ) 4413 4414 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4415 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4416 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4417 4418 def _parse_set(self) -> exp.Expression: 4419 index = self._index 4420 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4421 4422 if self._curr: 4423 self._retreat(index) 4424 return self._parse_as_command(self._prev) 4425 4426 return set_ 4427 4428 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4429 for option in options: 4430 if self._match_text_seq(*option.split(" ")): 4431 return exp.Var(this=option) 4432 return None 4433 4434 def _parse_as_command(self, start: Token) -> exp.Command: 4435 while self._curr: 4436 self._advance() 4437 text = self._find_sql(start, self._prev) 4438 size 
= len(start.text) 4439 return exp.Command(this=text[:size], expression=text[size:]) 4440 4441 def _find_parser( 4442 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4443 ) -> t.Optional[t.Callable]: 4444 if not self._curr: 4445 return None 4446 4447 index = self._index 4448 this = [] 4449 while True: 4450 # The current token might be multiple words 4451 curr = self._curr.text.upper() 4452 key = curr.split(" ") 4453 this.append(curr) 4454 self._advance() 4455 result, trie = in_trie(trie, key) 4456 if result == 0: 4457 break 4458 if result == 2: 4459 subparser = parsers[" ".join(this)] 4460 return subparser 4461 self._retreat(index) 4462 return None 4463 4464 def _match(self, token_type, advance=True, expression=None): 4465 if not self._curr: 4466 return None 4467 4468 if self._curr.token_type == token_type: 4469 if advance: 4470 self._advance() 4471 self._add_comments(expression) 4472 return True 4473 4474 return None 4475 4476 def _match_set(self, types, advance=True): 4477 if not self._curr: 4478 return None 4479 4480 if self._curr.token_type in types: 4481 if advance: 4482 self._advance() 4483 return True 4484 4485 return None 4486 4487 def _match_pair(self, token_type_a, token_type_b, advance=True): 4488 if not self._curr or not self._next: 4489 return None 4490 4491 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4492 if advance: 4493 self._advance(2) 4494 return True 4495 4496 return None 4497 4498 def _match_l_paren(self, expression=None): 4499 if not self._match(TokenType.L_PAREN, expression=expression): 4500 self.raise_error("Expecting (") 4501 4502 def _match_r_paren(self, expression=None): 4503 if not self._match(TokenType.R_PAREN, expression=expression): 4504 self.raise_error("Expecting )") 4505 4506 def _match_texts(self, texts, advance=True): 4507 if self._curr and self._curr.text.upper() in texts: 4508 if advance: 4509 self._advance() 4510 return True 4511 return False 4512 4513 def _match_text_seq(self, 
*texts, advance=True): 4514 index = self._index 4515 for text in texts: 4516 if self._curr and self._curr.text.upper() == text: 4517 self._advance() 4518 else: 4519 self._retreat(index) 4520 return False 4521 4522 if not advance: 4523 self._retreat(index) 4524 4525 return True 4526 4527 def _replace_columns_with_dots(self, this): 4528 if isinstance(this, exp.Dot): 4529 exp.replace_children(this, self._replace_columns_with_dots) 4530 elif isinstance(this, exp.Column): 4531 exp.replace_children(this, self._replace_columns_with_dots) 4532 table = this.args.get("table") 4533 this = ( 4534 self.expression(exp.Dot, this=table, expression=this.this) 4535 if table 4536 else self.expression(exp.Var, this=this.name) 4537 ) 4538 elif isinstance(this, exp.Identifier): 4539 this = self.expression(exp.Var, this=this.name) 4540 return this 4541 4542 def _replace_lambda(self, node, lambda_variables): 4543 for column in node.find_all(exp.Column): 4544 if column.parts[0].name in lambda_variables: 4545 dot_or_id = column.to_dot() if column.table else column.this 4546 parent = column.parent 4547 4548 while isinstance(parent, exp.Dot): 4549 if not isinstance(parent.parent, exp.Dot): 4550 parent.replace(dot_or_id) 4551 break 4552 parent = parent.parent 4553 else: 4554 if column is node: 4555 node = dot_or_id 4556 else: 4557 column.replace(dot_or_id) 4558 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap expression from a flat, alternating key/value argument list.

    A single star argument yields a StarMap instead. Even-indexed arguments
    become the keys and odd-indexed arguments the values, each wrapped in an
    Array expression.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    pair_starts = range(0, len(args), 2)
    keys = [args[i] for i in pair_starts]
    values = [args[i + 1] for i in pair_starts]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 
TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 
TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.AUTO_INCREMENT, 211 TokenType.BEGIN, 212 TokenType.BOTH, 213 TokenType.BUCKET, 214 TokenType.CACHE, 215 TokenType.CASCADE, 216 TokenType.COLLATE, 217 TokenType.COMMAND, 218 TokenType.COMMENT, 219 TokenType.COMMIT, 220 TokenType.COMPOUND, 221 TokenType.CONSTRAINT, 222 TokenType.DEFAULT, 223 TokenType.DELETE, 224 TokenType.DESCRIBE, 225 TokenType.DIV, 226 TokenType.END, 227 TokenType.EXECUTE, 228 TokenType.ESCAPE, 229 TokenType.FALSE, 230 TokenType.FIRST, 231 TokenType.FILTER, 232 TokenType.FOLLOWING, 233 TokenType.FORMAT, 234 TokenType.FULL, 235 TokenType.IF, 236 TokenType.IS, 237 TokenType.ISNULL, 238 TokenType.INTERVAL, 239 TokenType.KEEP, 240 TokenType.LAZY, 241 TokenType.LEADING, 242 TokenType.LEFT, 243 TokenType.LOCAL, 244 TokenType.MATERIALIZED, 245 TokenType.MERGE, 246 TokenType.NATURAL, 247 TokenType.NEXT, 248 TokenType.OFFSET, 249 TokenType.ONLY, 250 TokenType.OPTIONS, 251 TokenType.ORDINALITY, 252 TokenType.OVERWRITE, 253 TokenType.PARTITION, 254 TokenType.PERCENT, 255 TokenType.PIVOT, 256 TokenType.PRAGMA, 257 TokenType.PRECEDING, 258 TokenType.RANGE, 259 TokenType.REFERENCES, 260 TokenType.RIGHT, 261 TokenType.ROW, 262 TokenType.ROWS, 263 TokenType.SEED, 264 TokenType.SEMI, 265 TokenType.SET, 266 TokenType.SETTINGS, 267 TokenType.SHOW, 268 TokenType.SORTKEY, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRAILING, 272 TokenType.TRUE, 273 TokenType.UNBOUNDED, 274 TokenType.UNIQUE, 275 TokenType.UNLOGGED, 276 TokenType.UNPIVOT, 277 TokenType.VOLATILE, 278 TokenType.WINDOW, 279 *CREATABLES, 280 *SUBQUERY_PREDICATES, 281 *TYPE_TOKENS, 282 *NO_PAREN_FUNCTIONS, 283 } 284 285 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 286 287 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 288 TokenType.APPLY, 289 TokenType.FULL, 290 TokenType.LEFT, 291 
TokenType.LOCK, 292 TokenType.NATURAL, 293 TokenType.OFFSET, 294 TokenType.RIGHT, 295 TokenType.WINDOW, 296 } 297 298 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 299 300 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 301 302 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 303 304 FUNC_TOKENS = { 305 TokenType.COMMAND, 306 TokenType.CURRENT_DATE, 307 TokenType.CURRENT_DATETIME, 308 TokenType.CURRENT_TIMESTAMP, 309 TokenType.CURRENT_TIME, 310 TokenType.CURRENT_USER, 311 TokenType.FILTER, 312 TokenType.FIRST, 313 TokenType.FORMAT, 314 TokenType.GLOB, 315 TokenType.IDENTIFIER, 316 TokenType.INDEX, 317 TokenType.ISNULL, 318 TokenType.ILIKE, 319 TokenType.LIKE, 320 TokenType.MERGE, 321 TokenType.OFFSET, 322 TokenType.PRIMARY_KEY, 323 TokenType.RANGE, 324 TokenType.REPLACE, 325 TokenType.ROW, 326 TokenType.UNNEST, 327 TokenType.VAR, 328 TokenType.LEFT, 329 TokenType.RIGHT, 330 TokenType.DATE, 331 TokenType.DATETIME, 332 TokenType.TABLE, 333 TokenType.TIMESTAMP, 334 TokenType.TIMESTAMPTZ, 335 TokenType.WINDOW, 336 *TYPE_TOKENS, 337 *SUBQUERY_PREDICATES, 338 } 339 340 CONJUNCTION = { 341 TokenType.AND: exp.And, 342 TokenType.OR: exp.Or, 343 } 344 345 EQUALITY = { 346 TokenType.EQ: exp.EQ, 347 TokenType.NEQ: exp.NEQ, 348 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 349 } 350 351 COMPARISON = { 352 TokenType.GT: exp.GT, 353 TokenType.GTE: exp.GTE, 354 TokenType.LT: exp.LT, 355 TokenType.LTE: exp.LTE, 356 } 357 358 BITWISE = { 359 TokenType.AMP: exp.BitwiseAnd, 360 TokenType.CARET: exp.BitwiseXor, 361 TokenType.PIPE: exp.BitwiseOr, 362 TokenType.DPIPE: exp.DPipe, 363 } 364 365 TERM = { 366 TokenType.DASH: exp.Sub, 367 TokenType.PLUS: exp.Add, 368 TokenType.MOD: exp.Mod, 369 TokenType.COLLATE: exp.Collate, 370 } 371 372 FACTOR = { 373 TokenType.DIV: exp.IntDiv, 374 TokenType.LR_ARROW: exp.Distance, 375 TokenType.SLASH: exp.Div, 376 TokenType.STAR: exp.Mul, 377 } 378 379 TIMESTAMPS = { 380 TokenType.TIME, 381 TokenType.TIMESTAMP, 
382 TokenType.TIMESTAMPTZ, 383 TokenType.TIMESTAMPLTZ, 384 } 385 386 SET_OPERATIONS = { 387 TokenType.UNION, 388 TokenType.INTERSECT, 389 TokenType.EXCEPT, 390 } 391 392 JOIN_SIDES = { 393 TokenType.LEFT, 394 TokenType.RIGHT, 395 TokenType.FULL, 396 } 397 398 JOIN_KINDS = { 399 TokenType.INNER, 400 TokenType.OUTER, 401 TokenType.CROSS, 402 TokenType.SEMI, 403 TokenType.ANTI, 404 } 405 406 LAMBDAS = { 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_conjunction(), 411 {node.name for node in expressions}, 412 ), 413 expressions=expressions, 414 ), 415 TokenType.FARROW: lambda self, expressions: self.expression( 416 exp.Kwarg, 417 this=exp.Var(this=expressions[0].name), 418 expression=self._parse_conjunction(), 419 ), 420 } 421 422 COLUMN_OPERATORS = { 423 TokenType.DOT: None, 424 TokenType.DCOLON: lambda self, this, to: self.expression( 425 exp.Cast if self.STRICT_CAST else exp.TryCast, 426 this=this, 427 to=to, 428 ), 429 TokenType.ARROW: lambda self, this, path: self.expression( 430 exp.JSONExtract, 431 this=this, 432 expression=path, 433 ), 434 TokenType.DARROW: lambda self, this, path: self.expression( 435 exp.JSONExtractScalar, 436 this=this, 437 expression=path, 438 ), 439 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 440 exp.JSONBExtract, 441 this=this, 442 expression=path, 443 ), 444 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 445 exp.JSONBExtractScalar, 446 this=this, 447 expression=path, 448 ), 449 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 450 exp.JSONBContains, 451 this=this, 452 expression=key, 453 ), 454 } 455 456 EXPRESSION_PARSERS = { 457 exp.Column: lambda self: self._parse_column(), 458 exp.DataType: lambda self: self._parse_types(), 459 exp.From: lambda self: self._parse_from(), 460 exp.Group: lambda self: self._parse_group(), 461 exp.Identifier: lambda self: self._parse_id_var(), 462 exp.Lateral: lambda self: 
self._parse_lateral(), 463 exp.Join: lambda self: self._parse_join(), 464 exp.Order: lambda self: self._parse_order(), 465 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 466 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 467 exp.Lambda: lambda self: self._parse_lambda(), 468 exp.Limit: lambda self: self._parse_limit(), 469 exp.Offset: lambda self: self._parse_offset(), 470 exp.TableAlias: lambda self: self._parse_table_alias(), 471 exp.Table: lambda self: self._parse_table(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.Properties: lambda self: self._parse_properties(), 475 exp.Where: lambda self: self._parse_where(), 476 exp.Ordered: lambda self: self._parse_ordered(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.With: lambda self: self._parse_with(), 479 exp.Window: lambda self: self._parse_named_window(), 480 exp.Qualify: lambda self: self._parse_qualify(), 481 exp.Returning: lambda self: self._parse_returning(), 482 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 483 } 484 485 STATEMENT_PARSERS = { 486 TokenType.ALTER: lambda self: self._parse_alter(), 487 TokenType.BEGIN: lambda self: self._parse_transaction(), 488 TokenType.CACHE: lambda self: self._parse_cache(), 489 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 490 TokenType.COMMENT: lambda self: self._parse_comment(), 491 TokenType.CREATE: lambda self: self._parse_create(), 492 TokenType.DELETE: lambda self: self._parse_delete(), 493 TokenType.DESC: lambda self: self._parse_describe(), 494 TokenType.DESCRIBE: lambda self: self._parse_describe(), 495 TokenType.DROP: lambda self: self._parse_drop(), 496 TokenType.END: lambda self: self._parse_commit_or_rollback(), 497 TokenType.INSERT: lambda self: self._parse_insert(), 498 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 499 TokenType.MERGE: lambda self: 
        self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators, keyed by the token that triggers them.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary expression parsers; each callback receives the parser and
    # the token that was just consumed.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Tokens that can introduce bind parameters / placeholders.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Infix "range" operators (BETWEEN, IN, LIKE, ...); each callback receives
    # the already-parsed left-hand side expression.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # CREATE/ALTER property keywords -> parser callbacks. Keys are uppercased
    # token text; several callbacks inspect self._prev for NO/DUAL/DEFAULT
    # prefixes that were consumed before dispatch.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keywords -> parser callbacks (used while parsing
    # column definitions inside CREATE/ALTER).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keywords.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without parentheses (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need bespoke parsing (CAST, EXTRACT, ...).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Post-SELECT clause parsers; keys are the arg names set on the query node.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes; the _Parser metaclass builds a trie from these keys.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialects may register SHOW-statement parsers here (trie built by metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Dialects may register per-type literal parsers here.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Node types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Snowflake-style CREATE ... CLONE (AT | BEFORE) qualifiers.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST should error (vs return NULL) on failure; dialects override.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Slotted to keep per-Parser-instance memory small.
    __slots__ = (
"error_level", 791 "error_message_context", 792 "sql", 793 "errors", 794 "index_offset", 795 "unnest_column_only", 796 "alias_post_tablesample", 797 "max_errors", 798 "null_ordering", 799 "_tokens", 800 "_index", 801 "_curr", 802 "_next", 803 "_prev", 804 "_prev_comments", 805 "_show_trie", 806 "_set_trie", 807 ) 808 809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset() 827 828 def reset(self): 829 self.sql = "" 830 self.errors = [] 831 self._tokens = [] 832 self._index = 0 833 self._curr = None 834 self._next = None 835 self._prev = None 836 self._prev_comments = None 837 838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 ) 855 856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. 
If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1] 889 890 def _parse( 891 self, 892 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 893 raw_tokens: t.List[Token], 894 sql: t.Optional[str] = None, 895 ) -> t.List[t.Optional[exp.Expression]]: 896 self.reset() 897 self.sql = sql or "" 898 total = len(raw_tokens) 899 chunks: t.List[t.List[Token]] = [[]] 900 901 for i, token in enumerate(raw_tokens): 902 if token.token_type == TokenType.SEMICOLON: 903 if i < total - 1: 904 chunks.append([]) 905 else: 906 chunks[-1].append(token) 907 908 expressions = [] 909 910 for tokens in chunks: 911 self._index = -1 912 self._tokens = tokens 913 self._advance() 914 915 expressions.append(parse_method(self)) 916 917 if self._index < len(self._tokens): 918 self.raise_error("Invalid expression / Unexpected token") 919 920 self.check_errors() 921 922 return expressions 923 924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 
927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 ) 936 937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error) 964 965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 
978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance 983 984 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 985 if expression and self._prev_comments: 986 expression.add_comments(self._prev_comments) 987 self._prev_comments = None 988 989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message) 1005 1006 def _find_sql(self, start: Token, end: Token) -> str: 1007 return self.sql[start.start : end.end] 1008 1009 def _advance(self, times: int = 1) -> None: 1010 self._index += times 1011 self._curr = seq_get(self._tokens, self._index) 1012 self._next = seq_get(self._tokens, self._index + 1) 1013 if self._index > 0: 1014 self._prev = self._tokens[self._index - 1] 1015 self._prev_comments = self._prev.comments 1016 else: 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def _retreat(self, index: int) -> None: 1021 if index != self._index: 1022 self._advance(index - self._index) 1023 1024 def _parse_command(self) -> exp.Command: 1025 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1026 1027 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1028 start = self._prev 1029 exists = self._parse_exists() if allow_exists else None 1030 1031 self._match(TokenType.ON) 1032 1033 kind = self._match_set(self.CREATABLES) and self._prev 1034 1035 if not kind: 1036 return 
self._parse_as_command(start) 1037 1038 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1039 this = self._parse_user_defined_function(kind=kind.token_type) 1040 elif kind.token_type == TokenType.TABLE: 1041 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1042 elif kind.token_type == TokenType.COLUMN: 1043 this = self._parse_column() 1044 else: 1045 this = self._parse_id_var() 1046 1047 self._match(TokenType.IS) 1048 1049 return self.expression( 1050 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1051 ) 1052 1053 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1054 def _parse_ttl(self) -> exp.Expression: 1055 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1056 this = self._parse_bitwise() 1057 1058 if self._match_text_seq("DELETE"): 1059 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1060 if self._match_text_seq("RECOMPRESS"): 1061 return self.expression( 1062 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1063 ) 1064 if self._match_text_seq("TO", "DISK"): 1065 return self.expression( 1066 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1067 ) 1068 if self._match_text_seq("TO", "VOLUME"): 1069 return self.expression( 1070 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1071 ) 1072 1073 return this 1074 1075 expressions = self._parse_csv(_parse_ttl_action) 1076 where = self._parse_where() 1077 group = self._parse_group() 1078 1079 aggregates = None 1080 if group and self._match(TokenType.SET): 1081 aggregates = self._parse_csv(self._parse_set_item) 1082 1083 return self.expression( 1084 exp.MergeTreeTTL, 1085 expressions=expressions, 1086 where=where, 1087 group=group, 1088 aggregates=aggregates, 1089 ) 1090 1091 def _parse_statement(self) -> t.Optional[exp.Expression]: 1092 if self._curr is None: 1093 return None 1094 1095 if 
self._match_set(self.STATEMENT_PARSERS): 1096 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1097 1098 if self._match_set(Tokenizer.COMMANDS): 1099 return self._parse_command() 1100 1101 expression = self._parse_expression() 1102 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1103 return self._parse_query_modifiers(expression) 1104 1105 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1106 start = self._prev 1107 temporary = self._match(TokenType.TEMPORARY) 1108 materialized = self._match(TokenType.MATERIALIZED) 1109 kind = self._match_set(self.CREATABLES) and self._prev.text 1110 if not kind: 1111 return self._parse_as_command(start) 1112 1113 return self.expression( 1114 exp.Drop, 1115 exists=self._parse_exists(), 1116 this=self._parse_table(schema=True), 1117 kind=kind, 1118 temporary=temporary, 1119 materialized=materialized, 1120 cascade=self._match(TokenType.CASCADE), 1121 constraints=self._match_text_seq("CONSTRAINTS"), 1122 purge=self._match_text_seq("PURGE"), 1123 ) 1124 1125 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1126 return ( 1127 self._match(TokenType.IF) 1128 and (not not_ or self._match(TokenType.NOT)) 1129 and self._match(TokenType.EXISTS) 1130 ) 1131 1132 def _parse_create(self) -> t.Optional[exp.Expression]: 1133 start = self._prev 1134 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1135 TokenType.OR, TokenType.REPLACE 1136 ) 1137 unique = self._match(TokenType.UNIQUE) 1138 1139 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1140 self._match(TokenType.TABLE) 1141 1142 properties = None 1143 create_token = self._match_set(self.CREATABLES) and self._prev 1144 1145 if not create_token: 1146 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1147 create_token = self._match_set(self.CREATABLES) and self._prev 1148 1149 if not properties or not create_token: 1150 return 
self._parse_as_command(start) 1151 1152 exists = self._parse_exists(not_=True) 1153 this = None 1154 expression = None 1155 indexes = None 1156 no_schema_binding = None 1157 begin = None 1158 clone = None 1159 1160 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1161 this = self._parse_user_defined_function(kind=create_token.token_type) 1162 temp_properties = self._parse_properties() 1163 if properties and temp_properties: 1164 properties.expressions.extend(temp_properties.expressions) 1165 elif temp_properties: 1166 properties = temp_properties 1167 1168 self._match(TokenType.ALIAS) 1169 begin = self._match(TokenType.BEGIN) 1170 return_ = self._match_text_seq("RETURN") 1171 expression = self._parse_statement() 1172 1173 if return_: 1174 expression = self.expression(exp.Return, this=expression) 1175 elif create_token.token_type == TokenType.INDEX: 1176 this = self._parse_index() 1177 elif create_token.token_type in self.DB_CREATABLES: 1178 table_parts = self._parse_table_parts(schema=True) 1179 1180 # exp.Properties.Location.POST_NAME 1181 if self._match(TokenType.COMMA): 1182 temp_properties = self._parse_properties(before=True) 1183 if properties and temp_properties: 1184 properties.expressions.extend(temp_properties.expressions) 1185 elif temp_properties: 1186 properties = temp_properties 1187 1188 this = self._parse_schema(this=table_parts) 1189 1190 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1191 temp_properties = self._parse_properties() 1192 if properties and temp_properties: 1193 properties.expressions.extend(temp_properties.expressions) 1194 elif temp_properties: 1195 properties = temp_properties 1196 1197 self._match(TokenType.ALIAS) 1198 1199 # exp.Properties.Location.POST_ALIAS 1200 if not ( 1201 self._match(TokenType.SELECT, advance=False) 1202 or self._match(TokenType.WITH, advance=False) 1203 or self._match(TokenType.L_PAREN, advance=False) 1204 ): 1205 temp_properties = self._parse_properties() 1206 if properties 
and temp_properties: 1207 properties.expressions.extend(temp_properties.expressions) 1208 elif temp_properties: 1209 properties = temp_properties 1210 1211 expression = self._parse_ddl_select() 1212 1213 if create_token.token_type == TokenType.TABLE: 1214 # exp.Properties.Location.POST_EXPRESSION 1215 temp_properties = self._parse_properties() 1216 if properties and temp_properties: 1217 properties.expressions.extend(temp_properties.expressions) 1218 elif temp_properties: 1219 properties = temp_properties 1220 1221 indexes = [] 1222 while True: 1223 index = self._parse_create_table_index() 1224 1225 # exp.Properties.Location.POST_INDEX 1226 if self._match(TokenType.PARTITION_BY, advance=False): 1227 temp_properties = self._parse_properties() 1228 if properties and temp_properties: 1229 properties.expressions.extend(temp_properties.expressions) 1230 elif temp_properties: 1231 properties = temp_properties 1232 1233 if not index: 1234 break 1235 else: 1236 indexes.append(index) 1237 elif create_token.token_type == TokenType.VIEW: 1238 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1239 no_schema_binding = True 1240 1241 if self._match_text_seq("CLONE"): 1242 clone = self._parse_table(schema=True) 1243 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1244 clone_kind = ( 1245 self._match(TokenType.L_PAREN) 1246 and self._match_texts(self.CLONE_KINDS) 1247 and self._prev.text.upper() 1248 ) 1249 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1250 self._match(TokenType.R_PAREN) 1251 clone = self.expression( 1252 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1253 ) 1254 1255 return self.expression( 1256 exp.Create, 1257 this=this, 1258 kind=create_token.text, 1259 replace=replace, 1260 unique=unique, 1261 expression=expression, 1262 exists=exists, 1263 properties=properties, 1264 indexes=indexes, 1265 no_schema_binding=no_schema_binding, 1266 begin=begin, 1267 clone=clone, 1268 
) 1269 1270 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1271 self._match(TokenType.COMMA) 1272 1273 # parsers look to _prev for no/dual/default, so need to consume first 1274 self._match_text_seq("NO") 1275 self._match_text_seq("DUAL") 1276 self._match_text_seq("DEFAULT") 1277 1278 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1279 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1280 1281 return None 1282 1283 def _parse_property(self) -> t.Optional[exp.Expression]: 1284 if self._match_texts(self.PROPERTY_PARSERS): 1285 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1286 1287 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1288 return self._parse_character_set(default=True) 1289 1290 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1291 return self._parse_sortkey(compound=True) 1292 1293 if self._match_text_seq("SQL", "SECURITY"): 1294 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1295 1296 assignment = self._match_pair( 1297 TokenType.VAR, TokenType.EQ, advance=False 1298 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1299 1300 if assignment: 1301 key = self._parse_var_or_string() 1302 self._match(TokenType.EQ) 1303 return self.expression(exp.Property, this=key, value=self._parse_column()) 1304 1305 return None 1306 1307 def _parse_stored(self) -> exp.Expression: 1308 self._match(TokenType.ALIAS) 1309 1310 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1311 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1312 1313 return self.expression( 1314 exp.FileFormatProperty, 1315 this=self.expression( 1316 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1317 ) 1318 if input_format or output_format 1319 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1320 ) 1321 1322 def 
_parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1323 self._match(TokenType.EQ) 1324 self._match(TokenType.ALIAS) 1325 return self.expression(exp_class, this=self._parse_field()) 1326 1327 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1328 properties = [] 1329 1330 while True: 1331 if before: 1332 identified_property = self._parse_property_before() 1333 else: 1334 identified_property = self._parse_property() 1335 1336 if not identified_property: 1337 break 1338 for p in ensure_list(identified_property): 1339 properties.append(p) 1340 1341 if properties: 1342 return self.expression(exp.Properties, expressions=properties) 1343 1344 return None 1345 1346 def _parse_fallback(self, no=False) -> exp.Expression: 1347 self._match_text_seq("FALLBACK") 1348 return self.expression( 1349 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1350 ) 1351 1352 def _parse_volatile_property(self) -> exp.Expression: 1353 if self._index >= 2: 1354 pre_volatile_token = self._tokens[self._index - 2] 1355 else: 1356 pre_volatile_token = None 1357 1358 if pre_volatile_token and pre_volatile_token.token_type in ( 1359 TokenType.CREATE, 1360 TokenType.REPLACE, 1361 TokenType.UNIQUE, 1362 ): 1363 return exp.VolatileProperty() 1364 1365 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1366 1367 def _parse_with_property( 1368 self, 1369 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1370 self._match(TokenType.WITH) 1371 if self._match(TokenType.L_PAREN, advance=False): 1372 return self._parse_wrapped_csv(self._parse_property) 1373 1374 if self._match_text_seq("JOURNAL"): 1375 return self._parse_withjournaltable() 1376 1377 if self._match_text_seq("DATA"): 1378 return self._parse_withdata(no=False) 1379 elif self._match_text_seq("NO", "DATA"): 1380 return self._parse_withdata(no=True) 1381 1382 if not self._next: 1383 return None 1384 1385 return 
self._parse_withisolatedloading() 1386 1387 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1388 def _parse_definer(self) -> t.Optional[exp.Expression]: 1389 self._match(TokenType.EQ) 1390 1391 user = self._parse_id_var() 1392 self._match(TokenType.PARAMETER) 1393 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1394 1395 if not user or not host: 1396 return None 1397 1398 return exp.DefinerProperty(this=f"{user}@{host}") 1399 1400 def _parse_withjournaltable(self) -> exp.Expression: 1401 self._match(TokenType.TABLE) 1402 self._match(TokenType.EQ) 1403 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1404 1405 def _parse_log(self, no=False) -> exp.Expression: 1406 self._match_text_seq("LOG") 1407 return self.expression(exp.LogProperty, no=no) 1408 1409 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1410 before = self._match_text_seq("BEFORE") 1411 self._match_text_seq("JOURNAL") 1412 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1413 1414 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1415 self._match_text_seq("NOT") 1416 self._match_text_seq("LOCAL") 1417 self._match_text_seq("AFTER", "JOURNAL") 1418 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1419 1420 def _parse_checksum(self) -> exp.Expression: 1421 self._match_text_seq("CHECKSUM") 1422 self._match(TokenType.EQ) 1423 1424 on = None 1425 if self._match(TokenType.ON): 1426 on = True 1427 elif self._match_text_seq("OFF"): 1428 on = False 1429 default = self._match(TokenType.DEFAULT) 1430 1431 return self.expression( 1432 exp.ChecksumProperty, 1433 on=on, 1434 default=default, 1435 ) 1436 1437 def _parse_freespace(self) -> exp.Expression: 1438 self._match_text_seq("FREESPACE") 1439 self._match(TokenType.EQ) 1440 return self.expression( 1441 exp.FreespaceProperty, this=self._parse_number(), 
    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        """Parse `MERGEBLOCKRATIO [= <number> [PERCENT]]` into a MergeBlockRatioProperty."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse the DATABLOCKSIZE property forms.

        Supports `DEFAULT DATABLOCKSIZE`, `MIN[IMUM]`/`MAX[IMUM] DATABLOCKSIZE`,
        and `DATABLOCKSIZE [=] <number> [BYTES | KBYTES | KILOBYTES]`.
        """
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse `BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]`."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) take a target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `PARTITION BY <expr, ...>`; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse the value of a PARTITIONED BY property (schema or bracketed field)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.Expression:
        """Parse `[AND [NO] STATISTICS]` after `WITH [NO] DATA`."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        """Parse `PRIMARY INDEX` (after NO) into a NoPrimaryIndexProperty."""
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        """Parse `COMMIT PRESERVE ROWS` (after ON) into an OnCommitProperty."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        """Parse a parenthesized identifier as a DISTKEY property."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse `LIKE <table> [{INCLUDING | EXCLUDING} <option>]...`.

        Returns None if an INCLUDING/EXCLUDING keyword is not followed by an
        identifier.
        """
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse a parenthesized identifier list as a SORTKEY property."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse `[=] <charset>` into a CharacterSetProperty."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: `TABLE <...>`, `TABLE (schema)`, or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE <col type, ...> — angle-bracketed struct form.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
    def _parse_temporary(self, global_: bool = False) -> exp.Expression:
        """Parse an optional TEMPORARY keyword into a TemporaryProperty."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        """Parse a DESCRIBE target: optional creatable kind plus a table."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement body (after the INSERT keyword).

        Handles `OVERWRITE`, `LOCAL DIRECTORY` targets, `OR <alternative>`,
        partitions, the source select, ON CONFLICT and RETURNING clauses.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters — each value parses the next
        # clause from the token stream as a side effect.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse `ON CONFLICT ...` / `ON DUPLICATE KEY ...`; None if absent."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse `RETURNING <column, ...>`; None if absent."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse a row format after a ROW keyword; None if FORMAT doesn't follow."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse `[ROW FORMAT] {SERDE <string> | DELIMITED ...}`.

        Args:
            match_row: when True, require a leading `ROW FORMAT` pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parse `LOAD DATA [LOCAL] INPATH <path> [OVERWRITE] INTO TABLE ...`."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement body: target, USING, WHERE, RETURNING."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement body: target, SET list, FROM, WHERE, RETURNING."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`; raises if TABLE is missing."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]`."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse `PARTITION (<expr, ...>)`; None if absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row as a Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT, nested
        parenthesized query, or VALUES; then fold in set operations.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference.
            parse_subquery_alias: parse an alias for a parenthesized subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. `SELECT AS STRUCT ...` / `SELECT AS VALUE ...`
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause (one or more CTEs); None if WITH is absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs may be separated by commas and/or a repeated WITH keyword.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        """Parse a single CTE: `<alias> AS (<statement>)`; alias is mandatory."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse `[AS] <name> [(col, ...)]` into a TableAlias; None when neither
        a name nor a column list is found."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, rewind — the paren belongs
            # to something else (e.g. a subquery).
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
self._retreat(index) 1978 else: 1979 columns = None 1980 1981 if not alias and not columns: 1982 return None 1983 1984 return self.expression(exp.TableAlias, this=alias, columns=columns) 1985 1986 def _parse_subquery( 1987 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1988 ) -> exp.Expression: 1989 return self.expression( 1990 exp.Subquery, 1991 this=this, 1992 pivots=self._parse_pivots(), 1993 alias=self._parse_table_alias() if parse_alias else None, 1994 ) 1995 1996 def _parse_query_modifiers( 1997 self, this: t.Optional[exp.Expression] 1998 ) -> t.Optional[exp.Expression]: 1999 if isinstance(this, self.MODIFIABLES): 2000 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2001 expression = parser(self) 2002 2003 if expression: 2004 this.set(key, expression) 2005 return this 2006 2007 def _parse_hint(self) -> t.Optional[exp.Expression]: 2008 if self._match(TokenType.HINT): 2009 hints = self._parse_csv(self._parse_function) 2010 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2011 self.raise_error("Expected */ after HINT") 2012 return self.expression(exp.Hint, expressions=hints) 2013 2014 return None 2015 2016 def _parse_into(self) -> t.Optional[exp.Expression]: 2017 if not self._match(TokenType.INTO): 2018 return None 2019 2020 temp = self._match(TokenType.TEMPORARY) 2021 unlogged = self._match(TokenType.UNLOGGED) 2022 self._match(TokenType.TABLE) 2023 2024 return self.expression( 2025 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2026 ) 2027 2028 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 2029 if not self._match(TokenType.FROM): 2030 return None 2031 2032 comments = self._prev_comments 2033 this = self._parse_table() 2034 2035 return self.expression( 2036 exp.From, 2037 comments=comments, 2038 this=self._parse_query_modifiers(this) if modifiers else this, 2039 ) 2040 2041 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2042 if not 
self._match(TokenType.MATCH_RECOGNIZE): 2043 return None 2044 2045 self._match_l_paren() 2046 2047 partition = self._parse_partition_by() 2048 order = self._parse_order() 2049 measures = ( 2050 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2051 ) 2052 2053 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2054 rows = exp.Var(this="ONE ROW PER MATCH") 2055 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2056 text = "ALL ROWS PER MATCH" 2057 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2058 text += f" SHOW EMPTY MATCHES" 2059 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2060 text += f" OMIT EMPTY MATCHES" 2061 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2062 text += f" WITH UNMATCHED ROWS" 2063 rows = exp.Var(this=text) 2064 else: 2065 rows = None 2066 2067 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2068 text = "AFTER MATCH SKIP" 2069 if self._match_text_seq("PAST", "LAST", "ROW"): 2070 text += f" PAST LAST ROW" 2071 elif self._match_text_seq("TO", "NEXT", "ROW"): 2072 text += f" TO NEXT ROW" 2073 elif self._match_text_seq("TO", "FIRST"): 2074 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2075 elif self._match_text_seq("TO", "LAST"): 2076 text += f" TO LAST {self._advance_any().text}" # type: ignore 2077 after = exp.Var(this=text) 2078 else: 2079 after = None 2080 2081 if self._match_text_seq("PATTERN"): 2082 self._match_l_paren() 2083 2084 if not self._curr: 2085 self.raise_error("Expecting )", self._curr) 2086 2087 paren = 1 2088 start = self._curr 2089 2090 while self._curr and paren > 0: 2091 if self._curr.token_type == TokenType.L_PAREN: 2092 paren += 1 2093 if self._curr.token_type == TokenType.R_PAREN: 2094 paren -= 1 2095 end = self._prev 2096 self._advance() 2097 if paren > 0: 2098 self.raise_error("Expecting )", self._curr) 2099 pattern = exp.Var(this=self._find_sql(start, end)) 2100 else: 2101 pattern = None 2102 2103 define = ( 2104 
self._parse_csv( 2105 lambda: self.expression( 2106 exp.Alias, 2107 alias=self._parse_id_var(any_token=True), 2108 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2109 ) 2110 ) 2111 if self._match_text_seq("DEFINE") 2112 else None 2113 ) 2114 2115 self._match_r_paren() 2116 2117 return self.expression( 2118 exp.MatchRecognize, 2119 partition_by=partition, 2120 order=order, 2121 measures=measures, 2122 rows=rows, 2123 after=after, 2124 pattern=pattern, 2125 define=define, 2126 alias=self._parse_table_alias(), 2127 ) 2128 2129 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2130 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2131 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2132 2133 if outer_apply or cross_apply: 2134 this = self._parse_select(table=True) 2135 view = None 2136 outer = not cross_apply 2137 elif self._match(TokenType.LATERAL): 2138 this = self._parse_select(table=True) 2139 view = self._match(TokenType.VIEW) 2140 outer = self._match(TokenType.OUTER) 2141 else: 2142 return None 2143 2144 if not this: 2145 this = self._parse_function() or self._parse_id_var(any_token=False) 2146 while self._match(TokenType.DOT): 2147 this = exp.Dot( 2148 this=this, 2149 expression=self._parse_function() or self._parse_id_var(any_token=False), 2150 ) 2151 2152 table_alias: t.Optional[exp.Expression] 2153 2154 if view: 2155 table = self._parse_id_var(any_token=False) 2156 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2157 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2158 else: 2159 table_alias = self._parse_table_alias() 2160 2161 expression = self.expression( 2162 exp.Lateral, 2163 this=this, 2164 view=view, 2165 outer=outer, 2166 alias=table_alias, 2167 ) 2168 2169 return expression 2170 2171 def _parse_join_side_and_kind( 2172 self, 2173 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2174 return ( 2175 
self._match(TokenType.NATURAL) and self._prev, 2176 self._match_set(self.JOIN_SIDES) and self._prev, 2177 self._match_set(self.JOIN_KINDS) and self._prev, 2178 ) 2179 2180 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2181 if self._match(TokenType.COMMA): 2182 return self.expression(exp.Join, this=self._parse_table()) 2183 2184 index = self._index 2185 natural, side, kind = self._parse_join_side_and_kind() 2186 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2187 join = self._match(TokenType.JOIN) 2188 2189 if not skip_join_token and not join: 2190 self._retreat(index) 2191 kind = None 2192 natural = None 2193 side = None 2194 2195 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2196 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2197 2198 if not skip_join_token and not join and not outer_apply and not cross_apply: 2199 return None 2200 2201 if outer_apply: 2202 side = Token(TokenType.LEFT, "LEFT") 2203 2204 kwargs: t.Dict[ 2205 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2206 ] = {"this": self._parse_table()} 2207 2208 if natural: 2209 kwargs["natural"] = True 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 2222 return self.expression(exp.Join, **kwargs) # type: ignore 2223 2224 def _parse_index(self) -> exp.Expression: 2225 index = self._parse_id_var() 2226 self._match(TokenType.ON) 2227 self._match(TokenType.TABLE) # hive 2228 2229 return self.expression( 2230 exp.Index, 2231 this=index, 2232 table=self.expression(exp.Table, this=self._parse_id_var()), 2233 columns=self._parse_expression(), 2234 ) 2235 2236 def _parse_create_table_index(self) -> 
t.Optional[exp.Expression]: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 if not self._match(TokenType.INDEX): 2241 return None 2242 index = self._parse_id_var() 2243 columns = None 2244 if self._match(TokenType.L_PAREN, advance=False): 2245 columns = self._parse_wrapped_csv(self._parse_column) 2246 return self.expression( 2247 exp.Index, 2248 this=index, 2249 columns=columns, 2250 unique=unique, 2251 primary=primary, 2252 amp=amp, 2253 ) 2254 2255 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2256 return ( 2257 (not schema and self._parse_function()) 2258 or self._parse_id_var(any_token=False) 2259 or self._parse_string_as_identifier() 2260 or self._parse_placeholder() 2261 ) 2262 2263 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2264 catalog = None 2265 db = None 2266 table = self._parse_table_part(schema=schema) 2267 2268 while self._match(TokenType.DOT): 2269 if catalog: 2270 # This allows nesting the table in arbitrarily many dot expressions if needed 2271 table = self.expression( 2272 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2273 ) 2274 else: 2275 catalog = db 2276 db = table 2277 table = self._parse_table_part(schema=schema) 2278 2279 if not table: 2280 self.raise_error(f"Expected table name but got {self._curr}") 2281 2282 return self.expression( 2283 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2284 ) 2285 2286 def _parse_table( 2287 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2288 ) -> t.Optional[exp.Expression]: 2289 lateral = self._parse_lateral() 2290 if lateral: 2291 return lateral 2292 2293 unnest = self._parse_unnest() 2294 if unnest: 2295 return unnest 2296 2297 values = self._parse_derived_table_values() 2298 if values: 2299 return values 2300 2301 subquery = self._parse_select(table=True) 2302 if 
subquery: 2303 if not subquery.args.get("pivots"): 2304 subquery.set("pivots", self._parse_pivots()) 2305 return subquery 2306 2307 this = self._parse_table_parts(schema=schema) 2308 2309 if schema: 2310 return self._parse_schema(this=this) 2311 2312 if self.alias_post_tablesample: 2313 table_sample = self._parse_table_sample() 2314 2315 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2316 if alias: 2317 this.set("alias", alias) 2318 2319 if not this.args.get("pivots"): 2320 this.set("pivots", self._parse_pivots()) 2321 2322 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2323 this.set( 2324 "hints", 2325 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2326 ) 2327 self._match_r_paren() 2328 2329 if not self.alias_post_tablesample: 2330 table_sample = self._parse_table_sample() 2331 2332 if table_sample: 2333 table_sample.set("this", this) 2334 this = table_sample 2335 2336 return this 2337 2338 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2339 if not self._match(TokenType.UNNEST): 2340 return None 2341 2342 expressions = self._parse_wrapped_csv(self._parse_type) 2343 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2344 alias = self._parse_table_alias() 2345 2346 if alias and self.unnest_column_only: 2347 if alias.args.get("columns"): 2348 self.raise_error("Unexpected extra column alias in unnest.") 2349 alias.set("columns", [alias.this]) 2350 alias.set("this", None) 2351 2352 offset = None 2353 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2354 self._match(TokenType.ALIAS) 2355 offset = self._parse_id_var() or exp.Identifier(this="offset") 2356 2357 return self.expression( 2358 exp.Unnest, 2359 expressions=expressions, 2360 ordinality=ordinality, 2361 alias=alias, 2362 offset=offset, 2363 ) 2364 2365 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2366 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2367 if 
not is_derived and not self._match(TokenType.VALUES): 2368 return None 2369 2370 expressions = self._parse_csv(self._parse_value) 2371 2372 if is_derived: 2373 self._match_r_paren() 2374 2375 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2376 2377 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2378 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2379 as_modifier and self._match_text_seq("USING", "SAMPLE") 2380 ): 2381 return None 2382 2383 bucket_numerator = None 2384 bucket_denominator = None 2385 bucket_field = None 2386 percent = None 2387 rows = None 2388 size = None 2389 seed = None 2390 2391 kind = ( 2392 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2393 ) 2394 method = self._parse_var(tokens=(TokenType.ROW,)) 2395 2396 self._match(TokenType.L_PAREN) 2397 2398 num = self._parse_number() 2399 2400 if self._match(TokenType.BUCKET): 2401 bucket_numerator = self._parse_number() 2402 self._match(TokenType.OUT_OF) 2403 bucket_denominator = bucket_denominator = self._parse_number() 2404 self._match(TokenType.ON) 2405 bucket_field = self._parse_field() 2406 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2407 percent = num 2408 elif self._match(TokenType.ROWS): 2409 rows = num 2410 else: 2411 size = num 2412 2413 self._match(TokenType.R_PAREN) 2414 2415 if self._match(TokenType.L_PAREN): 2416 method = self._parse_var() 2417 seed = self._match(TokenType.COMMA) and self._parse_number() 2418 self._match_r_paren() 2419 elif self._match_texts(("SEED", "REPEATABLE")): 2420 seed = self._parse_wrapped(self._parse_number) 2421 2422 return self.expression( 2423 exp.TableSample, 2424 method=method, 2425 bucket_numerator=bucket_numerator, 2426 bucket_denominator=bucket_denominator, 2427 bucket_field=bucket_field, 2428 percent=percent, 2429 rows=rows, 2430 size=size, 2431 seed=seed, 2432 kind=kind, 2433 ) 2434 2435 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2436 return list(iter(self._parse_pivot, None)) 2437 2438 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2439 index = self._index 2440 2441 if self._match(TokenType.PIVOT): 2442 unpivot = False 2443 elif self._match(TokenType.UNPIVOT): 2444 unpivot = True 2445 else: 2446 return None 2447 2448 expressions = [] 2449 field = None 2450 2451 if not self._match(TokenType.L_PAREN): 2452 self._retreat(index) 2453 return None 2454 2455 if unpivot: 2456 expressions = self._parse_csv(self._parse_column) 2457 else: 2458 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2459 2460 if not expressions: 2461 self.raise_error("Failed to parse PIVOT's aggregation list") 2462 2463 if not self._match(TokenType.FOR): 2464 self.raise_error("Expecting FOR") 2465 2466 value = self._parse_column() 2467 2468 if not self._match(TokenType.IN): 2469 self.raise_error("Expecting IN") 2470 2471 field = self._parse_in(value) 2472 2473 self._match_r_paren() 2474 2475 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2476 2477 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2478 pivot.set("alias", self._parse_table_alias()) 2479 2480 if not unpivot: 2481 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2482 2483 columns: t.List[exp.Expression] = [] 2484 for fld in pivot.args["field"].expressions: 2485 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2486 for name in names: 2487 if self.PREFIXED_PIVOT_COLUMNS: 2488 name = f"{name}_{field_name}" if name else field_name 2489 else: 2490 name = f"{field_name}_{name}" if name else field_name 2491 2492 columns.append(exp.to_identifier(name)) 2493 2494 pivot.set("columns", columns) 2495 2496 return pivot 2497 2498 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2499 return [agg.alias for agg in 
aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse an optional WHERE clause into an exp.Where node."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, collecting plain expressions together with
        GROUPING SETS / (WITH) ROLLUP / (WITH) CUBE / WITH TOTALS modifiers
        into a single exp.Group node.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop: modifiers may be interleaved with further grouping expressions.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` takes no column list; bare ROLLUP does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once no modifier was consumed in this iteration.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...) as a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse an optional HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse an optional QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional ORDER BY clause; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term with optional ASC/DESC and NULLS FIRST/LAST.

        When null ordering is not explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so output can be normalized.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or a FETCH [FIRST|NEXT] clause.

        Args:
            this: expression the clause attaches to.
            top: parse TOP (number only) instead of LIMIT (full term).
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (the comma form `LIMIT x, y` also lands here)."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes NOWAIT (True), WAIT <n> (expression), SKIP LOCKED (False).
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest binary precedence level)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc. equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, <=, >, >= comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/..., ISNULL, and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL
        # and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, TRUE/FALSE.

        Retreats and returns None when what follows IS is not recognized.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of an IN predicate: UNNEST(...), a
        parenthesized subquery/expression list, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `BETWEEN low AND high` (the BETWEEN token is already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix for LIKE-style predicates."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal into exp.Interval."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << / >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, ...)."""
        return self._parse_tokens(self._parse_unary,
self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary prefix operator, or fall through to a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like `<type> <literal>` form, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect-specific literal parsers first.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else: treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type-size argument, e.g. the `25` in VARCHAR(25)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type (possibly nested / parameterized) into exp.DataType.

        Args:
            check_func: when True, retreat if the "type" could actually be a
                function call (a string argument follows).

        Returns:
            The parsed type expression, or None (position restored) when the
            upcoming tokens do not form a type.
        """
        index = self._index

        # Teradata system UDT library prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `T(...)` might be a function call rather than a parameterized type.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `T[]` — wrap into (possibly repeated) ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # `T[` without a closing bracket is not a type here.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE variants onto canonical types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string argument follows, so this is not a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name [:] type [constraints]`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed / cast) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast operator.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dotted part: shift existing parts up one level
                # (column -> table, table -> db, db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number, or a parenthesized
        expression / tuple / subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style float literal.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use (defaults to self.FUNCTIONS).
            anonymous: when True, skip special-case parsers and known builders and
                always produce an exp.Anonymous node.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Keyword-style "functions" that take no parentheses (e.g. CASE).
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...), ANY(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous,
                                       this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer like _utf8'...'; falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1` or `(a, b) -> ...`) or, failing
        that, a regular (possibly DISTINCT) function argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self,
                                                      expressions)

        # Not a lambda: rewind and parse a normal argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            # Named argument style `name = value`: the left side is a bare name,
            # not a column reference.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        If a nested SELECT follows instead, `this` is returned unchanged.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, then constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with an optional value list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its options."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options: START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint, named (CONSTRAINT name ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch an unnamed constraint keyword to its registered parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, as a column constraint or with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (ON DELETE/UPDATE actions, etc.)
        as a list of canonicalized strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The word after ON (e.g. DELETE or UPDATE) is kept verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause: target table, optional columns, options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse a PRIMARY KEY constraint, column-level or table-level."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracket/brace subscripts: indexing, slices, array and struct
        literals; recurses to consume chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect index offsets onto 0-based indices.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:end` slice suffix after a subscript expression."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        # Optional CASE operand (simple-CASE form).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as the keyword form
        IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
this = self._parse_function() or self._parse_var() or self._parse_type() 3557 3558 if self._match(TokenType.FROM): 3559 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3560 3561 if not self._match(TokenType.COMMA): 3562 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3563 3564 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3565 3566 def _parse_cast(self, strict: bool) -> exp.Expression: 3567 this = self._parse_conjunction() 3568 3569 if not self._match(TokenType.ALIAS): 3570 if self._match(TokenType.COMMA): 3571 return self.expression( 3572 exp.CastToStrType, this=this, expression=self._parse_string() 3573 ) 3574 else: 3575 self.raise_error("Expected AS after CAST") 3576 3577 to = self._parse_types() 3578 3579 if not to: 3580 self.raise_error("Expected TYPE after CAST") 3581 elif to.this == exp.DataType.Type.CHAR: 3582 if self._match(TokenType.CHARACTER_SET): 3583 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3584 3585 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3586 3587 def _parse_string_agg(self) -> exp.Expression: 3588 expression: t.Optional[exp.Expression] 3589 3590 if self._match(TokenType.DISTINCT): 3591 args = self._parse_csv(self._parse_conjunction) 3592 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3593 else: 3594 args = self._parse_csv(self._parse_conjunction) 3595 expression = seq_get(args, 0) 3596 3597 index = self._index 3598 if not self._match(TokenType.R_PAREN): 3599 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3600 order = self._parse_order(this=expression) 3601 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3602 3603 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
3604 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3605 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3606 if not self._match(TokenType.WITHIN_GROUP): 3607 self._retreat(index) 3608 this = exp.GroupConcat.from_arg_list(args) 3609 self.validate_expression(this, args) 3610 return this 3611 3612 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3613 order = self._parse_order(this=expression) 3614 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3615 3616 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3617 to: t.Optional[exp.Expression] 3618 this = self._parse_bitwise() 3619 3620 if self._match(TokenType.USING): 3621 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3622 elif self._match(TokenType.COMMA): 3623 to = self._parse_bitwise() 3624 else: 3625 to = None 3626 3627 # Swap the argument order if needed to produce the correct AST 3628 if self.CONVERT_TYPE_FIRST: 3629 this, to = to, this 3630 3631 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3632 3633 def _parse_decode(self) -> t.Optional[exp.Expression]: 3634 """ 3635 There are generally two variants of the DECODE function: 3636 3637 - DECODE(bin, charset) 3638 - DECODE(expression, search, result [, search, result] ... [, default]) 3639 3640 The second variant will always be parsed into a CASE expression. Note that NULL 3641 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3642 instead of relying on pattern matching. 
3643 """ 3644 args = self._parse_csv(self._parse_conjunction) 3645 3646 if len(args) < 3: 3647 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3648 3649 expression, *expressions = args 3650 if not expression: 3651 return None 3652 3653 ifs = [] 3654 for search, result in zip(expressions[::2], expressions[1::2]): 3655 if not search or not result: 3656 return None 3657 3658 if isinstance(search, exp.Literal): 3659 ifs.append( 3660 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3661 ) 3662 elif isinstance(search, exp.Null): 3663 ifs.append( 3664 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3665 ) 3666 else: 3667 cond = exp.or_( 3668 exp.EQ(this=expression.copy(), expression=search), 3669 exp.and_( 3670 exp.Is(this=expression.copy(), expression=exp.Null()), 3671 exp.Is(this=search.copy(), expression=exp.Null()), 3672 copy=False, 3673 ), 3674 copy=False, 3675 ) 3676 ifs.append(exp.If(this=cond, true=result)) 3677 3678 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3679 3680 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3681 self._match_text_seq("KEY") 3682 key = self._parse_field() 3683 self._match(TokenType.COLON) 3684 self._match_text_seq("VALUE") 3685 value = self._parse_field() 3686 if not key and not value: 3687 return None 3688 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3689 3690 def _parse_json_object(self) -> exp.Expression: 3691 expressions = self._parse_csv(self._parse_json_key_value) 3692 3693 null_handling = None 3694 if self._match_text_seq("NULL", "ON", "NULL"): 3695 null_handling = "NULL ON NULL" 3696 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3697 null_handling = "ABSENT ON NULL" 3698 3699 unique_keys = None 3700 if self._match_text_seq("WITH", "UNIQUE"): 3701 unique_keys = True 3702 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3703 unique_keys = False 3704 3705 
self._match_text_seq("KEYS") 3706 3707 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3708 format_json = self._match_text_seq("FORMAT", "JSON") 3709 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3710 3711 return self.expression( 3712 exp.JSONObject, 3713 expressions=expressions, 3714 null_handling=null_handling, 3715 unique_keys=unique_keys, 3716 return_type=return_type, 3717 format_json=format_json, 3718 encoding=encoding, 3719 ) 3720 3721 def _parse_logarithm(self) -> exp.Expression: 3722 # Default argument order is base, expression 3723 args = self._parse_csv(self._parse_range) 3724 3725 if len(args) > 1: 3726 if not self.LOG_BASE_FIRST: 3727 args.reverse() 3728 return exp.Log.from_arg_list(args) 3729 3730 return self.expression( 3731 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3732 ) 3733 3734 def _parse_match_against(self) -> exp.Expression: 3735 expressions = self._parse_csv(self._parse_column) 3736 3737 self._match_text_seq(")", "AGAINST", "(") 3738 3739 this = self._parse_string() 3740 3741 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3742 modifier = "IN NATURAL LANGUAGE MODE" 3743 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3744 modifier = f"{modifier} WITH QUERY EXPANSION" 3745 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3746 modifier = "IN BOOLEAN MODE" 3747 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3748 modifier = "WITH QUERY EXPANSION" 3749 else: 3750 modifier = None 3751 3752 return self.expression( 3753 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3754 ) 3755 3756 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3757 def _parse_open_json(self) -> exp.Expression: 3758 this = self._parse_bitwise() 3759 path = self._match(TokenType.COMMA) and self._parse_string() 3760 3761 def _parse_open_json_column_def() -> exp.Expression: 3762 this = 
self._parse_field(any_token=True) 3763 kind = self._parse_types() 3764 path = self._parse_string() 3765 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3766 return self.expression( 3767 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3768 ) 3769 3770 expressions = None 3771 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3772 self._match_l_paren() 3773 expressions = self._parse_csv(_parse_open_json_column_def) 3774 3775 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3776 3777 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3778 args = self._parse_csv(self._parse_bitwise) 3779 3780 if self._match(TokenType.IN): 3781 return self.expression( 3782 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3783 ) 3784 3785 if haystack_first: 3786 haystack = seq_get(args, 0) 3787 needle = seq_get(args, 1) 3788 else: 3789 needle = seq_get(args, 0) 3790 haystack = seq_get(args, 1) 3791 3792 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3793 3794 self.validate_expression(this, args) 3795 3796 return this 3797 3798 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3799 args = self._parse_csv(self._parse_table) 3800 return exp.JoinHint(this=func_name.upper(), expressions=args) 3801 3802 def _parse_substring(self) -> exp.Expression: 3803 # Postgres supports the form: substring(string [from int] [for int]) 3804 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3805 3806 args = self._parse_csv(self._parse_bitwise) 3807 3808 if self._match(TokenType.FROM): 3809 args.append(self._parse_bitwise()) 3810 if self._match(TokenType.FOR): 3811 args.append(self._parse_bitwise()) 3812 3813 this = exp.Substring.from_arg_list(args) 3814 self.validate_expression(this, args) 3815 3816 return this 3817 3818 def _parse_trim(self) -> exp.Expression: 3819 # 
https://www.w3resource.com/sql/character-functions/trim.php 3820 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3821 3822 position = None 3823 collation = None 3824 3825 if self._match_set(self.TRIM_TYPES): 3826 position = self._prev.text.upper() 3827 3828 expression = self._parse_bitwise() 3829 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3830 this = self._parse_bitwise() 3831 else: 3832 this = expression 3833 expression = None 3834 3835 if self._match(TokenType.COLLATE): 3836 collation = self._parse_bitwise() 3837 3838 return self.expression( 3839 exp.Trim, 3840 this=this, 3841 position=position, 3842 expression=expression, 3843 collation=collation, 3844 ) 3845 3846 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3847 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3848 3849 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3850 return self._parse_window(self._parse_id_var(), alias=True) 3851 3852 def _parse_respect_or_ignore_nulls( 3853 self, this: t.Optional[exp.Expression] 3854 ) -> t.Optional[exp.Expression]: 3855 if self._match(TokenType.IGNORE_NULLS): 3856 return self.expression(exp.IgnoreNulls, this=this) 3857 if self._match(TokenType.RESPECT_NULLS): 3858 return self.expression(exp.RespectNulls, this=this) 3859 return this 3860 3861 def _parse_window( 3862 self, this: t.Optional[exp.Expression], alias: bool = False 3863 ) -> t.Optional[exp.Expression]: 3864 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3865 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3866 self._match_r_paren() 3867 3868 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3869 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3870 if self._match(TokenType.WITHIN_GROUP): 3871 order = self._parse_wrapped(self._parse_order) 3872 this = self.expression(exp.WithinGroup, this=this, expression=order) 3873 3874 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3875 # Some dialects choose to implement and some do not. 3876 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3877 3878 # There is some code above in _parse_lambda that handles 3879 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3880 3881 # The below changes handle 3882 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3883 3884 # Oracle allows both formats 3885 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3886 # and Snowflake chose to do the same for familiarity 3887 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3888 this = self._parse_respect_or_ignore_nulls(this) 3889 3890 # bigquery select from window x AS (partition by ...) 
3891 if alias: 3892 over = None 3893 self._match(TokenType.ALIAS) 3894 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3895 return this 3896 else: 3897 over = self._prev.text.upper() 3898 3899 if not self._match(TokenType.L_PAREN): 3900 return self.expression( 3901 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3902 ) 3903 3904 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3905 3906 first = self._match(TokenType.FIRST) 3907 if self._match_text_seq("LAST"): 3908 first = False 3909 3910 partition = self._parse_partition_by() 3911 order = self._parse_order() 3912 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3913 3914 if kind: 3915 self._match(TokenType.BETWEEN) 3916 start = self._parse_window_spec() 3917 self._match(TokenType.AND) 3918 end = self._parse_window_spec() 3919 3920 spec = self.expression( 3921 exp.WindowSpec, 3922 kind=kind, 3923 start=start["value"], 3924 start_side=start["side"], 3925 end=end["value"], 3926 end_side=end["side"], 3927 ) 3928 else: 3929 spec = None 3930 3931 self._match_r_paren() 3932 3933 return self.expression( 3934 exp.Window, 3935 this=this, 3936 partition_by=partition, 3937 order=order, 3938 spec=spec, 3939 alias=window_alias, 3940 over=over, 3941 first=first, 3942 ) 3943 3944 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3945 self._match(TokenType.BETWEEN) 3946 3947 return { 3948 "value": ( 3949 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3950 ) 3951 or self._parse_bitwise(), 3952 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3953 } 3954 3955 def _parse_alias( 3956 self, this: t.Optional[exp.Expression], explicit: bool = False 3957 ) -> t.Optional[exp.Expression]: 3958 any_token = self._match(TokenType.ALIAS) 3959 3960 if explicit and not any_token: 3961 return this 3962 3963 if self._match(TokenType.L_PAREN): 3964 aliases 
= self.expression( 3965 exp.Aliases, 3966 this=this, 3967 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3968 ) 3969 self._match_r_paren(aliases) 3970 return aliases 3971 3972 alias = self._parse_id_var(any_token) 3973 3974 if alias: 3975 return self.expression(exp.Alias, this=this, alias=alias) 3976 3977 return this 3978 3979 def _parse_id_var( 3980 self, 3981 any_token: bool = True, 3982 tokens: t.Optional[t.Collection[TokenType]] = None, 3983 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3984 ) -> t.Optional[exp.Expression]: 3985 identifier = self._parse_identifier() 3986 3987 if identifier: 3988 return identifier 3989 3990 prefix = "" 3991 3992 if prefix_tokens: 3993 while self._match_set(prefix_tokens): 3994 prefix += self._prev.text 3995 3996 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3997 quoted = self._prev.token_type == TokenType.STRING 3998 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3999 4000 return None 4001 4002 def _parse_string(self) -> t.Optional[exp.Expression]: 4003 if self._match(TokenType.STRING): 4004 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4005 return self._parse_placeholder() 4006 4007 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 4008 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4009 4010 def _parse_number(self) -> t.Optional[exp.Expression]: 4011 if self._match(TokenType.NUMBER): 4012 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4013 return self._parse_placeholder() 4014 4015 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4016 if self._match(TokenType.IDENTIFIER): 4017 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4018 return self._parse_placeholder() 4019 4020 def _parse_var( 4021 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4022 ) -> 
t.Optional[exp.Expression]: 4023 if ( 4024 (any_token and self._advance_any()) 4025 or self._match(TokenType.VAR) 4026 or (self._match_set(tokens) if tokens else False) 4027 ): 4028 return self.expression(exp.Var, this=self._prev.text) 4029 return self._parse_placeholder() 4030 4031 def _advance_any(self) -> t.Optional[Token]: 4032 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4033 self._advance() 4034 return self._prev 4035 return None 4036 4037 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4038 return self._parse_var() or self._parse_string() 4039 4040 def _parse_null(self) -> t.Optional[exp.Expression]: 4041 if self._match(TokenType.NULL): 4042 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4043 return None 4044 4045 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4046 if self._match(TokenType.TRUE): 4047 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4048 if self._match(TokenType.FALSE): 4049 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4050 return None 4051 4052 def _parse_star(self) -> t.Optional[exp.Expression]: 4053 if self._match(TokenType.STAR): 4054 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4055 return None 4056 4057 def _parse_parameter(self) -> exp.Expression: 4058 wrapped = self._match(TokenType.L_BRACE) 4059 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4060 self._match(TokenType.R_BRACE) 4061 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4062 4063 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4064 if self._match_set(self.PLACEHOLDER_PARSERS): 4065 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4066 if placeholder: 4067 return placeholder 4068 self._advance(-1) 4069 return None 4070 4071 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4072 if not self._match(TokenType.EXCEPT): 4073 return None 4074 if 
self._match(TokenType.L_PAREN, advance=False): 4075 return self._parse_wrapped_csv(self._parse_column) 4076 return self._parse_csv(self._parse_column) 4077 4078 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4079 if not self._match(TokenType.REPLACE): 4080 return None 4081 if self._match(TokenType.L_PAREN, advance=False): 4082 return self._parse_wrapped_csv(self._parse_expression) 4083 return self._parse_csv(self._parse_expression) 4084 4085 def _parse_csv( 4086 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4087 ) -> t.List[t.Optional[exp.Expression]]: 4088 parse_result = parse_method() 4089 items = [parse_result] if parse_result is not None else [] 4090 4091 while self._match(sep): 4092 self._add_comments(parse_result) 4093 parse_result = parse_method() 4094 if parse_result is not None: 4095 items.append(parse_result) 4096 4097 return items 4098 4099 def _parse_tokens( 4100 self, parse_method: t.Callable, expressions: t.Dict 4101 ) -> t.Optional[exp.Expression]: 4102 this = parse_method() 4103 4104 while self._match_set(expressions): 4105 this = self.expression( 4106 expressions[self._prev.token_type], 4107 this=this, 4108 comments=self._prev_comments, 4109 expression=parse_method(), 4110 ) 4111 4112 return this 4113 4114 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4115 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4116 4117 def _parse_wrapped_csv( 4118 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4119 ) -> t.List[t.Optional[exp.Expression]]: 4120 return self._parse_wrapped( 4121 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4122 ) 4123 4124 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4125 wrapped = self._match(TokenType.L_PAREN) 4126 if not wrapped and not optional: 4127 self.raise_error("Expecting (") 4128 parse_result = parse_method() 
4129 if wrapped: 4130 self._match_r_paren() 4131 return parse_result 4132 4133 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4134 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 4135 4136 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4137 return self._parse_set_operations( 4138 self._parse_select(nested=True, parse_subquery_alias=False) 4139 ) 4140 4141 def _parse_transaction(self) -> exp.Expression: 4142 this = None 4143 if self._match_texts(self.TRANSACTION_KIND): 4144 this = self._prev.text 4145 4146 self._match_texts({"TRANSACTION", "WORK"}) 4147 4148 modes = [] 4149 while True: 4150 mode = [] 4151 while self._match(TokenType.VAR): 4152 mode.append(self._prev.text) 4153 4154 if mode: 4155 modes.append(" ".join(mode)) 4156 if not self._match(TokenType.COMMA): 4157 break 4158 4159 return self.expression(exp.Transaction, this=this, modes=modes) 4160 4161 def _parse_commit_or_rollback(self) -> exp.Expression: 4162 chain = None 4163 savepoint = None 4164 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4165 4166 self._match_texts({"TRANSACTION", "WORK"}) 4167 4168 if self._match_text_seq("TO"): 4169 self._match_text_seq("SAVEPOINT") 4170 savepoint = self._parse_id_var() 4171 4172 if self._match(TokenType.AND): 4173 chain = not self._match_text_seq("NO") 4174 self._match_text_seq("CHAIN") 4175 4176 if is_rollback: 4177 return self.expression(exp.Rollback, savepoint=savepoint) 4178 return self.expression(exp.Commit, chain=chain) 4179 4180 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4181 if not self._match_text_seq("ADD"): 4182 return None 4183 4184 self._match(TokenType.COLUMN) 4185 exists_column = self._parse_exists(not_=True) 4186 expression = self._parse_column_def(self._parse_field(any_token=True)) 4187 4188 if expression: 4189 expression.set("exists", exists_column) 4190 4191 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
4192 if self._match_texts(("FIRST", "AFTER")): 4193 position = self._prev.text 4194 column_position = self.expression( 4195 exp.ColumnPosition, this=self._parse_column(), position=position 4196 ) 4197 expression.set("position", column_position) 4198 4199 return expression 4200 4201 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4202 drop = self._match(TokenType.DROP) and self._parse_drop() 4203 if drop and not isinstance(drop, exp.Command): 4204 drop.set("kind", drop.args.get("kind", "COLUMN")) 4205 return drop 4206 4207 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4208 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4209 return self.expression( 4210 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4211 ) 4212 4213 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4214 this = None 4215 kind = self._prev.token_type 4216 4217 if kind == TokenType.CONSTRAINT: 4218 this = self._parse_id_var() 4219 4220 if self._match_text_seq("CHECK"): 4221 expression = self._parse_wrapped(self._parse_conjunction) 4222 enforced = self._match_text_seq("ENFORCED") 4223 4224 return self.expression( 4225 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4226 ) 4227 4228 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4229 expression = self._parse_foreign_key() 4230 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4231 expression = self._parse_primary_key() 4232 else: 4233 expression = None 4234 4235 return self.expression(exp.AddConstraint, this=this, expression=expression) 4236 4237 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4238 index = self._index - 1 4239 4240 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4241 return self._parse_csv(self._parse_add_constraint) 4242 4243 self._retreat(index) 4244 return self._parse_csv(self._parse_add_column) 4245 4246 def 
_parse_alter_table_alter(self) -> exp.Expression: 4247 self._match(TokenType.COLUMN) 4248 column = self._parse_field(any_token=True) 4249 4250 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4251 return self.expression(exp.AlterColumn, this=column, drop=True) 4252 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4253 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4254 4255 self._match_text_seq("SET", "DATA") 4256 return self.expression( 4257 exp.AlterColumn, 4258 this=column, 4259 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4260 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4261 using=self._match(TokenType.USING) and self._parse_conjunction(), 4262 ) 4263 4264 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4265 index = self._index - 1 4266 4267 partition_exists = self._parse_exists() 4268 if self._match(TokenType.PARTITION, advance=False): 4269 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4270 4271 self._retreat(index) 4272 return self._parse_csv(self._parse_drop_column) 4273 4274 def _parse_alter_table_rename(self) -> exp.Expression: 4275 self._match_text_seq("TO") 4276 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4277 4278 def _parse_alter(self) -> t.Optional[exp.Expression]: 4279 start = self._prev 4280 4281 if not self._match(TokenType.TABLE): 4282 return self._parse_as_command(start) 4283 4284 exists = self._parse_exists() 4285 this = self._parse_table(schema=True) 4286 4287 if self._next: 4288 self._advance() 4289 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4290 4291 if parser: 4292 actions = ensure_list(parser(self)) 4293 4294 if not self._curr: 4295 return self.expression( 4296 exp.AlterTable, 4297 this=this, 4298 exists=exists, 4299 actions=actions, 4300 ) 4301 return self._parse_as_command(start) 4302 4303 def _parse_merge(self) -> 
exp.Expression: 4304 self._match(TokenType.INTO) 4305 target = self._parse_table() 4306 4307 self._match(TokenType.USING) 4308 using = self._parse_table() 4309 4310 self._match(TokenType.ON) 4311 on = self._parse_conjunction() 4312 4313 whens = [] 4314 while self._match(TokenType.WHEN): 4315 matched = not self._match(TokenType.NOT) 4316 self._match_text_seq("MATCHED") 4317 source = ( 4318 False 4319 if self._match_text_seq("BY", "TARGET") 4320 else self._match_text_seq("BY", "SOURCE") 4321 ) 4322 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4323 4324 self._match(TokenType.THEN) 4325 4326 if self._match(TokenType.INSERT): 4327 _this = self._parse_star() 4328 if _this: 4329 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4330 else: 4331 then = self.expression( 4332 exp.Insert, 4333 this=self._parse_value(), 4334 expression=self._match(TokenType.VALUES) and self._parse_value(), 4335 ) 4336 elif self._match(TokenType.UPDATE): 4337 expressions = self._parse_star() 4338 if expressions: 4339 then = self.expression(exp.Update, expressions=expressions) 4340 else: 4341 then = self.expression( 4342 exp.Update, 4343 expressions=self._match(TokenType.SET) 4344 and self._parse_csv(self._parse_equality), 4345 ) 4346 elif self._match(TokenType.DELETE): 4347 then = self.expression(exp.Var, this=self._prev.text) 4348 else: 4349 then = None 4350 4351 whens.append( 4352 self.expression( 4353 exp.When, 4354 matched=matched, 4355 source=source, 4356 condition=condition, 4357 then=then, 4358 ) 4359 ) 4360 4361 return self.expression( 4362 exp.Merge, 4363 this=target, 4364 using=using, 4365 on=on, 4366 expressions=whens, 4367 ) 4368 4369 def _parse_show(self) -> t.Optional[exp.Expression]: 4370 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4371 if parser: 4372 return parser(self) 4373 self._advance() 4374 return self.expression(exp.Show, this=self._prev.text.upper()) 4375 4376 def 
_parse_set_item_assignment( 4377 self, kind: t.Optional[str] = None 4378 ) -> t.Optional[exp.Expression]: 4379 index = self._index 4380 4381 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4382 return self._parse_set_transaction(global_=kind == "GLOBAL") 4383 4384 left = self._parse_primary() or self._parse_id_var() 4385 4386 if not self._match_texts(("=", "TO")): 4387 self._retreat(index) 4388 return None 4389 4390 right = self._parse_statement() or self._parse_id_var() 4391 this = self.expression( 4392 exp.EQ, 4393 this=left, 4394 expression=right, 4395 ) 4396 4397 return self.expression( 4398 exp.SetItem, 4399 this=this, 4400 kind=kind, 4401 ) 4402 4403 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4404 self._match_text_seq("TRANSACTION") 4405 characteristics = self._parse_csv( 4406 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4407 ) 4408 return self.expression( 4409 exp.SetItem, 4410 expressions=characteristics, 4411 kind="TRANSACTION", 4412 **{"global": global_}, # type: ignore 4413 ) 4414 4415 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4416 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4417 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4418 4419 def _parse_set(self) -> exp.Expression: 4420 index = self._index 4421 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4422 4423 if self._curr: 4424 self._retreat(index) 4425 return self._parse_as_command(self._prev) 4426 4427 return set_ 4428 4429 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4430 for option in options: 4431 if self._match_text_seq(*option.split(" ")): 4432 return exp.Var(this=option) 4433 return None 4434 4435 def _parse_as_command(self, start: Token) -> exp.Command: 4436 while self._curr: 4437 self._advance() 4438 text = self._find_sql(start, self._prev) 4439 size 
= len(start.text) 4440 return exp.Command(this=text[:size], expression=text[size:]) 4441 4442 def _find_parser( 4443 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4444 ) -> t.Optional[t.Callable]: 4445 if not self._curr: 4446 return None 4447 4448 index = self._index 4449 this = [] 4450 while True: 4451 # The current token might be multiple words 4452 curr = self._curr.text.upper() 4453 key = curr.split(" ") 4454 this.append(curr) 4455 self._advance() 4456 result, trie = in_trie(trie, key) 4457 if result == 0: 4458 break 4459 if result == 2: 4460 subparser = parsers[" ".join(this)] 4461 return subparser 4462 self._retreat(index) 4463 return None 4464 4465 def _match(self, token_type, advance=True, expression=None): 4466 if not self._curr: 4467 return None 4468 4469 if self._curr.token_type == token_type: 4470 if advance: 4471 self._advance() 4472 self._add_comments(expression) 4473 return True 4474 4475 return None 4476 4477 def _match_set(self, types, advance=True): 4478 if not self._curr: 4479 return None 4480 4481 if self._curr.token_type in types: 4482 if advance: 4483 self._advance() 4484 return True 4485 4486 return None 4487 4488 def _match_pair(self, token_type_a, token_type_b, advance=True): 4489 if not self._curr or not self._next: 4490 return None 4491 4492 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4493 if advance: 4494 self._advance(2) 4495 return True 4496 4497 return None 4498 4499 def _match_l_paren(self, expression=None): 4500 if not self._match(TokenType.L_PAREN, expression=expression): 4501 self.raise_error("Expecting (") 4502 4503 def _match_r_paren(self, expression=None): 4504 if not self._match(TokenType.R_PAREN, expression=expression): 4505 self.raise_error("Expecting )") 4506 4507 def _match_texts(self, texts, advance=True): 4508 if self._curr and self._curr.text.upper() in texts: 4509 if advance: 4510 self._advance() 4511 return True 4512 return False 4513 4514 def _match_text_seq(self, 
*texts, advance=True): 4515 index = self._index 4516 for text in texts: 4517 if self._curr and self._curr.text.upper() == text: 4518 self._advance() 4519 else: 4520 self._retreat(index) 4521 return False 4522 4523 if not advance: 4524 self._retreat(index) 4525 4526 return True 4527 4528 def _replace_columns_with_dots(self, this): 4529 if isinstance(this, exp.Dot): 4530 exp.replace_children(this, self._replace_columns_with_dots) 4531 elif isinstance(this, exp.Column): 4532 exp.replace_children(this, self._replace_columns_with_dots) 4533 table = this.args.get("table") 4534 this = ( 4535 self.expression(exp.Dot, this=table, expression=this.this) 4536 if table 4537 else self.expression(exp.Var, this=this.name) 4538 ) 4539 elif isinstance(this, exp.Identifier): 4540 this = self.expression(exp.Var, this=this.name) 4541 return this 4542 4543 def _replace_lambda(self, node, lambda_variables): 4544 for column in node.find_all(exp.Column): 4545 if column.parts[0].name in lambda_variables: 4546 dot_or_id = column.to_dot() if column.table else column.this 4547 parent = column.parent 4548 4549 while isinstance(parent, exp.Dot): 4550 if not isinstance(parent.parent, exp.Dot): 4551 parent.replace(dot_or_id) 4552 break 4553 parent = parent.parent 4554 else: 4555 if column is node: 4556 node = dot_or_id 4557 else: 4558 column.replace(dot_or_id) 4559 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (the fallback applied by `__init__` when no level is given)
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset()
838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 )
Logs or raises any found errors, depending on the chosen error level setting.
937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.