# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")

E = t.TypeVar("E", bound=exp.Expression)


def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap expression from a flat, interleaved key/value argument list.

    A single star argument produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args):
    """Build a LIKE expression; wrap it in an ESCAPE node when a third arg is given."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-operator parser producing `expr_type` (e.g. LIKE, GLOB) nodes.

    The returned callable parses the right-hand side with `_parse_bitwise` and
    then allows a trailing ESCAPE clause via `_parse_escape`.
    """
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes lookup tries for SHOW/SET parsers per subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    JOIN_HINTS: t.Set[str] = set()

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can also be used as identifiers (column/table names etc.).
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 302 303 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 304 305 FUNC_TOKENS = { 306 TokenType.COMMAND, 307 TokenType.CURRENT_DATE, 308 TokenType.CURRENT_DATETIME, 309 TokenType.CURRENT_TIMESTAMP, 310 TokenType.CURRENT_TIME, 311 TokenType.CURRENT_USER, 312 TokenType.FILTER, 313 TokenType.FIRST, 314 TokenType.FORMAT, 315 TokenType.GLOB, 316 TokenType.IDENTIFIER, 317 TokenType.INDEX, 318 TokenType.ISNULL, 319 TokenType.ILIKE, 320 TokenType.LIKE, 321 TokenType.MERGE, 322 TokenType.OFFSET, 323 TokenType.PRIMARY_KEY, 324 TokenType.RANGE, 325 TokenType.REPLACE, 326 TokenType.ROW, 327 TokenType.UNNEST, 328 TokenType.VAR, 329 TokenType.LEFT, 330 TokenType.RIGHT, 331 TokenType.DATE, 332 TokenType.DATETIME, 333 TokenType.TABLE, 334 TokenType.TIMESTAMP, 335 TokenType.TIMESTAMPTZ, 336 TokenType.WINDOW, 337 *TYPE_TOKENS, 338 *SUBQUERY_PREDICATES, 339 } 340 341 CONJUNCTION = { 342 TokenType.AND: exp.And, 343 TokenType.OR: exp.Or, 344 } 345 346 EQUALITY = { 347 TokenType.EQ: exp.EQ, 348 TokenType.NEQ: exp.NEQ, 349 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 350 } 351 352 COMPARISON = { 353 TokenType.GT: exp.GT, 354 TokenType.GTE: exp.GTE, 355 TokenType.LT: exp.LT, 356 TokenType.LTE: exp.LTE, 357 } 358 359 BITWISE = { 360 TokenType.AMP: exp.BitwiseAnd, 361 TokenType.CARET: exp.BitwiseXor, 362 TokenType.PIPE: exp.BitwiseOr, 363 TokenType.DPIPE: exp.DPipe, 364 } 365 366 TERM = { 367 TokenType.DASH: exp.Sub, 368 TokenType.PLUS: exp.Add, 369 TokenType.MOD: exp.Mod, 370 TokenType.COLLATE: exp.Collate, 371 } 372 373 FACTOR = { 374 TokenType.DIV: exp.IntDiv, 375 TokenType.LR_ARROW: exp.Distance, 376 TokenType.SLASH: exp.Div, 377 TokenType.STAR: exp.Mul, 378 } 379 380 TIMESTAMPS = { 381 TokenType.TIME, 382 TokenType.TIMESTAMP, 383 TokenType.TIMESTAMPTZ, 384 TokenType.TIMESTAMPLTZ, 385 } 386 387 SET_OPERATIONS = { 388 TokenType.UNION, 389 TokenType.INTERSECT, 390 TokenType.EXCEPT, 391 } 392 393 JOIN_SIDES = { 394 
TokenType.LEFT, 395 TokenType.RIGHT, 396 TokenType.FULL, 397 } 398 399 JOIN_KINDS = { 400 TokenType.INNER, 401 TokenType.OUTER, 402 TokenType.CROSS, 403 TokenType.SEMI, 404 TokenType.ANTI, 405 } 406 407 LAMBDAS = { 408 TokenType.ARROW: lambda self, expressions: self.expression( 409 exp.Lambda, 410 this=self._replace_lambda( 411 self._parse_conjunction(), 412 {node.name for node in expressions}, 413 ), 414 expressions=expressions, 415 ), 416 TokenType.FARROW: lambda self, expressions: self.expression( 417 exp.Kwarg, 418 this=exp.Var(this=expressions[0].name), 419 expression=self._parse_conjunction(), 420 ), 421 } 422 423 COLUMN_OPERATORS = { 424 TokenType.DOT: None, 425 TokenType.DCOLON: lambda self, this, to: self.expression( 426 exp.Cast if self.STRICT_CAST else exp.TryCast, 427 this=this, 428 to=to, 429 ), 430 TokenType.ARROW: lambda self, this, path: self.expression( 431 exp.JSONExtract, 432 this=this, 433 expression=path, 434 ), 435 TokenType.DARROW: lambda self, this, path: self.expression( 436 exp.JSONExtractScalar, 437 this=this, 438 expression=path, 439 ), 440 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 441 exp.JSONBExtract, 442 this=this, 443 expression=path, 444 ), 445 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 446 exp.JSONBExtractScalar, 447 this=this, 448 expression=path, 449 ), 450 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 451 exp.JSONBContains, 452 this=this, 453 expression=key, 454 ), 455 } 456 457 EXPRESSION_PARSERS = { 458 exp.Column: lambda self: self._parse_column(), 459 exp.DataType: lambda self: self._parse_types(), 460 exp.From: lambda self: self._parse_from(), 461 exp.Group: lambda self: self._parse_group(), 462 exp.Identifier: lambda self: self._parse_id_var(), 463 exp.Lateral: lambda self: self._parse_lateral(), 464 exp.Join: lambda self: self._parse_join(), 465 exp.Order: lambda self: self._parse_order(), 466 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, 
exp.Cluster), 467 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 468 exp.Lambda: lambda self: self._parse_lambda(), 469 exp.Limit: lambda self: self._parse_limit(), 470 exp.Offset: lambda self: self._parse_offset(), 471 exp.TableAlias: lambda self: self._parse_table_alias(), 472 exp.Table: lambda self: self._parse_table(), 473 exp.Condition: lambda self: self._parse_conjunction(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.Properties: lambda self: self._parse_properties(), 476 exp.Where: lambda self: self._parse_where(), 477 exp.Ordered: lambda self: self._parse_ordered(), 478 exp.Having: lambda self: self._parse_having(), 479 exp.With: lambda self: self._parse_with(), 480 exp.Window: lambda self: self._parse_named_window(), 481 exp.Qualify: lambda self: self._parse_qualify(), 482 exp.Returning: lambda self: self._parse_returning(), 483 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 484 } 485 486 STATEMENT_PARSERS = { 487 TokenType.ALTER: lambda self: self._parse_alter(), 488 TokenType.BEGIN: lambda self: self._parse_transaction(), 489 TokenType.CACHE: lambda self: self._parse_cache(), 490 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 491 TokenType.COMMENT: lambda self: self._parse_comment(), 492 TokenType.CREATE: lambda self: self._parse_create(), 493 TokenType.DELETE: lambda self: self._parse_delete(), 494 TokenType.DESC: lambda self: self._parse_describe(), 495 TokenType.DESCRIBE: lambda self: self._parse_describe(), 496 TokenType.DROP: lambda self: self._parse_drop(), 497 TokenType.END: lambda self: self._parse_commit_or_rollback(), 498 TokenType.INSERT: lambda self: self._parse_insert(), 499 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 500 TokenType.MERGE: lambda self: self._parse_merge(), 501 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 502 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 503 
TokenType.SET: lambda self: self._parse_set(), 504 TokenType.UNCACHE: lambda self: self._parse_uncache(), 505 TokenType.UPDATE: lambda self: self._parse_update(), 506 TokenType.USE: lambda self: self.expression( 507 exp.Use, 508 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 509 and exp.Var(this=self._prev.text), 510 this=self._parse_table(schema=False), 511 ), 512 } 513 514 UNARY_PARSERS = { 515 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 516 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 517 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 518 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 519 } 520 521 PRIMARY_PARSERS = { 522 TokenType.STRING: lambda self, token: self.expression( 523 exp.Literal, this=token.text, is_string=True 524 ), 525 TokenType.NUMBER: lambda self, token: self.expression( 526 exp.Literal, this=token.text, is_string=False 527 ), 528 TokenType.STAR: lambda self, _: self.expression( 529 exp.Star, 530 **{"except": self._parse_except(), "replace": self._parse_replace()}, 531 ), 532 TokenType.NULL: lambda self, _: self.expression(exp.Null), 533 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 534 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 535 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 536 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 537 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 538 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 539 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 540 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 541 } 542 543 PLACEHOLDER_PARSERS = { 544 TokenType.PLACEHOLDER: lambda self: 
self.expression(exp.Placeholder), 545 TokenType.PARAMETER: lambda self: self._parse_parameter(), 546 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 547 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 548 else None, 549 } 550 551 RANGE_PARSERS = { 552 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 553 TokenType.GLOB: binary_range_parser(exp.Glob), 554 TokenType.ILIKE: binary_range_parser(exp.ILike), 555 TokenType.IN: lambda self, this: self._parse_in(this), 556 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 557 TokenType.IS: lambda self, this: self._parse_is(this), 558 TokenType.LIKE: binary_range_parser(exp.Like), 559 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 560 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 561 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 562 } 563 564 PROPERTY_PARSERS = { 565 "AFTER": lambda self: self._parse_afterjournal( 566 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 567 ), 568 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 569 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 570 "BEFORE": lambda self: self._parse_journal( 571 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 572 ), 573 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 574 "CHARACTER SET": lambda self: self._parse_character_set(), 575 "CHECKSUM": lambda self: self._parse_checksum(), 576 "CLUSTER BY": lambda self: self.expression( 577 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 578 ), 579 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 580 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 581 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 582 default=self._prev.text.upper() == "DEFAULT" 583 ), 584 "DEFINER": lambda self: 
self._parse_definer(), 585 "DETERMINISTIC": lambda self: self.expression( 586 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 587 ), 588 "DISTKEY": lambda self: self._parse_distkey(), 589 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 590 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 591 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 592 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 593 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 594 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 595 "FREESPACE": lambda self: self._parse_freespace(), 596 "GLOBAL": lambda self: self._parse_temporary(global_=True), 597 "IMMUTABLE": lambda self: self.expression( 598 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 599 ), 600 "JOURNAL": lambda self: self._parse_journal( 601 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 602 ), 603 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 604 "LIKE": lambda self: self._parse_create_like(), 605 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 606 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 607 "LOCK": lambda self: self._parse_locking(), 608 "LOCKING": lambda self: self._parse_locking(), 609 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 610 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 611 "MAX": lambda self: self._parse_datablocksize(), 612 "MAXIMUM": lambda self: self._parse_datablocksize(), 613 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 614 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 615 ), 616 "MIN": lambda self: self._parse_datablocksize(), 617 "MINIMUM": lambda self: self._parse_datablocksize(), 618 
"MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 619 "NO": lambda self: self._parse_noprimaryindex(), 620 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 621 "ON": lambda self: self._parse_oncommit(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: self._parse_primary_key(), 627 "RETURNS": lambda self: self._parse_returns(), 628 "ROW": lambda self: self._parse_row(), 629 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 630 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 631 "SETTINGS": lambda self: self.expression( 632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 633 ), 634 "SORTKEY": lambda self: self._parse_sortkey(), 635 "STABLE": lambda self: self.expression( 636 exp.StabilityProperty, this=exp.Literal.string("STABLE") 637 ), 638 "STORED": lambda self: self._parse_stored(), 639 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 640 "TEMP": lambda self: self._parse_temporary(global_=False), 641 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 642 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 643 "TTL": lambda self: self._parse_ttl(), 644 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 645 "VOLATILE": lambda self: self._parse_volatile_property(), 646 "WITH": lambda self: self._parse_with_property(), 647 } 648 649 CONSTRAINT_PARSERS = { 650 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 651 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 652 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 653 "CHARACTER SET": lambda self: 
self.expression( 654 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 655 ), 656 "CHECK": lambda self: self.expression( 657 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 658 ), 659 "COLLATE": lambda self: self.expression( 660 exp.CollateColumnConstraint, this=self._parse_var() 661 ), 662 "COMMENT": lambda self: self.expression( 663 exp.CommentColumnConstraint, this=self._parse_string() 664 ), 665 "COMPRESS": lambda self: self._parse_compress(), 666 "DEFAULT": lambda self: self.expression( 667 exp.DefaultColumnConstraint, this=self._parse_bitwise() 668 ), 669 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 670 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 671 "FORMAT": lambda self: self.expression( 672 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 673 ), 674 "GENERATED": lambda self: self._parse_generated_as_identity(), 675 "IDENTITY": lambda self: self._parse_auto_increment(), 676 "INLINE": lambda self: self._parse_inline(), 677 "LIKE": lambda self: self._parse_create_like(), 678 "NOT": lambda self: self._parse_not_constraint(), 679 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 680 "ON": lambda self: self._match(TokenType.UPDATE) 681 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 682 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 683 "PRIMARY KEY": lambda self: self._parse_primary_key(), 684 "REFERENCES": lambda self: self._parse_references(match=False), 685 "TITLE": lambda self: self.expression( 686 exp.TitleColumnConstraint, this=self._parse_var_or_string() 687 ), 688 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 689 "UNIQUE": lambda self: self._parse_unique(), 690 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 691 } 692 693 ALTER_PARSERS = { 694 "ADD": lambda 
self: self._parse_alter_table_add(), 695 "ALTER": lambda self: self._parse_alter_table_alter(), 696 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 697 "DROP": lambda self: self._parse_alter_table_drop(), 698 "RENAME": lambda self: self._parse_alter_table_rename(), 699 } 700 701 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 702 703 NO_PAREN_FUNCTION_PARSERS = { 704 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 705 TokenType.CASE: lambda self: self._parse_case(), 706 TokenType.IF: lambda self: self._parse_if(), 707 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 708 exp.NextValueFor, 709 this=self._parse_column(), 710 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 711 ), 712 } 713 714 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 715 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 716 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 717 "DECODE": lambda self: self._parse_decode(), 718 "EXTRACT": lambda self: self._parse_extract(), 719 "JSON_OBJECT": lambda self: self._parse_json_object(), 720 "LOG": lambda self: self._parse_logarithm(), 721 "MATCH": lambda self: self._parse_match_against(), 722 "OPENJSON": lambda self: self._parse_open_json(), 723 "POSITION": lambda self: self._parse_position(), 724 "STRING_AGG": lambda self: self._parse_string_agg(), 725 "SUBSTRING": lambda self: self._parse_substring(), 726 "TRIM": lambda self: self._parse_trim(), 727 "TRY_CAST": lambda self: self._parse_cast(False), 728 "TRY_CONVERT": lambda self: self._parse_convert(False), 729 } 730 731 QUERY_MODIFIER_PARSERS = { 732 "joins": lambda self: list(iter(self._parse_join, None)), 733 "laterals": lambda self: list(iter(self._parse_lateral, None)), 734 "match": lambda self: self._parse_match_recognize(), 735 "where": lambda self: self._parse_where(), 736 "group": lambda self: self._parse_group(), 737 "having": lambda 
self: self._parse_having(), 738 "qualify": lambda self: self._parse_qualify(), 739 "windows": lambda self: self._parse_window_clause(), 740 "order": lambda self: self._parse_order(), 741 "limit": lambda self: self._parse_limit(), 742 "offset": lambda self: self._parse_offset(), 743 "locks": lambda self: self._parse_locks(), 744 "sample": lambda self: self._parse_table_sample(as_modifier=True), 745 } 746 747 SET_PARSERS = { 748 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 749 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 750 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 751 "TRANSACTION": lambda self: self._parse_set_transaction(), 752 } 753 754 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 755 756 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 757 758 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 759 760 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 761 762 TRANSACTION_CHARACTERISTICS = { 763 "ISOLATION LEVEL REPEATABLE READ", 764 "ISOLATION LEVEL READ COMMITTED", 765 "ISOLATION LEVEL READ UNCOMMITTED", 766 "ISOLATION LEVEL SERIALIZABLE", 767 "READ WRITE", 768 "READ ONLY", 769 } 770 771 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 772 773 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 774 775 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 776 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 777 778 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 779 780 STRICT_CAST = True 781 782 CONVERT_TYPE_FIRST = False 783 784 PREFIXED_PIVOT_COLUMNS = False 785 IDENTIFY_PIVOT_STRINGS = False 786 787 LOG_BASE_FIRST = True 788 LOG_DEFAULTS_TO_LN = False 789 790 __slots__ = ( 791 "error_level", 792 "error_message_context", 793 "sql", 794 "errors", 795 "index_offset", 796 "unnest_column_only", 797 "alias_post_tablesample", 798 "max_errors", 799 "null_ordering", 800 "_tokens", 801 "_index", 802 "_curr", 
803 "_next", 804 "_prev", 805 "_prev_comments", 806 "_show_trie", 807 "_set_trie", 808 ) 809 810 def __init__( 811 self, 812 error_level: t.Optional[ErrorLevel] = None, 813 error_message_context: int = 100, 814 index_offset: int = 0, 815 unnest_column_only: bool = False, 816 alias_post_tablesample: bool = False, 817 max_errors: int = 3, 818 null_ordering: t.Optional[str] = None, 819 ): 820 self.error_level = error_level or ErrorLevel.IMMEDIATE 821 self.error_message_context = error_message_context 822 self.index_offset = index_offset 823 self.unnest_column_only = unnest_column_only 824 self.alias_post_tablesample = alias_post_tablesample 825 self.max_errors = max_errors 826 self.null_ordering = null_ordering 827 self.reset() 828 829 def reset(self): 830 self.sql = "" 831 self.errors = [] 832 self._tokens = [] 833 self._index = 0 834 self._curr = None 835 self._next = None 836 self._prev = None 837 self._prev_comments = None 838 839 def parse( 840 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 841 ) -> t.List[t.Optional[exp.Expression]]: 842 """ 843 Parses a list of tokens and returns a list of syntax trees, one tree 844 per parsed SQL statement. 845 846 Args: 847 raw_tokens: the list of tokens. 848 sql: the original SQL string, used to produce helpful debug messages. 849 850 Returns: 851 The list of syntax trees. 852 """ 853 return self._parse( 854 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 855 ) 856 857 def parse_into( 858 self, 859 expression_types: exp.IntoType, 860 raw_tokens: t.List[Token], 861 sql: t.Optional[str] = None, 862 ) -> t.List[t.Optional[exp.Expression]]: 863 """ 864 Parses a list of tokens into a given Expression type. If a collection of Expression 865 types is given instead, this method will try to parse the token list into each one 866 of them, stopping at the first for which the parsing succeeds. 867 868 Args: 869 expression_types: the expression type(s) to try and parse the token list into. 
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits `raw_tokens` into one chunk per statement (on semicolons) and runs
        `parse_method` over each chunk, returning one (possibly None) expression per
        statement.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split on semicolons; a trailing semicolon (i == total - 1) does not
        # open a new, empty chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean parse_method did not consume the whole statement.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current or previous token so the error always has a position.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments if given; otherwise attach any comments buffered
        # from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        self.validate_expression(instance)
        return instance

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move the pending comments (collected by _advance) onto `expression`,
        # clearing the buffer so they are not attached twice.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning `start` through `end` (inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the token cursor forward and refresh _curr/_next/_prev bookkeeping.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Move the cursor back (or forward) to the absolute position `index`.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token as an opaque command with the rest as a string.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT statement; falls back to a raw Command for unknown target kinds."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression optionally followed by DELETE / RECOMPRESS / TO DISK / TO VOLUME.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregations> is only valid after a GROUP BY clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: dispatch table first, raw commands next, expressions last."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses DROP; falls back to a raw Command when the dropped kind is not a creatable."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; `not_` selects whether the NOT is required.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... into an exp.Create node,
        collecting properties from every position a dialect may put them in.
        Falls back to a raw Command when the created kind is unrecognized.
        """
        start = self._prev
        # _prev may already be REPLACE (dialect-dependent entry), otherwise look for OR REPLACE.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...): consume TABLE so FUNCTION becomes the create token.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            # Only look for properties here if the body hasn't started yet.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    temp_properties = self._parse_properties()
                    if properties and temp_properties:
                        properties.expressions.extend(temp_properties.expressions)
                    elif temp_properties:
                        properties = temp_properties

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses one comma-separated property that appears before the created object's name."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses one property: registered parsers first, then special pairs, then key=value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic <var|string> = <value> assignment (peek without consuming).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        """Parses STORED AS, including Hive's INPUTFORMAT/OUTPUTFORMAT variant."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Consumes an optional "=" or "AS" before the property value.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Greedily parses consecutive properties; `before` selects pre-name parsing rules."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield several properties.
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        """Disambiguates VOLATILE: a table keyword right after CREATE vs a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the various WITH-prefixed property forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parses DEFINER = user@host."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # Host may be an identifier or the literal "%" wildcard token.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parses CHECKSUM = ON | OFF | DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        # on stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parses MERGEBLOCKRATIO, either with an explicit = value or as NO/DEFAULT form."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parses DATABLOCKSIZE in its DEFAULT, MIN/MAX, and explicit size forms."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parses BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP(...)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses a LOCKING clause: kind, target, FOR/IN, lock type, and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns an empty list (not None) when no PARTITION BY is present.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        """Parses [NO] DATA [AND [NO] STATISTICS]; statistics stays None when absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        # ON COMMIT PRESERVE ROWS vs ON COMMIT DELETE ROWS.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        return exp.OnCommitProperty(delete=self._match_text_seq("COMMIT", "DELETE", "ROWS"))

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parses LIKE <table> [INCLUDING | EXCLUDING <option>]*."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # A dangling INCLUDING/EXCLUDING without an option aborts the whole parse.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parses a RETURNS clause: a scalar type, RETURNS TABLE(...), or RETURNS TABLE<...>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses INSERT, including INSERT ... DIRECTORY and INSERT OR <alternative> forms."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        # The conflict target (constraint name or key columns) only exists for ON CONFLICT.
        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parses Hive-style ROW FORMAT SERDE / ROW FORMAT DELIMITED clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parses LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ..."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        # "from" is a Python keyword, hence the dict-splat instead of keyword args.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses CACHE [LAZY] TABLE ... [OPTIONS(k = v)] [AS select]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        # OPTIONS accepts a single wrapped key = value pair.
        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parses one VALUES row, wrapped in parentheses or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1844 # Source: https://prestodb.io/docs/current/sql/values.html 1845 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1846 1847 def _parse_select( 1848 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1849 ) -> t.Optional[exp.Expression]: 1850 cte = self._parse_with() 1851 if cte: 1852 this = self._parse_statement() 1853 1854 if not this: 1855 self.raise_error("Failed to parse any statement following CTE") 1856 return cte 1857 1858 if "with" in this.arg_types: 1859 this.set("with", cte) 1860 else: 1861 self.raise_error(f"{this.key} does not support CTE") 1862 this = cte 1863 elif self._match(TokenType.SELECT): 1864 comments = self._prev_comments 1865 1866 hint = self._parse_hint() 1867 all_ = self._match(TokenType.ALL) 1868 distinct = self._match(TokenType.DISTINCT) 1869 1870 kind = ( 1871 self._match(TokenType.ALIAS) 1872 and self._match_texts(("STRUCT", "VALUE")) 1873 and self._prev.text 1874 ) 1875 1876 if distinct: 1877 distinct = self.expression( 1878 exp.Distinct, 1879 on=self._parse_value() if self._match(TokenType.ON) else None, 1880 ) 1881 1882 if all_ and distinct: 1883 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1884 1885 limit = self._parse_limit(top=True) 1886 expressions = self._parse_csv(self._parse_expression) 1887 1888 this = self.expression( 1889 exp.Select, 1890 kind=kind, 1891 hint=hint, 1892 distinct=distinct, 1893 expressions=expressions, 1894 limit=limit, 1895 ) 1896 this.comments = comments 1897 1898 into = self._parse_into() 1899 if into: 1900 this.set("into", into) 1901 1902 from_ = self._parse_from() 1903 if from_: 1904 this.set("from", from_) 1905 1906 this = self._parse_query_modifiers(this) 1907 elif (table or nested) and self._match(TokenType.L_PAREN): 1908 this = self._parse_table() if table else self._parse_select(nested=True) 1909 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1910 self._match_r_paren() 1911 1912 # early 
return so that subquery unions aren't parsed again 1913 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1914 # Union ALL should be a property of the top select node, not the subquery 1915 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1916 elif self._match(TokenType.VALUES): 1917 this = self.expression( 1918 exp.Values, 1919 expressions=self._parse_csv(self._parse_value), 1920 alias=self._parse_table_alias(), 1921 ) 1922 else: 1923 this = None 1924 1925 return self._parse_set_operations(this) 1926 1927 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1928 if not skip_with_token and not self._match(TokenType.WITH): 1929 return None 1930 1931 comments = self._prev_comments 1932 recursive = self._match(TokenType.RECURSIVE) 1933 1934 expressions = [] 1935 while True: 1936 expressions.append(self._parse_cte()) 1937 1938 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1939 break 1940 else: 1941 self._match(TokenType.WITH) 1942 1943 return self.expression( 1944 exp.With, comments=comments, expressions=expressions, recursive=recursive 1945 ) 1946 1947 def _parse_cte(self) -> exp.Expression: 1948 alias = self._parse_table_alias() 1949 if not alias or not alias.this: 1950 self.raise_error("Expected CTE to have alias") 1951 1952 self._match(TokenType.ALIAS) 1953 1954 return self.expression( 1955 exp.CTE, 1956 this=self._parse_wrapped(self._parse_statement), 1957 alias=alias, 1958 ) 1959 1960 def _parse_table_alias( 1961 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1962 ) -> t.Optional[exp.Expression]: 1963 any_token = self._match(TokenType.ALIAS) 1964 alias = ( 1965 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1966 or self._parse_string_as_identifier() 1967 ) 1968 1969 index = self._index 1970 if self._match(TokenType.L_PAREN): 1971 columns = self._parse_csv(self._parse_function_parameter) 1972 self._match_r_paren() if columns else 
self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap ``this`` in exp.Subquery, consuming trailing pivots and an
        optional alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach query modifiers (joins, where, group, order, limit, ...) to a
        modifiable node; any other node passes through unchanged."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse a ``/*+ ... */`` optimizer hint comment into exp.Hint."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse ``INTO [TEMPORARY|UNLOGGED] [TABLE] name`` into exp.Into."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        """Parse a FROM clause into exp.From; ``modifiers`` also attaches
        query modifiers to the parsed table."""
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        if not
self._match(TokenType.MATCH_RECOGNIZE): 2038 return None 2039 2040 self._match_l_paren() 2041 2042 partition = self._parse_partition_by() 2043 order = self._parse_order() 2044 measures = ( 2045 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2046 ) 2047 2048 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2049 rows = exp.Var(this="ONE ROW PER MATCH") 2050 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2051 text = "ALL ROWS PER MATCH" 2052 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2053 text += f" SHOW EMPTY MATCHES" 2054 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2055 text += f" OMIT EMPTY MATCHES" 2056 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2057 text += f" WITH UNMATCHED ROWS" 2058 rows = exp.Var(this=text) 2059 else: 2060 rows = None 2061 2062 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2063 text = "AFTER MATCH SKIP" 2064 if self._match_text_seq("PAST", "LAST", "ROW"): 2065 text += f" PAST LAST ROW" 2066 elif self._match_text_seq("TO", "NEXT", "ROW"): 2067 text += f" TO NEXT ROW" 2068 elif self._match_text_seq("TO", "FIRST"): 2069 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2070 elif self._match_text_seq("TO", "LAST"): 2071 text += f" TO LAST {self._advance_any().text}" # type: ignore 2072 after = exp.Var(this=text) 2073 else: 2074 after = None 2075 2076 if self._match_text_seq("PATTERN"): 2077 self._match_l_paren() 2078 2079 if not self._curr: 2080 self.raise_error("Expecting )", self._curr) 2081 2082 paren = 1 2083 start = self._curr 2084 2085 while self._curr and paren > 0: 2086 if self._curr.token_type == TokenType.L_PAREN: 2087 paren += 1 2088 if self._curr.token_type == TokenType.R_PAREN: 2089 paren -= 1 2090 end = self._prev 2091 self._advance() 2092 if paren > 0: 2093 self.raise_error("Expecting )", self._curr) 2094 pattern = exp.Var(this=self._find_sql(start, end)) 2095 else: 2096 pattern = None 2097 2098 define = ( 2099 
self._parse_csv( 2100 lambda: self.expression( 2101 exp.Alias, 2102 alias=self._parse_id_var(any_token=True), 2103 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2104 ) 2105 ) 2106 if self._match_text_seq("DEFINE") 2107 else None 2108 ) 2109 2110 self._match_r_paren() 2111 2112 return self.expression( 2113 exp.MatchRecognize, 2114 partition_by=partition, 2115 order=order, 2116 measures=measures, 2117 rows=rows, 2118 after=after, 2119 pattern=pattern, 2120 define=define, 2121 alias=self._parse_table_alias(), 2122 ) 2123 2124 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2125 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2126 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2127 2128 if outer_apply or cross_apply: 2129 this = self._parse_select(table=True) 2130 view = None 2131 outer = not cross_apply 2132 elif self._match(TokenType.LATERAL): 2133 this = self._parse_select(table=True) 2134 view = self._match(TokenType.VIEW) 2135 outer = self._match(TokenType.OUTER) 2136 else: 2137 return None 2138 2139 if not this: 2140 this = self._parse_function() or self._parse_id_var(any_token=False) 2141 while self._match(TokenType.DOT): 2142 this = exp.Dot( 2143 this=this, 2144 expression=self._parse_function() or self._parse_id_var(any_token=False), 2145 ) 2146 2147 table_alias: t.Optional[exp.Expression] 2148 2149 if view: 2150 table = self._parse_id_var(any_token=False) 2151 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2152 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2153 else: 2154 table_alias = self._parse_table_alias() 2155 2156 expression = self.expression( 2157 exp.Lateral, 2158 this=this, 2159 view=view, 2160 outer=outer, 2161 alias=table_alias, 2162 ) 2163 2164 return expression 2165 2166 def _parse_join_side_and_kind( 2167 self, 2168 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2169 return ( 2170 
 self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause (including comma joins and the APPLY forms)
        into exp.Join, or return None when the cursor is not at a join."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed: undo the side/kind token consumption.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse ``index_name ON [TABLE] table columns`` into exp.Index."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index definition inside CREATE TABLE, or None if
        no INDEX keyword follows the optional modifiers."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name; function calls are only
        allowed when not parsing a schema."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse ``[catalog.][db.]table`` (deeper nesting via exp.Dot)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, a subquery, or a
        plain table reference with alias/hints/sample/pivots."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if
subquery: 2298 if not subquery.args.get("pivots"): 2299 subquery.set("pivots", self._parse_pivots()) 2300 return subquery 2301 2302 this = self._parse_table_parts(schema=schema) 2303 2304 if schema: 2305 return self._parse_schema(this=this) 2306 2307 if self.alias_post_tablesample: 2308 table_sample = self._parse_table_sample() 2309 2310 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2311 if alias: 2312 this.set("alias", alias) 2313 2314 if not this.args.get("pivots"): 2315 this.set("pivots", self._parse_pivots()) 2316 2317 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2318 this.set( 2319 "hints", 2320 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2321 ) 2322 self._match_r_paren() 2323 2324 if not self.alias_post_tablesample: 2325 table_sample = self._parse_table_sample() 2326 2327 if table_sample: 2328 table_sample.set("this", this) 2329 this = table_sample 2330 2331 return this 2332 2333 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2334 if not self._match(TokenType.UNNEST): 2335 return None 2336 2337 expressions = self._parse_wrapped_csv(self._parse_type) 2338 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2339 alias = self._parse_table_alias() 2340 2341 if alias and self.unnest_column_only: 2342 if alias.args.get("columns"): 2343 self.raise_error("Unexpected extra column alias in unnest.") 2344 alias.set("columns", [alias.this]) 2345 alias.set("this", None) 2346 2347 offset = None 2348 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2349 self._match(TokenType.ALIAS) 2350 offset = self._parse_id_var() or exp.Identifier(this="offset") 2351 2352 return self.expression( 2353 exp.Unnest, 2354 expressions=expressions, 2355 ordinality=ordinality, 2356 alias=alias, 2357 offset=offset, 2358 ) 2359 2360 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2361 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2362 if 
not is_derived and not self._match(TokenType.VALUES): 2363 return None 2364 2365 expressions = self._parse_csv(self._parse_value) 2366 2367 if is_derived: 2368 self._match_r_paren() 2369 2370 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2371 2372 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2373 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2374 as_modifier and self._match_text_seq("USING", "SAMPLE") 2375 ): 2376 return None 2377 2378 bucket_numerator = None 2379 bucket_denominator = None 2380 bucket_field = None 2381 percent = None 2382 rows = None 2383 size = None 2384 seed = None 2385 2386 kind = ( 2387 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2388 ) 2389 method = self._parse_var(tokens=(TokenType.ROW,)) 2390 2391 self._match(TokenType.L_PAREN) 2392 2393 num = self._parse_number() 2394 2395 if self._match(TokenType.BUCKET): 2396 bucket_numerator = self._parse_number() 2397 self._match(TokenType.OUT_OF) 2398 bucket_denominator = bucket_denominator = self._parse_number() 2399 self._match(TokenType.ON) 2400 bucket_field = self._parse_field() 2401 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2402 percent = num 2403 elif self._match(TokenType.ROWS): 2404 rows = num 2405 else: 2406 size = num 2407 2408 self._match(TokenType.R_PAREN) 2409 2410 if self._match(TokenType.L_PAREN): 2411 method = self._parse_var() 2412 seed = self._match(TokenType.COMMA) and self._parse_number() 2413 self._match_r_paren() 2414 elif self._match_texts(("SEED", "REPEATABLE")): 2415 seed = self._parse_wrapped(self._parse_number) 2416 2417 return self.expression( 2418 exp.TableSample, 2419 method=method, 2420 bucket_numerator=bucket_numerator, 2421 bucket_denominator=bucket_denominator, 2422 bucket_field=bucket_field, 2423 percent=percent, 2424 rows=rows, 2425 size=size, 2426 seed=seed, 2427 kind=kind, 2428 ) 2429 2430 def 
 _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        # iter() with a sentinel calls _parse_pivot until it returns None.
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause into exp.Pivot, or return None."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT keyword without an opening paren: not a pivot after all.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names implied by the pivot; naming
            # order is dialect-controlled via PREFIXED_PIVOT_COLUMNS.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Column-name stems produced by a pivot's aggregations (overridable)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE / WITH
        TOTALS; loops so the modifiers can appear in any combination."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP takes no column list; bare ROLLUP(...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """A grouping set is a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST,
        normalizing implicit null ordering via ``self.null_ordering``."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) ->
 t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # An optional wrapping paren around the limit value is tolerated.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or the parsed
            # WAIT <n> expression.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto ``this``."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (the RANGE_PARSERS table) plus ISNULL,
        NOTNULL and IS, with optional NOT negation."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS token already consumed)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM <=> null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not IS NULL/TRUE/FALSE: back up to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IN predicate: UNNEST, a subquery or value
        list, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def
 _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression into exp.Interval."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, including << and >>
        spelled as two consecutive comparison tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast written as ``TYPE literal``, a bare data
        type, or fall through to a column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # An unparameterized "type" here may just be an identifier
                # that happens to share a type name: re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling parameterized, nested and array types,
        timestamp time-zone variants and INTERVAL types. Retreats and returns
        None when the tokens turn out not to form a type."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # NAME(...) could also be a function call; remember that for the
            # check_func disambiguation below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # One or more [] suffixes turn the type into (nested) arrays.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so treat this as not-a-type
                # (presumably a function call) and let the caller re-parse.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one ``name: type`` (or bare type) member of a STRUCT type."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted, bracketed or ::-cast) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
self._parse_star() 3022 or self._parse_function(anonymous=True) 3023 or self._parse_id_var() 3024 ) 3025 3026 if isinstance(field, exp.Func): 3027 # bigquery allows function calls like x.y.count(...) 3028 # SAFE.SUBSTR(...) 3029 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3030 this = self._replace_columns_with_dots(this) 3031 3032 if op: 3033 this = op(self, this, field) 3034 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3035 this = self.expression( 3036 exp.Column, 3037 this=field, 3038 table=this.this, 3039 db=this.args.get("table"), 3040 catalog=this.args.get("db"), 3041 ) 3042 else: 3043 this = self.expression(exp.Dot, this=this, expression=field) 3044 this = self._parse_bracket(this) 3045 3046 return this 3047 3048 def _parse_primary(self) -> t.Optional[exp.Expression]: 3049 if self._match_set(self.PRIMARY_PARSERS): 3050 token_type = self._prev.token_type 3051 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3052 3053 if token_type == TokenType.STRING: 3054 expressions = [primary] 3055 while self._match(TokenType.STRING): 3056 expressions.append(exp.Literal.string(self._prev.text)) 3057 if len(expressions) > 1: 3058 return self.expression(exp.Concat, expressions=expressions) 3059 return primary 3060 3061 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3062 return exp.Literal.number(f"0.{self._prev.text}") 3063 3064 if self._match(TokenType.L_PAREN): 3065 comments = self._prev_comments 3066 query = self._parse_select() 3067 3068 if query: 3069 expressions = [query] 3070 else: 3071 expressions = self._parse_csv(self._parse_expression) 3072 3073 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3074 3075 if isinstance(this, exp.Subqueryable): 3076 this = self._parse_set_operations( 3077 self._parse_subquery(this=this, parse_alias=False) 3078 ) 3079 elif len(expressions) > 1: 3080 this = self.expression(exp.Tuple, expressions=expressions) 3081 else: 3082 
this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3083 3084 if this: 3085 this.add_comments(comments) 3086 self._match_r_paren(expression=this) 3087 3088 return this 3089 3090 return None 3091 3092 def _parse_field( 3093 self, 3094 any_token: bool = False, 3095 tokens: t.Optional[t.Collection[TokenType]] = None, 3096 ) -> t.Optional[exp.Expression]: 3097 return ( 3098 self._parse_primary() 3099 or self._parse_function() 3100 or self._parse_id_var(any_token=any_token, tokens=tokens) 3101 ) 3102 3103 def _parse_function( 3104 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3105 ) -> t.Optional[exp.Expression]: 3106 if not self._curr: 3107 return None 3108 3109 token_type = self._curr.token_type 3110 3111 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3112 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3113 3114 if not self._next or self._next.token_type != TokenType.L_PAREN: 3115 if token_type in self.NO_PAREN_FUNCTIONS: 3116 self._advance() 3117 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3118 3119 return None 3120 3121 if token_type not in self.FUNC_TOKENS: 3122 return None 3123 3124 this = self._curr.text 3125 upper = this.upper() 3126 self._advance(2) 3127 3128 parser = self.FUNCTION_PARSERS.get(upper) 3129 3130 if parser and not anonymous: 3131 this = parser(self) 3132 else: 3133 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3134 3135 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3136 this = self.expression(subquery_predicate, this=self._parse_select()) 3137 self._match_r_paren() 3138 return this 3139 3140 if functions is None: 3141 functions = self.FUNCTIONS 3142 3143 function = functions.get(upper) 3144 args = self._parse_csv(self._parse_lambda) 3145 3146 if function and not anonymous: 3147 this = function(args) 3148 self.validate_expression(this, args) 3149 else: 3150 this = self.expression(exp.Anonymous, 
this=this, expressions=args) 3151 3152 self._match_r_paren(this) 3153 return self._parse_window(this) 3154 3155 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3156 return self._parse_column_def(self._parse_id_var()) 3157 3158 def _parse_user_defined_function( 3159 self, kind: t.Optional[TokenType] = None 3160 ) -> t.Optional[exp.Expression]: 3161 this = self._parse_id_var() 3162 3163 while self._match(TokenType.DOT): 3164 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3165 3166 if not self._match(TokenType.L_PAREN): 3167 return this 3168 3169 expressions = self._parse_csv(self._parse_function_parameter) 3170 self._match_r_paren() 3171 return self.expression( 3172 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3173 ) 3174 3175 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3176 literal = self._parse_primary() 3177 if literal: 3178 return self.expression(exp.Introducer, this=token.text, expression=literal) 3179 3180 return self.expression(exp.Identifier, this=token.text) 3181 3182 def _parse_national(self, token: Token) -> exp.Expression: 3183 return self.expression(exp.National, this=exp.Literal.string(token.text)) 3184 3185 def _parse_session_parameter(self) -> exp.Expression: 3186 kind = None 3187 this = self._parse_id_var() or self._parse_primary() 3188 3189 if this and self._match(TokenType.DOT): 3190 kind = this.name 3191 this = self._parse_var() or self._parse_primary() 3192 3193 return self.expression(exp.SessionParameter, this=this, kind=kind) 3194 3195 def _parse_lambda(self) -> t.Optional[exp.Expression]: 3196 index = self._index 3197 3198 if self._match(TokenType.L_PAREN): 3199 expressions = self._parse_csv(self._parse_id_var) 3200 3201 if not self._match(TokenType.R_PAREN): 3202 self._retreat(index) 3203 else: 3204 expressions = [self._parse_id_var()] 3205 3206 if self._match_set(self.LAMBDAS): 3207 return self.LAMBDAS[self._prev.token_type](self, 
expressions) 3208 3209 self._retreat(index) 3210 3211 this: t.Optional[exp.Expression] 3212 3213 if self._match(TokenType.DISTINCT): 3214 this = self.expression( 3215 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3216 ) 3217 else: 3218 this = self._parse_select_or_expression() 3219 3220 if isinstance(this, exp.EQ): 3221 left = this.this 3222 if isinstance(left, exp.Column): 3223 left.replace(exp.Var(this=left.text("this"))) 3224 3225 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3226 3227 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3228 index = self._index 3229 3230 try: 3231 if self._parse_select(nested=True): 3232 return this 3233 except Exception: 3234 pass 3235 finally: 3236 self._retreat(index) 3237 3238 if not self._match(TokenType.L_PAREN): 3239 return this 3240 3241 args = self._parse_csv( 3242 lambda: self._parse_constraint() 3243 or self._parse_column_def(self._parse_field(any_token=True)) 3244 ) 3245 self._match_r_paren() 3246 return self.expression(exp.Schema, this=this, expressions=args) 3247 3248 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3249 # column defs are not really columns, they're identifiers 3250 if isinstance(this, exp.Column): 3251 this = this.this 3252 kind = self._parse_types() 3253 3254 if self._match_text_seq("FOR", "ORDINALITY"): 3255 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3256 3257 constraints = [] 3258 while True: 3259 constraint = self._parse_column_constraint() 3260 if not constraint: 3261 break 3262 constraints.append(constraint) 3263 3264 if not kind and not constraints: 3265 return this 3266 3267 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3268 3269 def _parse_auto_increment(self) -> exp.Expression: 3270 start = None 3271 increment = None 3272 3273 if self._match(TokenType.L_PAREN, advance=False): 
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse a GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY column constraint.

        Also consumes the optional parenthesized sequence options
        (START WITH, INCREMENT BY, MINVALUE, MAXVALUE, [NO] CYCLE).
        """
        if self._match(TokenType.BY_DEFAULT):
            # BY DEFAULT [ON NULL]: `this=False` marks the non-ALWAYS variant.
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # ALWAYS is optional here; its absence still yields the ALWAYS variant.
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options may appear in any combination inside the parens.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3328 3329 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3330 if self._match_text_seq("NULL"): 3331 return self.expression(exp.NotNullColumnConstraint) 3332 if self._match_text_seq("CASESPECIFIC"): 3333 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3334 return None 3335 3336 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3337 if self._match(TokenType.CONSTRAINT): 3338 this = self._parse_id_var() 3339 else: 3340 this = None 3341 3342 if self._match_texts(self.CONSTRAINT_PARSERS): 3343 return self.expression( 3344 exp.ColumnConstraint, 3345 this=this, 3346 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3347 ) 3348 3349 return this 3350 3351 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3352 if not self._match(TokenType.CONSTRAINT): 3353 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3354 3355 this = self._parse_id_var() 3356 expressions = [] 3357 3358 while True: 3359 constraint = self._parse_unnamed_constraint() or self._parse_function() 3360 if not constraint: 3361 break 3362 expressions.append(constraint) 3363 3364 return self.expression(exp.Constraint, this=this, expressions=expressions) 3365 3366 def _parse_unnamed_constraint( 3367 self, constraints: t.Optional[t.Collection[str]] = None 3368 ) -> t.Optional[exp.Expression]: 3369 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3370 return None 3371 3372 constraint = self._prev.text.upper() 3373 if constraint not in self.CONSTRAINT_PARSERS: 3374 self.raise_error(f"No parser found for schema constraint {constraint}.") 3375 3376 return self.CONSTRAINT_PARSERS[constraint](self) 3377 3378 def _parse_unique(self) -> exp.Expression: 3379 if not self._match(TokenType.L_PAREN, advance=False): 3380 return self.expression(exp.UniqueColumnConstraint) 3381 return self.expression(exp.Unique, 
expressions=self._parse_wrapped_id_vars()) 3382 3383 def _parse_key_constraint_options(self) -> t.List[str]: 3384 options = [] 3385 while True: 3386 if not self._curr: 3387 break 3388 3389 if self._match(TokenType.ON): 3390 action = None 3391 on = self._advance_any() and self._prev.text 3392 3393 if self._match(TokenType.NO_ACTION): 3394 action = "NO ACTION" 3395 elif self._match(TokenType.CASCADE): 3396 action = "CASCADE" 3397 elif self._match_pair(TokenType.SET, TokenType.NULL): 3398 action = "SET NULL" 3399 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3400 action = "SET DEFAULT" 3401 else: 3402 self.raise_error("Invalid key constraint") 3403 3404 options.append(f"ON {on} {action}") 3405 elif self._match_text_seq("NOT", "ENFORCED"): 3406 options.append("NOT ENFORCED") 3407 elif self._match_text_seq("DEFERRABLE"): 3408 options.append("DEFERRABLE") 3409 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3410 options.append("INITIALLY DEFERRED") 3411 elif self._match_text_seq("NORELY"): 3412 options.append("NORELY") 3413 elif self._match_text_seq("MATCH", "FULL"): 3414 options.append("MATCH FULL") 3415 else: 3416 break 3417 3418 return options 3419 3420 def _parse_references(self, match=True) -> t.Optional[exp.Expression]: 3421 if match and not self._match(TokenType.REFERENCES): 3422 return None 3423 3424 expressions = None 3425 this = self._parse_id_var() 3426 3427 if self._match(TokenType.L_PAREN, advance=False): 3428 expressions = self._parse_wrapped_id_vars() 3429 3430 options = self._parse_key_constraint_options() 3431 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3432 3433 def _parse_foreign_key(self) -> exp.Expression: 3434 expressions = self._parse_wrapped_id_vars() 3435 reference = self._parse_references() 3436 options = {} 3437 3438 while self._match(TokenType.ON): 3439 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3440 self.raise_error("Expected DELETE or UPDATE") 3441 3442 kind = 
self._prev.text.lower() 3443 3444 if self._match(TokenType.NO_ACTION): 3445 action = "NO ACTION" 3446 elif self._match(TokenType.SET): 3447 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3448 action = "SET " + self._prev.text.upper() 3449 else: 3450 self._advance() 3451 action = self._prev.text.upper() 3452 3453 options[kind] = action 3454 3455 return self.expression( 3456 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3457 ) 3458 3459 def _parse_primary_key(self) -> exp.Expression: 3460 desc = ( 3461 self._match_set((TokenType.ASC, TokenType.DESC)) 3462 and self._prev.token_type == TokenType.DESC 3463 ) 3464 3465 if not self._match(TokenType.L_PAREN, advance=False): 3466 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3467 3468 expressions = self._parse_wrapped_csv(self._parse_field) 3469 options = self._parse_key_constraint_options() 3470 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3471 3472 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3473 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3474 return this 3475 3476 bracket_kind = self._prev.token_type 3477 expressions: t.List[t.Optional[exp.Expression]] 3478 3479 if self._match(TokenType.COLON): 3480 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3481 else: 3482 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3483 3484 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3485 if bracket_kind == TokenType.L_BRACE: 3486 this = self.expression(exp.Struct, expressions=expressions) 3487 elif not this or this.name.upper() == "ARRAY": 3488 this = self.expression(exp.Array, expressions=expressions) 3489 else: 3490 expressions = apply_index_offset(this, expressions, -self.index_offset) 3491 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3492 3493 if 
not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3494 self.raise_error("Expected ]") 3495 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3496 self.raise_error("Expected }") 3497 3498 self._add_comments(this) 3499 return self._parse_bracket(this) 3500 3501 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3502 if self._match(TokenType.COLON): 3503 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3504 return this 3505 3506 def _parse_case(self) -> t.Optional[exp.Expression]: 3507 ifs = [] 3508 default = None 3509 3510 expression = self._parse_conjunction() 3511 3512 while self._match(TokenType.WHEN): 3513 this = self._parse_conjunction() 3514 self._match(TokenType.THEN) 3515 then = self._parse_conjunction() 3516 ifs.append(self.expression(exp.If, this=this, true=then)) 3517 3518 if self._match(TokenType.ELSE): 3519 default = self._parse_conjunction() 3520 3521 if not self._match(TokenType.END): 3522 self.raise_error("Expected END after CASE", self._prev) 3523 3524 return self._parse_window( 3525 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3526 ) 3527 3528 def _parse_if(self) -> t.Optional[exp.Expression]: 3529 if self._match(TokenType.L_PAREN): 3530 args = self._parse_csv(self._parse_conjunction) 3531 this = exp.If.from_arg_list(args) 3532 self.validate_expression(this, args) 3533 self._match_r_paren() 3534 else: 3535 index = self._index - 1 3536 condition = self._parse_conjunction() 3537 3538 if not condition: 3539 self._retreat(index) 3540 return None 3541 3542 self._match(TokenType.THEN) 3543 true = self._parse_conjunction() 3544 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3545 self._match(TokenType.END) 3546 this = self.expression(exp.If, this=condition, true=true, false=false) 3547 3548 return self._parse_window(this) 3549 3550 def _parse_extract(self) -> exp.Expression: 3551 
this = self._parse_function() or self._parse_var() or self._parse_type() 3552 3553 if self._match(TokenType.FROM): 3554 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3555 3556 if not self._match(TokenType.COMMA): 3557 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3558 3559 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3560 3561 def _parse_cast(self, strict: bool) -> exp.Expression: 3562 this = self._parse_conjunction() 3563 3564 if not self._match(TokenType.ALIAS): 3565 if self._match(TokenType.COMMA): 3566 return self.expression( 3567 exp.CastToStrType, this=this, expression=self._parse_string() 3568 ) 3569 else: 3570 self.raise_error("Expected AS after CAST") 3571 3572 to = self._parse_types() 3573 3574 if not to: 3575 self.raise_error("Expected TYPE after CAST") 3576 elif to.this == exp.DataType.Type.CHAR: 3577 if self._match(TokenType.CHARACTER_SET): 3578 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3579 3580 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3581 3582 def _parse_string_agg(self) -> exp.Expression: 3583 expression: t.Optional[exp.Expression] 3584 3585 if self._match(TokenType.DISTINCT): 3586 args = self._parse_csv(self._parse_conjunction) 3587 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3588 else: 3589 args = self._parse_csv(self._parse_conjunction) 3590 expression = seq_get(args, 0) 3591 3592 index = self._index 3593 if not self._match(TokenType.R_PAREN): 3594 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3595 order = self._parse_order(this=expression) 3596 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3597 3598 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Two arguments (or fewer): the charset variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired argument is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats a NULL search value as matching NULL, unlike `=`.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equal, or both sides NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
self._match_text_seq("KEYS") 3701 3702 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3703 format_json = self._match_text_seq("FORMAT", "JSON") 3704 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3705 3706 return self.expression( 3707 exp.JSONObject, 3708 expressions=expressions, 3709 null_handling=null_handling, 3710 unique_keys=unique_keys, 3711 return_type=return_type, 3712 format_json=format_json, 3713 encoding=encoding, 3714 ) 3715 3716 def _parse_logarithm(self) -> exp.Expression: 3717 # Default argument order is base, expression 3718 args = self._parse_csv(self._parse_range) 3719 3720 if len(args) > 1: 3721 if not self.LOG_BASE_FIRST: 3722 args.reverse() 3723 return exp.Log.from_arg_list(args) 3724 3725 return self.expression( 3726 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3727 ) 3728 3729 def _parse_match_against(self) -> exp.Expression: 3730 expressions = self._parse_csv(self._parse_column) 3731 3732 self._match_text_seq(")", "AGAINST", "(") 3733 3734 this = self._parse_string() 3735 3736 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3737 modifier = "IN NATURAL LANGUAGE MODE" 3738 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3739 modifier = f"{modifier} WITH QUERY EXPANSION" 3740 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3741 modifier = "IN BOOLEAN MODE" 3742 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3743 modifier = "WITH QUERY EXPANSION" 3744 else: 3745 modifier = None 3746 3747 return self.expression( 3748 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3749 ) 3750 3751 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3752 def _parse_open_json(self) -> exp.Expression: 3753 this = self._parse_bitwise() 3754 path = self._match(TokenType.COMMA) and self._parse_string() 3755 3756 def _parse_open_json_column_def() -> exp.Expression: 3757 this = 
self._parse_field(any_token=True) 3758 kind = self._parse_types() 3759 path = self._parse_string() 3760 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3761 return self.expression( 3762 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3763 ) 3764 3765 expressions = None 3766 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3767 self._match_l_paren() 3768 expressions = self._parse_csv(_parse_open_json_column_def) 3769 3770 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3771 3772 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3773 args = self._parse_csv(self._parse_bitwise) 3774 3775 if self._match(TokenType.IN): 3776 return self.expression( 3777 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3778 ) 3779 3780 if haystack_first: 3781 haystack = seq_get(args, 0) 3782 needle = seq_get(args, 1) 3783 else: 3784 needle = seq_get(args, 0) 3785 haystack = seq_get(args, 1) 3786 3787 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3788 3789 self.validate_expression(this, args) 3790 3791 return this 3792 3793 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3794 args = self._parse_csv(self._parse_table) 3795 return exp.JoinHint(this=func_name.upper(), expressions=args) 3796 3797 def _parse_substring(self) -> exp.Expression: 3798 # Postgres supports the form: substring(string [from int] [for int]) 3799 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3800 3801 args = self._parse_csv(self._parse_bitwise) 3802 3803 if self._match(TokenType.FROM): 3804 args.append(self._parse_bitwise()) 3805 if self._match(TokenType.FOR): 3806 args.append(self._parse_bitwise()) 3807 3808 this = exp.Substring.from_arg_list(args) 3809 self.validate_expression(this, args) 3810 3811 return this 3812 3813 def _parse_trim(self) -> exp.Expression: 3814 # 
https://www.w3resource.com/sql/character-functions/trim.php 3815 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3816 3817 position = None 3818 collation = None 3819 3820 if self._match_set(self.TRIM_TYPES): 3821 position = self._prev.text.upper() 3822 3823 expression = self._parse_bitwise() 3824 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3825 this = self._parse_bitwise() 3826 else: 3827 this = expression 3828 expression = None 3829 3830 if self._match(TokenType.COLLATE): 3831 collation = self._parse_bitwise() 3832 3833 return self.expression( 3834 exp.Trim, 3835 this=this, 3836 position=position, 3837 expression=expression, 3838 collation=collation, 3839 ) 3840 3841 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3842 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3843 3844 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3845 return self._parse_window(self._parse_id_var(), alias=True) 3846 3847 def _parse_respect_or_ignore_nulls( 3848 self, this: t.Optional[exp.Expression] 3849 ) -> t.Optional[exp.Expression]: 3850 if self._match(TokenType.IGNORE_NULLS): 3851 return self.expression(exp.IgnoreNulls, this=this) 3852 if self._match(TokenType.RESPECT_NULLS): 3853 return self.expression(exp.RespectNulls, this=this) 3854 return this 3855 3856 def _parse_window( 3857 self, this: t.Optional[exp.Expression], alias: bool = False 3858 ) -> t.Optional[exp.Expression]: 3859 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3860 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3861 self._match_r_paren() 3862 3863 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3864 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3865 if self._match(TokenType.WITHIN_GROUP): 3866 order = self._parse_wrapped(self._parse_order) 3867 this = self.expression(exp.WithinGroup, this=this, expression=order) 3868 3869 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3870 # Some dialects choose to implement and some do not. 3871 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3872 3873 # There is some code above in _parse_lambda that handles 3874 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3875 3876 # The below changes handle 3877 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3878 3879 # Oracle allows both formats 3880 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3881 # and Snowflake chose to do the same for familiarity 3882 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3883 this = self._parse_respect_or_ignore_nulls(this) 3884 3885 # bigquery select from window x AS (partition by ...) 
        # --- tail of _parse_window (the def header is above this chunk) ---
        if alias:
            # An explicit alias means this is a named-window definition, not an OVER clause.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # No parenthesized spec: a bare reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        # Frame kind is the matched keyword text ("ROWS"/"RANGE"), or False if absent.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one endpoint of a window frame.

        Returns a dict with "value" (UNBOUNDED / CURRENT ROW keyword text, or a
        bitwise expression for a numeric offset) and "side" (PRECEDING/FOLLOWING
        keyword text, or False when absent).
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias for `this`.

        With explicit=True an AS keyword is required; a parenthesized list
        produces an exp.Aliases node. Returns `this` unchanged if no alias.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or any token usable as one.

        Falls back from a quoted identifier to any non-reserved token
        (any_token=True) or to the `tokens` / ID_VAR_TOKENS sets. Optional
        `prefix_tokens` are consumed and glued onto the front of the name.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and wrap it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens if requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds one token if the sub-parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star EXCEPT column list, parenthesized or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star REPLACE expression list, parenthesized or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's comments to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if required parens are missing."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or else an expression with possible set operations."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse a BEGIN/START TRANSACTION statement with optional modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode may span multiple VAR tokens (e.g. "READ ONLY").
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK, including TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was already consumed by the dispatcher.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ADD [COLUMN] action, including FIRST/AFTER position."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE DROP [COLUMN] action, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse an ALTER TABLE DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        # The constraint keyword was consumed by _parse_alter_table_add.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ADD actions: constraints if present, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind so "ADD" can be re-consumed as ADD COLUMN.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ALTER COLUMN (drop/set default, set data type)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE DROP actions: partitions if present, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as DROP COLUMN actions.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; anything unrecognized falls back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse if every token was consumed; otherwise
            # fall through and preserve the statement as an opaque command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement with its WHEN [NOT] MATCHED clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, False if neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via SHOW_PARSERS; unknown variants become a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try another form.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION with its characteristics list."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, defaulting to plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; leftovers make it fall back to a raw Command."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the (possibly multi-word) `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword (the `start` token's text) from the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a parser keyed by the longest keyword sequence found in `trie`.

        Consumes tokens while they form a prefix in the trie; on a full match
        returns the parser, otherwise rewinds and returns None.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against `token_type`.

        Returns True on a match (advancing unless advance=False and attaching
        the token's comments to `expression`), otherwise None.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set of token types; True or None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a pair; True or None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require an opening parenthesis or raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression=None):
        """Require a closing parenthesis or raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against `texts`; bool."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts.

        Consumes the whole sequence on success (rewinding if advance=False);
        rewinds entirely and returns False on the first mismatch.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var form."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Replace columns named in `lambda_variables` with bare identifiers/dots."""
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    # Replace at the outermost Dot so the whole chain is swapped.
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No Dot ancestor: replace the column (or the root) directly.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap expression from alternating key/value arguments.

    A single star argument produces an exp.StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # args alternates key, value, key, value, ... — split by position parity.
    keys = [args[even] for even in range(0, len(args), 2)]
    values = [args[even + 1] for even in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 
TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 
TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.ASC, 211 TokenType.AUTO_INCREMENT, 212 TokenType.BEGIN, 213 TokenType.BOTH, 214 TokenType.BUCKET, 215 TokenType.CACHE, 216 TokenType.CASCADE, 217 TokenType.COLLATE, 218 TokenType.COMMAND, 219 TokenType.COMMENT, 220 TokenType.COMMIT, 221 TokenType.COMPOUND, 222 TokenType.CONSTRAINT, 223 TokenType.DEFAULT, 224 TokenType.DELETE, 225 TokenType.DESC, 226 TokenType.DESCRIBE, 227 TokenType.DIV, 228 TokenType.END, 229 TokenType.EXECUTE, 230 TokenType.ESCAPE, 231 TokenType.FALSE, 232 TokenType.FIRST, 233 TokenType.FILTER, 234 TokenType.FOLLOWING, 235 TokenType.FORMAT, 236 TokenType.FULL, 237 TokenType.IF, 238 TokenType.IS, 239 TokenType.ISNULL, 240 TokenType.INTERVAL, 241 TokenType.KEEP, 242 TokenType.LAZY, 243 TokenType.LEADING, 244 TokenType.LEFT, 245 TokenType.LOCAL, 246 TokenType.MATERIALIZED, 247 TokenType.MERGE, 248 TokenType.NATURAL, 249 TokenType.NEXT, 250 TokenType.OFFSET, 251 TokenType.ONLY, 252 TokenType.OPTIONS, 253 TokenType.ORDINALITY, 254 TokenType.OVERWRITE, 255 TokenType.PARTITION, 256 TokenType.PERCENT, 257 TokenType.PIVOT, 258 TokenType.PRAGMA, 259 TokenType.PRECEDING, 260 TokenType.RANGE, 261 TokenType.REFERENCES, 262 TokenType.RIGHT, 263 TokenType.ROW, 264 TokenType.ROWS, 265 TokenType.SEED, 266 TokenType.SEMI, 267 TokenType.SET, 268 TokenType.SETTINGS, 269 TokenType.SHOW, 270 TokenType.SORTKEY, 271 TokenType.TEMPORARY, 272 TokenType.TOP, 273 TokenType.TRAILING, 274 TokenType.TRUE, 275 TokenType.UNBOUNDED, 276 TokenType.UNIQUE, 277 TokenType.UNLOGGED, 278 TokenType.UNPIVOT, 279 TokenType.VOLATILE, 280 TokenType.WINDOW, 281 *CREATABLES, 282 *SUBQUERY_PREDICATES, 283 *TYPE_TOKENS, 284 *NO_PAREN_FUNCTIONS, 285 } 286 287 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 288 289 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 290 TokenType.APPLY, 291 
TokenType.FULL, 292 TokenType.LEFT, 293 TokenType.LOCK, 294 TokenType.NATURAL, 295 TokenType.OFFSET, 296 TokenType.RIGHT, 297 TokenType.WINDOW, 298 } 299 300 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 301 302 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 303 304 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 305 306 FUNC_TOKENS = { 307 TokenType.COMMAND, 308 TokenType.CURRENT_DATE, 309 TokenType.CURRENT_DATETIME, 310 TokenType.CURRENT_TIMESTAMP, 311 TokenType.CURRENT_TIME, 312 TokenType.CURRENT_USER, 313 TokenType.FILTER, 314 TokenType.FIRST, 315 TokenType.FORMAT, 316 TokenType.GLOB, 317 TokenType.IDENTIFIER, 318 TokenType.INDEX, 319 TokenType.ISNULL, 320 TokenType.ILIKE, 321 TokenType.LIKE, 322 TokenType.MERGE, 323 TokenType.OFFSET, 324 TokenType.PRIMARY_KEY, 325 TokenType.RANGE, 326 TokenType.REPLACE, 327 TokenType.ROW, 328 TokenType.UNNEST, 329 TokenType.VAR, 330 TokenType.LEFT, 331 TokenType.RIGHT, 332 TokenType.DATE, 333 TokenType.DATETIME, 334 TokenType.TABLE, 335 TokenType.TIMESTAMP, 336 TokenType.TIMESTAMPTZ, 337 TokenType.WINDOW, 338 *TYPE_TOKENS, 339 *SUBQUERY_PREDICATES, 340 } 341 342 CONJUNCTION = { 343 TokenType.AND: exp.And, 344 TokenType.OR: exp.Or, 345 } 346 347 EQUALITY = { 348 TokenType.EQ: exp.EQ, 349 TokenType.NEQ: exp.NEQ, 350 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 351 } 352 353 COMPARISON = { 354 TokenType.GT: exp.GT, 355 TokenType.GTE: exp.GTE, 356 TokenType.LT: exp.LT, 357 TokenType.LTE: exp.LTE, 358 } 359 360 BITWISE = { 361 TokenType.AMP: exp.BitwiseAnd, 362 TokenType.CARET: exp.BitwiseXor, 363 TokenType.PIPE: exp.BitwiseOr, 364 TokenType.DPIPE: exp.DPipe, 365 } 366 367 TERM = { 368 TokenType.DASH: exp.Sub, 369 TokenType.PLUS: exp.Add, 370 TokenType.MOD: exp.Mod, 371 TokenType.COLLATE: exp.Collate, 372 } 373 374 FACTOR = { 375 TokenType.DIV: exp.IntDiv, 376 TokenType.LR_ARROW: exp.Distance, 377 TokenType.SLASH: exp.Div, 378 TokenType.STAR: exp.Mul, 379 } 380 381 TIMESTAMPS = { 382 
TokenType.TIME, 383 TokenType.TIMESTAMP, 384 TokenType.TIMESTAMPTZ, 385 TokenType.TIMESTAMPLTZ, 386 } 387 388 SET_OPERATIONS = { 389 TokenType.UNION, 390 TokenType.INTERSECT, 391 TokenType.EXCEPT, 392 } 393 394 JOIN_SIDES = { 395 TokenType.LEFT, 396 TokenType.RIGHT, 397 TokenType.FULL, 398 } 399 400 JOIN_KINDS = { 401 TokenType.INNER, 402 TokenType.OUTER, 403 TokenType.CROSS, 404 TokenType.SEMI, 405 TokenType.ANTI, 406 } 407 408 LAMBDAS = { 409 TokenType.ARROW: lambda self, expressions: self.expression( 410 exp.Lambda, 411 this=self._replace_lambda( 412 self._parse_conjunction(), 413 {node.name for node in expressions}, 414 ), 415 expressions=expressions, 416 ), 417 TokenType.FARROW: lambda self, expressions: self.expression( 418 exp.Kwarg, 419 this=exp.Var(this=expressions[0].name), 420 expression=self._parse_conjunction(), 421 ), 422 } 423 424 COLUMN_OPERATORS = { 425 TokenType.DOT: None, 426 TokenType.DCOLON: lambda self, this, to: self.expression( 427 exp.Cast if self.STRICT_CAST else exp.TryCast, 428 this=this, 429 to=to, 430 ), 431 TokenType.ARROW: lambda self, this, path: self.expression( 432 exp.JSONExtract, 433 this=this, 434 expression=path, 435 ), 436 TokenType.DARROW: lambda self, this, path: self.expression( 437 exp.JSONExtractScalar, 438 this=this, 439 expression=path, 440 ), 441 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 442 exp.JSONBExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 447 exp.JSONBExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 452 exp.JSONBContains, 453 this=this, 454 expression=key, 455 ), 456 } 457 458 EXPRESSION_PARSERS = { 459 exp.Column: lambda self: self._parse_column(), 460 exp.DataType: lambda self: self._parse_types(), 461 exp.From: lambda self: self._parse_from(), 462 exp.Group: lambda self: self._parse_group(), 463 exp.Identifier: lambda self: 
self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Dispatch table: first token of a statement -> parse method (used by _parse_statement).
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator token -> parse callback.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary-expression token -> parse callback; each callback also
    # receives the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder token -> parse callback.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/comparison-style operators (BETWEEN, IN, LIKE, ...) keyed by token.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parse callback. NOTE: several callbacks inspect
    # self._prev (e.g. NO/DUAL/DEFAULT prefixes consumed by the caller).
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parse callback (used when parsing schemas).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parse callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear at schema level without a column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need bespoke parsing (e.g. CAST(x AS t)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier name -> parser; iter(fn, None) collects repeated clauses
    # (e.g. multiple JOINs) until the parser returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keyword -> parse callback.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; feeds the _show_trie built by the metaclass.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can take query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect feature flags (overridden by subclasses).
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # LOG(b, n) argument order; some dialects put the base second.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for the meaning of each option.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        # Clears all per-parse state so the instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if a requested type has no registered parser.
            ParseError: if none of the candidate types parse successfully; the
                failures for each attempted type are merged into one error.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was being attempted.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream on semicolons into per-statement
        # chunks, then runs parse_method once per chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open an empty chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if
self._index < len(self._tokens):
                # Leftover tokens mean the statement was not fully consumed.
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Surround the offending span with up to error_message_context chars of SQL.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
980 """ 981 instance = exp_class(**kwargs) 982 instance.add_comments(comments) if comments else self._add_comments(instance) 983 self.validate_expression(instance) 984 return instance 985 986 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 987 if expression and self._prev_comments: 988 expression.add_comments(self._prev_comments) 989 self._prev_comments = None 990 991 def validate_expression( 992 self, expression: exp.Expression, args: t.Optional[t.List] = None 993 ) -> None: 994 """ 995 Validates an already instantiated expression, making sure that all its mandatory arguments 996 are set. 997 998 Args: 999 expression: the expression to validate. 1000 args: an optional list of items that was used to instantiate the expression, if it's a Func. 1001 """ 1002 if self.error_level == ErrorLevel.IGNORE: 1003 return 1004 1005 for error_message in expression.error_messages(args): 1006 self.raise_error(error_message) 1007 1008 def _find_sql(self, start: Token, end: Token) -> str: 1009 return self.sql[start.start : end.end + 1] 1010 1011 def _advance(self, times: int = 1) -> None: 1012 self._index += times 1013 self._curr = seq_get(self._tokens, self._index) 1014 self._next = seq_get(self._tokens, self._index + 1) 1015 if self._index > 0: 1016 self._prev = self._tokens[self._index - 1] 1017 self._prev_comments = self._prev.comments 1018 else: 1019 self._prev = None 1020 self._prev_comments = None 1021 1022 def _retreat(self, index: int) -> None: 1023 if index != self._index: 1024 self._advance(index - self._index) 1025 1026 def _parse_command(self) -> exp.Command: 1027 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1028 1029 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1030 start = self._prev 1031 exists = self._parse_exists() if allow_exists else None 1032 1033 self._match(TokenType.ON) 1034 1035 kind = self._match_set(self.CREATABLES) and self._prev 1036 1037 if not kind: 1038 
return self._parse_as_command(start) 1039 1040 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1041 this = self._parse_user_defined_function(kind=kind.token_type) 1042 elif kind.token_type == TokenType.TABLE: 1043 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1044 elif kind.token_type == TokenType.COLUMN: 1045 this = self._parse_column() 1046 else: 1047 this = self._parse_id_var() 1048 1049 self._match(TokenType.IS) 1050 1051 return self.expression( 1052 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1053 ) 1054 1055 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1056 def _parse_ttl(self) -> exp.Expression: 1057 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1058 this = self._parse_bitwise() 1059 1060 if self._match_text_seq("DELETE"): 1061 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1062 if self._match_text_seq("RECOMPRESS"): 1063 return self.expression( 1064 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1065 ) 1066 if self._match_text_seq("TO", "DISK"): 1067 return self.expression( 1068 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1069 ) 1070 if self._match_text_seq("TO", "VOLUME"): 1071 return self.expression( 1072 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1073 ) 1074 1075 return this 1076 1077 expressions = self._parse_csv(_parse_ttl_action) 1078 where = self._parse_where() 1079 group = self._parse_group() 1080 1081 aggregates = None 1082 if group and self._match(TokenType.SET): 1083 aggregates = self._parse_csv(self._parse_set_item) 1084 1085 return self.expression( 1086 exp.MergeTreeTTL, 1087 expressions=expressions, 1088 where=where, 1089 group=group, 1090 aggregates=aggregates, 1091 ) 1092 1093 def _parse_statement(self) -> t.Optional[exp.Expression]: 1094 if self._curr is None: 1095 return None 1096 1097 if 
self._match_set(self.STATEMENT_PARSERS): 1098 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1099 1100 if self._match_set(Tokenizer.COMMANDS): 1101 return self._parse_command() 1102 1103 expression = self._parse_expression() 1104 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1105 return self._parse_query_modifiers(expression) 1106 1107 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1108 start = self._prev 1109 temporary = self._match(TokenType.TEMPORARY) 1110 materialized = self._match(TokenType.MATERIALIZED) 1111 kind = self._match_set(self.CREATABLES) and self._prev.text 1112 if not kind: 1113 return self._parse_as_command(start) 1114 1115 return self.expression( 1116 exp.Drop, 1117 exists=self._parse_exists(), 1118 this=self._parse_table(schema=True), 1119 kind=kind, 1120 temporary=temporary, 1121 materialized=materialized, 1122 cascade=self._match(TokenType.CASCADE), 1123 constraints=self._match_text_seq("CONSTRAINTS"), 1124 purge=self._match_text_seq("PURGE"), 1125 ) 1126 1127 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1128 return ( 1129 self._match(TokenType.IF) 1130 and (not not_ or self._match(TokenType.NOT)) 1131 and self._match(TokenType.EXISTS) 1132 ) 1133 1134 def _parse_create(self) -> t.Optional[exp.Expression]: 1135 start = self._prev 1136 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1137 TokenType.OR, TokenType.REPLACE 1138 ) 1139 unique = self._match(TokenType.UNIQUE) 1140 1141 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1142 self._match(TokenType.TABLE) 1143 1144 properties = None 1145 create_token = self._match_set(self.CREATABLES) and self._prev 1146 1147 if not create_token: 1148 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1149 create_token = self._match_set(self.CREATABLES) and self._prev 1150 1151 if not properties or not create_token: 1152 return 
self._parse_as_command(start) 1153 1154 exists = self._parse_exists(not_=True) 1155 this = None 1156 expression = None 1157 indexes = None 1158 no_schema_binding = None 1159 begin = None 1160 clone = None 1161 1162 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1163 this = self._parse_user_defined_function(kind=create_token.token_type) 1164 temp_properties = self._parse_properties() 1165 if properties and temp_properties: 1166 properties.expressions.extend(temp_properties.expressions) 1167 elif temp_properties: 1168 properties = temp_properties 1169 1170 self._match(TokenType.ALIAS) 1171 begin = self._match(TokenType.BEGIN) 1172 return_ = self._match_text_seq("RETURN") 1173 expression = self._parse_statement() 1174 1175 if return_: 1176 expression = self.expression(exp.Return, this=expression) 1177 elif create_token.token_type == TokenType.INDEX: 1178 this = self._parse_index() 1179 elif create_token.token_type in self.DB_CREATABLES: 1180 table_parts = self._parse_table_parts(schema=True) 1181 1182 # exp.Properties.Location.POST_NAME 1183 if self._match(TokenType.COMMA): 1184 temp_properties = self._parse_properties(before=True) 1185 if properties and temp_properties: 1186 properties.expressions.extend(temp_properties.expressions) 1187 elif temp_properties: 1188 properties = temp_properties 1189 1190 this = self._parse_schema(this=table_parts) 1191 1192 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1193 temp_properties = self._parse_properties() 1194 if properties and temp_properties: 1195 properties.expressions.extend(temp_properties.expressions) 1196 elif temp_properties: 1197 properties = temp_properties 1198 1199 self._match(TokenType.ALIAS) 1200 1201 # exp.Properties.Location.POST_ALIAS 1202 if not ( 1203 self._match(TokenType.SELECT, advance=False) 1204 or self._match(TokenType.WITH, advance=False) 1205 or self._match(TokenType.L_PAREN, advance=False) 1206 ): 1207 temp_properties = self._parse_properties() 1208 if properties 
and temp_properties: 1209 properties.expressions.extend(temp_properties.expressions) 1210 elif temp_properties: 1211 properties = temp_properties 1212 1213 expression = self._parse_ddl_select() 1214 1215 if create_token.token_type == TokenType.TABLE: 1216 indexes = [] 1217 while True: 1218 index = self._parse_create_table_index() 1219 1220 # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX 1221 temp_properties = self._parse_properties() 1222 if properties and temp_properties: 1223 properties.expressions.extend(temp_properties.expressions) 1224 elif temp_properties: 1225 properties = temp_properties 1226 1227 if not index: 1228 break 1229 else: 1230 self._match(TokenType.COMMA) 1231 indexes.append(index) 1232 elif create_token.token_type == TokenType.VIEW: 1233 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1234 no_schema_binding = True 1235 1236 if self._match_text_seq("CLONE"): 1237 clone = self._parse_table(schema=True) 1238 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1239 clone_kind = ( 1240 self._match(TokenType.L_PAREN) 1241 and self._match_texts(self.CLONE_KINDS) 1242 and self._prev.text.upper() 1243 ) 1244 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1245 self._match(TokenType.R_PAREN) 1246 clone = self.expression( 1247 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1248 ) 1249 1250 return self.expression( 1251 exp.Create, 1252 this=this, 1253 kind=create_token.text, 1254 replace=replace, 1255 unique=unique, 1256 expression=expression, 1257 exists=exists, 1258 properties=properties, 1259 indexes=indexes, 1260 no_schema_binding=no_schema_binding, 1261 begin=begin, 1262 clone=clone, 1263 ) 1264 1265 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1266 self._match(TokenType.COMMA) 1267 1268 # parsers look to _prev for no/dual/default, so need to consume first 1269 self._match_text_seq("NO") 1270 
self._match_text_seq("DUAL") 1271 self._match_text_seq("DEFAULT") 1272 1273 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1274 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1275 1276 return None 1277 1278 def _parse_property(self) -> t.Optional[exp.Expression]: 1279 if self._match_texts(self.PROPERTY_PARSERS): 1280 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1281 1282 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1283 return self._parse_character_set(default=True) 1284 1285 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1286 return self._parse_sortkey(compound=True) 1287 1288 if self._match_text_seq("SQL", "SECURITY"): 1289 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1290 1291 assignment = self._match_pair( 1292 TokenType.VAR, TokenType.EQ, advance=False 1293 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1294 1295 if assignment: 1296 key = self._parse_var_or_string() 1297 self._match(TokenType.EQ) 1298 return self.expression(exp.Property, this=key, value=self._parse_column()) 1299 1300 return None 1301 1302 def _parse_stored(self) -> exp.Expression: 1303 self._match(TokenType.ALIAS) 1304 1305 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1306 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1307 1308 return self.expression( 1309 exp.FileFormatProperty, 1310 this=self.expression( 1311 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1312 ) 1313 if input_format or output_format 1314 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1315 ) 1316 1317 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1318 self._match(TokenType.EQ) 1319 self._match(TokenType.ALIAS) 1320 return self.expression(exp_class, this=self._parse_field()) 1321 1322 def 
_parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collects consecutive properties into a single exp.Properties node.
        # `before` selects the pre-name (Teradata-style) property grammar.
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # Teradata FALLBACK [PROTECTION]; `no` is consumed by the caller.
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE directly after CREATE [OR REPLACE] [UNIQUE] is a Teradata
        # table property; anywhere else it's a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) | WITH JOURNAL ... | WITH [NO] DATA | WITH ... ISOLATED LOADING
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user =
self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        # WITH JOURNAL TABLE = <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        # [NO] LOG; the NO prefix was consumed by the caller.
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # [NO|DUAL] [BEFORE] JOURNAL; no/dual were consumed by the caller.
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # [NOT] [LOCAL] AFTER JOURNAL (Teradata).
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # MERGEBLOCKRATIO [= <number> [PERCENT]]; no/default consumed by caller.
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
1443 exp.MergeBlockRatioProperty, 1444 this=self._parse_number(), 1445 percent=self._match(TokenType.PERCENT), 1446 ) 1447 else: 1448 return self.expression( 1449 exp.MergeBlockRatioProperty, 1450 no=no, 1451 default=default, 1452 ) 1453 1454 def _parse_datablocksize(self, default=None) -> exp.Expression: 1455 if default: 1456 self._match_text_seq("DATABLOCKSIZE") 1457 return self.expression(exp.DataBlocksizeProperty, default=True) 1458 elif self._match_texts(("MIN", "MINIMUM")): 1459 self._match_text_seq("DATABLOCKSIZE") 1460 return self.expression(exp.DataBlocksizeProperty, min=True) 1461 elif self._match_texts(("MAX", "MAXIMUM")): 1462 self._match_text_seq("DATABLOCKSIZE") 1463 return self.expression(exp.DataBlocksizeProperty, min=False) 1464 1465 self._match_text_seq("DATABLOCKSIZE") 1466 self._match(TokenType.EQ) 1467 size = self._parse_number() 1468 units = None 1469 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1470 units = self._prev.text 1471 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1472 1473 def _parse_blockcompression(self) -> exp.Expression: 1474 self._match_text_seq("BLOCKCOMPRESSION") 1475 self._match(TokenType.EQ) 1476 always = self._match_text_seq("ALWAYS") 1477 manual = self._match_text_seq("MANUAL") 1478 never = self._match_text_seq("NEVER") 1479 default = self._match_text_seq("DEFAULT") 1480 autotemp = None 1481 if self._match_text_seq("AUTOTEMP"): 1482 autotemp = self._parse_schema() 1483 1484 return self.expression( 1485 exp.BlockCompressionProperty, 1486 always=always, 1487 manual=manual, 1488 never=never, 1489 default=default, 1490 autotemp=autotemp, 1491 ) 1492 1493 def _parse_withisolatedloading(self) -> exp.Expression: 1494 no = self._match_text_seq("NO") 1495 concurrent = self._match_text_seq("CONCURRENT") 1496 self._match_text_seq("ISOLATED", "LOADING") 1497 for_all = self._match_text_seq("FOR", "ALL") 1498 for_insert = self._match_text_seq("FOR", "INSERT") 1499 for_none = 
self._match_text_seq("FOR", "NONE") 1500 return self.expression( 1501 exp.IsolatedLoadingProperty, 1502 no=no, 1503 concurrent=concurrent, 1504 for_all=for_all, 1505 for_insert=for_insert, 1506 for_none=for_none, 1507 ) 1508 1509 def _parse_locking(self) -> exp.Expression: 1510 if self._match(TokenType.TABLE): 1511 kind = "TABLE" 1512 elif self._match(TokenType.VIEW): 1513 kind = "VIEW" 1514 elif self._match(TokenType.ROW): 1515 kind = "ROW" 1516 elif self._match_text_seq("DATABASE"): 1517 kind = "DATABASE" 1518 else: 1519 kind = None 1520 1521 if kind in ("DATABASE", "TABLE", "VIEW"): 1522 this = self._parse_table_parts() 1523 else: 1524 this = None 1525 1526 if self._match(TokenType.FOR): 1527 for_or_in = "FOR" 1528 elif self._match(TokenType.IN): 1529 for_or_in = "IN" 1530 else: 1531 for_or_in = None 1532 1533 if self._match_text_seq("ACCESS"): 1534 lock_type = "ACCESS" 1535 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1536 lock_type = "EXCLUSIVE" 1537 elif self._match_text_seq("SHARE"): 1538 lock_type = "SHARE" 1539 elif self._match_text_seq("READ"): 1540 lock_type = "READ" 1541 elif self._match_text_seq("WRITE"): 1542 lock_type = "WRITE" 1543 elif self._match_text_seq("CHECKSUM"): 1544 lock_type = "CHECKSUM" 1545 else: 1546 lock_type = None 1547 1548 override = self._match_text_seq("OVERRIDE") 1549 1550 return self.expression( 1551 exp.LockingProperty, 1552 this=this, 1553 kind=kind, 1554 for_or_in=for_or_in, 1555 lock_type=lock_type, 1556 override=override, 1557 ) 1558 1559 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1560 if self._match(TokenType.PARTITION_BY): 1561 return self._parse_csv(self._parse_conjunction) 1562 return [] 1563 1564 def _parse_partitioned_by(self) -> exp.Expression: 1565 self._match(TokenType.EQ) 1566 return self.expression( 1567 exp.PartitionedByProperty, 1568 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1569 ) 1570 1571 def _parse_withdata(self, no=False) -> exp.Expression: 1572 if 
self._match_text_seq("AND", "STATISTICS"): 1573 statistics = True 1574 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1575 statistics = False 1576 else: 1577 statistics = None 1578 1579 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1580 1581 def _parse_noprimaryindex(self) -> exp.Expression: 1582 self._match_text_seq("PRIMARY", "INDEX") 1583 return exp.NoPrimaryIndexProperty() 1584 1585 def _parse_oncommit(self) -> exp.Expression: 1586 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1587 return exp.OnCommitProperty() 1588 return exp.OnCommitProperty(delete=self._match_text_seq("COMMIT", "DELETE", "ROWS")) 1589 1590 def _parse_distkey(self) -> exp.Expression: 1591 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1592 1593 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1594 table = self._parse_table(schema=True) 1595 options = [] 1596 while self._match_texts(("INCLUDING", "EXCLUDING")): 1597 this = self._prev.text.upper() 1598 id_var = self._parse_id_var() 1599 1600 if not id_var: 1601 return None 1602 1603 options.append( 1604 self.expression( 1605 exp.Property, 1606 this=this, 1607 value=exp.Var(this=id_var.this.upper()), 1608 ) 1609 ) 1610 return self.expression(exp.LikeProperty, this=table, expressions=options) 1611 1612 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1613 return self.expression( 1614 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1615 ) 1616 1617 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1618 self._match(TokenType.EQ) 1619 return self.expression( 1620 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1621 ) 1622 1623 def _parse_returns(self) -> exp.Expression: 1624 value: t.Optional[exp.Expression] 1625 is_table = self._match(TokenType.TABLE) 1626 1627 if is_table: 1628 if self._match(TokenType.LT): 1629 value = self.expression( 1630 
exp.Schema, 1631 this="TABLE", 1632 expressions=self._parse_csv(self._parse_struct_types), 1633 ) 1634 if not self._match(TokenType.GT): 1635 self.raise_error("Expecting >") 1636 else: 1637 value = self._parse_schema(exp.Var(this="TABLE")) 1638 else: 1639 value = self._parse_types() 1640 1641 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1642 1643 def _parse_temporary(self, global_=False) -> exp.Expression: 1644 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1645 return self.expression(exp.TemporaryProperty, global_=global_) 1646 1647 def _parse_describe(self) -> exp.Expression: 1648 kind = self._match_set(self.CREATABLES) and self._prev.text 1649 this = self._parse_table() 1650 1651 return self.expression(exp.Describe, this=this, kind=kind) 1652 1653 def _parse_insert(self) -> exp.Expression: 1654 overwrite = self._match(TokenType.OVERWRITE) 1655 local = self._match(TokenType.LOCAL) 1656 alternative = None 1657 1658 if self._match_text_seq("DIRECTORY"): 1659 this: t.Optional[exp.Expression] = self.expression( 1660 exp.Directory, 1661 this=self._parse_var_or_string(), 1662 local=local, 1663 row_format=self._parse_row_format(match_row=True), 1664 ) 1665 else: 1666 if self._match(TokenType.OR): 1667 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1668 1669 self._match(TokenType.INTO) 1670 self._match(TokenType.TABLE) 1671 this = self._parse_table(schema=True) 1672 1673 return self.expression( 1674 exp.Insert, 1675 this=this, 1676 exists=self._parse_exists(), 1677 partition=self._parse_partition(), 1678 expression=self._parse_ddl_select(), 1679 conflict=self._parse_on_conflict(), 1680 returning=self._parse_returning(), 1681 overwrite=overwrite, 1682 alternative=alternative, 1683 ) 1684 1685 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1686 conflict = self._match_text_seq("ON", "CONFLICT") 1687 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1688 1689 if not (conflict 
or duplicate): 1690 return None 1691 1692 nothing = None 1693 expressions = None 1694 key = None 1695 constraint = None 1696 1697 if conflict: 1698 if self._match_text_seq("ON", "CONSTRAINT"): 1699 constraint = self._parse_id_var() 1700 else: 1701 key = self._parse_csv(self._parse_value) 1702 1703 self._match_text_seq("DO") 1704 if self._match_text_seq("NOTHING"): 1705 nothing = True 1706 else: 1707 self._match(TokenType.UPDATE) 1708 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1709 1710 return self.expression( 1711 exp.OnConflict, 1712 duplicate=duplicate, 1713 expressions=expressions, 1714 nothing=nothing, 1715 key=key, 1716 constraint=constraint, 1717 ) 1718 1719 def _parse_returning(self) -> t.Optional[exp.Expression]: 1720 if not self._match(TokenType.RETURNING): 1721 return None 1722 1723 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1724 1725 def _parse_row(self) -> t.Optional[exp.Expression]: 1726 if not self._match(TokenType.FORMAT): 1727 return None 1728 return self._parse_row_format() 1729 1730 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1731 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1732 return None 1733 1734 if self._match_text_seq("SERDE"): 1735 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1736 1737 self._match_text_seq("DELIMITED") 1738 1739 kwargs = {} 1740 1741 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1742 kwargs["fields"] = self._parse_string() 1743 if self._match_text_seq("ESCAPED", "BY"): 1744 kwargs["escaped"] = self._parse_string() 1745 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1746 kwargs["collection_items"] = self._parse_string() 1747 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1748 kwargs["map_keys"] = self._parse_string() 1749 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1750 kwargs["lines"] = 
self._parse_string() 1751 if self._match_text_seq("NULL", "DEFINED", "AS"): 1752 kwargs["null"] = self._parse_string() 1753 1754 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1755 1756 def _parse_load_data(self) -> exp.Expression: 1757 local = self._match(TokenType.LOCAL) 1758 self._match_text_seq("INPATH") 1759 inpath = self._parse_string() 1760 overwrite = self._match(TokenType.OVERWRITE) 1761 self._match_pair(TokenType.INTO, TokenType.TABLE) 1762 1763 return self.expression( 1764 exp.LoadData, 1765 this=self._parse_table(schema=True), 1766 local=local, 1767 overwrite=overwrite, 1768 inpath=inpath, 1769 partition=self._parse_partition(), 1770 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1771 serde=self._match_text_seq("SERDE") and self._parse_string(), 1772 ) 1773 1774 def _parse_delete(self) -> exp.Expression: 1775 self._match(TokenType.FROM) 1776 1777 return self.expression( 1778 exp.Delete, 1779 this=self._parse_table(), 1780 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1781 where=self._parse_where(), 1782 returning=self._parse_returning(), 1783 ) 1784 1785 def _parse_update(self) -> exp.Expression: 1786 return self.expression( 1787 exp.Update, 1788 **{ # type: ignore 1789 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1790 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1791 "from": self._parse_from(modifiers=True), 1792 "where": self._parse_where(), 1793 "returning": self._parse_returning(), 1794 }, 1795 ) 1796 1797 def _parse_uncache(self) -> exp.Expression: 1798 if not self._match(TokenType.TABLE): 1799 self.raise_error("Expecting TABLE after UNCACHE") 1800 1801 return self.expression( 1802 exp.Uncache, 1803 exists=self._parse_exists(), 1804 this=self._parse_table(schema=True), 1805 ) 1806 1807 def _parse_cache(self) -> exp.Expression: 1808 lazy = self._match(TokenType.LAZY) 1809 
self._match(TokenType.TABLE) 1810 table = self._parse_table(schema=True) 1811 options = [] 1812 1813 if self._match(TokenType.OPTIONS): 1814 self._match_l_paren() 1815 k = self._parse_string() 1816 self._match(TokenType.EQ) 1817 v = self._parse_string() 1818 options = [k, v] 1819 self._match_r_paren() 1820 1821 self._match(TokenType.ALIAS) 1822 return self.expression( 1823 exp.Cache, 1824 this=table, 1825 lazy=lazy, 1826 options=options, 1827 expression=self._parse_select(nested=True), 1828 ) 1829 1830 def _parse_partition(self) -> t.Optional[exp.Expression]: 1831 if not self._match(TokenType.PARTITION): 1832 return None 1833 1834 return self.expression( 1835 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1836 ) 1837 1838 def _parse_value(self) -> exp.Expression: 1839 if self._match(TokenType.L_PAREN): 1840 expressions = self._parse_csv(self._parse_conjunction) 1841 self._match_r_paren() 1842 return self.expression(exp.Tuple, expressions=expressions) 1843 1844 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1845 # Source: https://prestodb.io/docs/current/sql/values.html 1846 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1847 1848 def _parse_select( 1849 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1850 ) -> t.Optional[exp.Expression]: 1851 cte = self._parse_with() 1852 if cte: 1853 this = self._parse_statement() 1854 1855 if not this: 1856 self.raise_error("Failed to parse any statement following CTE") 1857 return cte 1858 1859 if "with" in this.arg_types: 1860 this.set("with", cte) 1861 else: 1862 self.raise_error(f"{this.key} does not support CTE") 1863 this = cte 1864 elif self._match(TokenType.SELECT): 1865 comments = self._prev_comments 1866 1867 hint = self._parse_hint() 1868 all_ = self._match(TokenType.ALL) 1869 distinct = self._match(TokenType.DISTINCT) 1870 1871 kind = ( 1872 self._match(TokenType.ALIAS) 1873 and self._match_texts(("STRUCT", "VALUE")) 1874 and self._prev.text 1875 ) 1876 1877 if distinct: 1878 distinct = self.expression( 1879 exp.Distinct, 1880 on=self._parse_value() if self._match(TokenType.ON) else None, 1881 ) 1882 1883 if all_ and distinct: 1884 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1885 1886 limit = self._parse_limit(top=True) 1887 expressions = self._parse_csv(self._parse_expression) 1888 1889 this = self.expression( 1890 exp.Select, 1891 kind=kind, 1892 hint=hint, 1893 distinct=distinct, 1894 expressions=expressions, 1895 limit=limit, 1896 ) 1897 this.comments = comments 1898 1899 into = self._parse_into() 1900 if into: 1901 this.set("into", into) 1902 1903 from_ = self._parse_from() 1904 if from_: 1905 this.set("from", from_) 1906 1907 this = self._parse_query_modifiers(this) 1908 elif (table or nested) and self._match(TokenType.L_PAREN): 1909 this = self._parse_table() if table else self._parse_select(nested=True) 1910 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1911 self._match_r_paren() 1912 1913 # early 
return so that subquery unions aren't parsed again 1914 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1915 # Union ALL should be a property of the top select node, not the subquery 1916 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1917 elif self._match(TokenType.VALUES): 1918 this = self.expression( 1919 exp.Values, 1920 expressions=self._parse_csv(self._parse_value), 1921 alias=self._parse_table_alias(), 1922 ) 1923 else: 1924 this = None 1925 1926 return self._parse_set_operations(this) 1927 1928 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1929 if not skip_with_token and not self._match(TokenType.WITH): 1930 return None 1931 1932 comments = self._prev_comments 1933 recursive = self._match(TokenType.RECURSIVE) 1934 1935 expressions = [] 1936 while True: 1937 expressions.append(self._parse_cte()) 1938 1939 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1940 break 1941 else: 1942 self._match(TokenType.WITH) 1943 1944 return self.expression( 1945 exp.With, comments=comments, expressions=expressions, recursive=recursive 1946 ) 1947 1948 def _parse_cte(self) -> exp.Expression: 1949 alias = self._parse_table_alias() 1950 if not alias or not alias.this: 1951 self.raise_error("Expected CTE to have alias") 1952 1953 self._match(TokenType.ALIAS) 1954 1955 return self.expression( 1956 exp.CTE, 1957 this=self._parse_wrapped(self._parse_statement), 1958 alias=alias, 1959 ) 1960 1961 def _parse_table_alias( 1962 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1963 ) -> t.Optional[exp.Expression]: 1964 any_token = self._match(TokenType.ALIAS) 1965 alias = ( 1966 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1967 or self._parse_string_as_identifier() 1968 ) 1969 1970 index = self._index 1971 if self._match(TokenType.L_PAREN): 1972 columns = self._parse_csv(self._parse_function_parameter) 1973 self._match_r_paren() if columns else 
self._retreat(index) 1974 else: 1975 columns = None 1976 1977 if not alias and not columns: 1978 return None 1979 1980 return self.expression(exp.TableAlias, this=alias, columns=columns) 1981 1982 def _parse_subquery( 1983 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1984 ) -> exp.Expression: 1985 return self.expression( 1986 exp.Subquery, 1987 this=this, 1988 pivots=self._parse_pivots(), 1989 alias=self._parse_table_alias() if parse_alias else None, 1990 ) 1991 1992 def _parse_query_modifiers( 1993 self, this: t.Optional[exp.Expression] 1994 ) -> t.Optional[exp.Expression]: 1995 if isinstance(this, self.MODIFIABLES): 1996 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1997 expression = parser(self) 1998 1999 if expression: 2000 this.set(key, expression) 2001 return this 2002 2003 def _parse_hint(self) -> t.Optional[exp.Expression]: 2004 if self._match(TokenType.HINT): 2005 hints = self._parse_csv(self._parse_function) 2006 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2007 self.raise_error("Expected */ after HINT") 2008 return self.expression(exp.Hint, expressions=hints) 2009 2010 return None 2011 2012 def _parse_into(self) -> t.Optional[exp.Expression]: 2013 if not self._match(TokenType.INTO): 2014 return None 2015 2016 temp = self._match(TokenType.TEMPORARY) 2017 unlogged = self._match(TokenType.UNLOGGED) 2018 self._match(TokenType.TABLE) 2019 2020 return self.expression( 2021 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2022 ) 2023 2024 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 2025 if not self._match(TokenType.FROM): 2026 return None 2027 2028 comments = self._prev_comments 2029 this = self._parse_table() 2030 2031 return self.expression( 2032 exp.From, 2033 comments=comments, 2034 this=self._parse_query_modifiers(this) if modifiers else this, 2035 ) 2036 2037 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2038 if not 
self._match(TokenType.MATCH_RECOGNIZE): 2039 return None 2040 2041 self._match_l_paren() 2042 2043 partition = self._parse_partition_by() 2044 order = self._parse_order() 2045 measures = ( 2046 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2047 ) 2048 2049 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2050 rows = exp.Var(this="ONE ROW PER MATCH") 2051 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2052 text = "ALL ROWS PER MATCH" 2053 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2054 text += f" SHOW EMPTY MATCHES" 2055 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2056 text += f" OMIT EMPTY MATCHES" 2057 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2058 text += f" WITH UNMATCHED ROWS" 2059 rows = exp.Var(this=text) 2060 else: 2061 rows = None 2062 2063 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2064 text = "AFTER MATCH SKIP" 2065 if self._match_text_seq("PAST", "LAST", "ROW"): 2066 text += f" PAST LAST ROW" 2067 elif self._match_text_seq("TO", "NEXT", "ROW"): 2068 text += f" TO NEXT ROW" 2069 elif self._match_text_seq("TO", "FIRST"): 2070 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2071 elif self._match_text_seq("TO", "LAST"): 2072 text += f" TO LAST {self._advance_any().text}" # type: ignore 2073 after = exp.Var(this=text) 2074 else: 2075 after = None 2076 2077 if self._match_text_seq("PATTERN"): 2078 self._match_l_paren() 2079 2080 if not self._curr: 2081 self.raise_error("Expecting )", self._curr) 2082 2083 paren = 1 2084 start = self._curr 2085 2086 while self._curr and paren > 0: 2087 if self._curr.token_type == TokenType.L_PAREN: 2088 paren += 1 2089 if self._curr.token_type == TokenType.R_PAREN: 2090 paren -= 1 2091 end = self._prev 2092 self._advance() 2093 if paren > 0: 2094 self.raise_error("Expecting )", self._curr) 2095 pattern = exp.Var(this=self._find_sql(start, end)) 2096 else: 2097 pattern = None 2098 2099 define = ( 2100 
self._parse_csv( 2101 lambda: self.expression( 2102 exp.Alias, 2103 alias=self._parse_id_var(any_token=True), 2104 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2105 ) 2106 ) 2107 if self._match_text_seq("DEFINE") 2108 else None 2109 ) 2110 2111 self._match_r_paren() 2112 2113 return self.expression( 2114 exp.MatchRecognize, 2115 partition_by=partition, 2116 order=order, 2117 measures=measures, 2118 rows=rows, 2119 after=after, 2120 pattern=pattern, 2121 define=define, 2122 alias=self._parse_table_alias(), 2123 ) 2124 2125 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2126 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2127 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2128 2129 if outer_apply or cross_apply: 2130 this = self._parse_select(table=True) 2131 view = None 2132 outer = not cross_apply 2133 elif self._match(TokenType.LATERAL): 2134 this = self._parse_select(table=True) 2135 view = self._match(TokenType.VIEW) 2136 outer = self._match(TokenType.OUTER) 2137 else: 2138 return None 2139 2140 if not this: 2141 this = self._parse_function() or self._parse_id_var(any_token=False) 2142 while self._match(TokenType.DOT): 2143 this = exp.Dot( 2144 this=this, 2145 expression=self._parse_function() or self._parse_id_var(any_token=False), 2146 ) 2147 2148 table_alias: t.Optional[exp.Expression] 2149 2150 if view: 2151 table = self._parse_id_var(any_token=False) 2152 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2153 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2154 else: 2155 table_alias = self._parse_table_alias() 2156 2157 expression = self.expression( 2158 exp.Lateral, 2159 this=this, 2160 view=view, 2161 outer=outer, 2162 alias=table_alias, 2163 ) 2164 2165 return expression 2166 2167 def _parse_join_side_and_kind( 2168 self, 2169 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2170 return ( 2171 
self._match(TokenType.NATURAL) and self._prev, 2172 self._match_set(self.JOIN_SIDES) and self._prev, 2173 self._match_set(self.JOIN_KINDS) and self._prev, 2174 ) 2175 2176 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2177 if self._match(TokenType.COMMA): 2178 return self.expression(exp.Join, this=self._parse_table()) 2179 2180 index = self._index 2181 natural, side, kind = self._parse_join_side_and_kind() 2182 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2183 join = self._match(TokenType.JOIN) 2184 2185 if not skip_join_token and not join: 2186 self._retreat(index) 2187 kind = None 2188 natural = None 2189 side = None 2190 2191 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2192 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2193 2194 if not skip_join_token and not join and not outer_apply and not cross_apply: 2195 return None 2196 2197 if outer_apply: 2198 side = Token(TokenType.LEFT, "LEFT") 2199 2200 kwargs: t.Dict[ 2201 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2202 ] = {"this": self._parse_table()} 2203 2204 if natural: 2205 kwargs["natural"] = True 2206 if side: 2207 kwargs["side"] = side.text 2208 if kind: 2209 kwargs["kind"] = kind.text 2210 if hint: 2211 kwargs["hint"] = hint 2212 2213 if self._match(TokenType.ON): 2214 kwargs["on"] = self._parse_conjunction() 2215 elif self._match(TokenType.USING): 2216 kwargs["using"] = self._parse_wrapped_id_vars() 2217 2218 return self.expression(exp.Join, **kwargs) # type: ignore 2219 2220 def _parse_index(self) -> exp.Expression: 2221 index = self._parse_id_var() 2222 self._match(TokenType.ON) 2223 self._match(TokenType.TABLE) # hive 2224 2225 return self.expression( 2226 exp.Index, 2227 this=index, 2228 table=self.expression(exp.Table, this=self._parse_id_var()), 2229 columns=self._parse_expression(), 2230 ) 2231 2232 def _parse_create_table_index(self) -> 
t.Optional[exp.Expression]: 2233 unique = self._match(TokenType.UNIQUE) 2234 primary = self._match_text_seq("PRIMARY") 2235 amp = self._match_text_seq("AMP") 2236 if not self._match(TokenType.INDEX): 2237 return None 2238 index = self._parse_id_var() 2239 columns = None 2240 if self._match(TokenType.L_PAREN, advance=False): 2241 columns = self._parse_wrapped_csv(self._parse_column) 2242 return self.expression( 2243 exp.Index, 2244 this=index, 2245 columns=columns, 2246 unique=unique, 2247 primary=primary, 2248 amp=amp, 2249 ) 2250 2251 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2252 return ( 2253 (not schema and self._parse_function()) 2254 or self._parse_id_var(any_token=False) 2255 or self._parse_string_as_identifier() 2256 or self._parse_placeholder() 2257 ) 2258 2259 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2260 catalog = None 2261 db = None 2262 table = self._parse_table_part(schema=schema) 2263 2264 while self._match(TokenType.DOT): 2265 if catalog: 2266 # This allows nesting the table in arbitrarily many dot expressions if needed 2267 table = self.expression( 2268 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2269 ) 2270 else: 2271 catalog = db 2272 db = table 2273 table = self._parse_table_part(schema=schema) 2274 2275 if not table: 2276 self.raise_error(f"Expected table name but got {self._curr}") 2277 2278 return self.expression( 2279 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2280 ) 2281 2282 def _parse_table( 2283 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2284 ) -> t.Optional[exp.Expression]: 2285 lateral = self._parse_lateral() 2286 if lateral: 2287 return lateral 2288 2289 unnest = self._parse_unnest() 2290 if unnest: 2291 return unnest 2292 2293 values = self._parse_derived_table_values() 2294 if values: 2295 return values 2296 2297 subquery = self._parse_select(table=True) 2298 if 
subquery: 2299 if not subquery.args.get("pivots"): 2300 subquery.set("pivots", self._parse_pivots()) 2301 return subquery 2302 2303 this = self._parse_table_parts(schema=schema) 2304 2305 if schema: 2306 return self._parse_schema(this=this) 2307 2308 if self.alias_post_tablesample: 2309 table_sample = self._parse_table_sample() 2310 2311 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2312 if alias: 2313 this.set("alias", alias) 2314 2315 if not this.args.get("pivots"): 2316 this.set("pivots", self._parse_pivots()) 2317 2318 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2319 this.set( 2320 "hints", 2321 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2322 ) 2323 self._match_r_paren() 2324 2325 if not self.alias_post_tablesample: 2326 table_sample = self._parse_table_sample() 2327 2328 if table_sample: 2329 table_sample.set("this", this) 2330 this = table_sample 2331 2332 return this 2333 2334 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2335 if not self._match(TokenType.UNNEST): 2336 return None 2337 2338 expressions = self._parse_wrapped_csv(self._parse_type) 2339 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2340 alias = self._parse_table_alias() 2341 2342 if alias and self.unnest_column_only: 2343 if alias.args.get("columns"): 2344 self.raise_error("Unexpected extra column alias in unnest.") 2345 alias.set("columns", [alias.this]) 2346 alias.set("this", None) 2347 2348 offset = None 2349 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2350 self._match(TokenType.ALIAS) 2351 offset = self._parse_id_var() or exp.Identifier(this="offset") 2352 2353 return self.expression( 2354 exp.Unnest, 2355 expressions=expressions, 2356 ordinality=ordinality, 2357 alias=alias, 2358 offset=offset, 2359 ) 2360 2361 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2362 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2363 if 
not is_derived and not self._match(TokenType.VALUES): 2364 return None 2365 2366 expressions = self._parse_csv(self._parse_value) 2367 2368 if is_derived: 2369 self._match_r_paren() 2370 2371 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2372 2373 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2374 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2375 as_modifier and self._match_text_seq("USING", "SAMPLE") 2376 ): 2377 return None 2378 2379 bucket_numerator = None 2380 bucket_denominator = None 2381 bucket_field = None 2382 percent = None 2383 rows = None 2384 size = None 2385 seed = None 2386 2387 kind = ( 2388 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2389 ) 2390 method = self._parse_var(tokens=(TokenType.ROW,)) 2391 2392 self._match(TokenType.L_PAREN) 2393 2394 num = self._parse_number() 2395 2396 if self._match(TokenType.BUCKET): 2397 bucket_numerator = self._parse_number() 2398 self._match(TokenType.OUT_OF) 2399 bucket_denominator = bucket_denominator = self._parse_number() 2400 self._match(TokenType.ON) 2401 bucket_field = self._parse_field() 2402 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2403 percent = num 2404 elif self._match(TokenType.ROWS): 2405 rows = num 2406 else: 2407 size = num 2408 2409 self._match(TokenType.R_PAREN) 2410 2411 if self._match(TokenType.L_PAREN): 2412 method = self._parse_var() 2413 seed = self._match(TokenType.COMMA) and self._parse_number() 2414 self._match_r_paren() 2415 elif self._match_texts(("SEED", "REPEATABLE")): 2416 seed = self._parse_wrapped(self._parse_number) 2417 2418 return self.expression( 2419 exp.TableSample, 2420 method=method, 2421 bucket_numerator=bucket_numerator, 2422 bucket_denominator=bucket_denominator, 2423 bucket_field=bucket_field, 2424 percent=percent, 2425 rows=rows, 2426 size=size, 2427 seed=seed, 2428 kind=kind, 2429 ) 2430 2431 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2432 return list(iter(self._parse_pivot, None)) 2433 2434 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2435 index = self._index 2436 2437 if self._match(TokenType.PIVOT): 2438 unpivot = False 2439 elif self._match(TokenType.UNPIVOT): 2440 unpivot = True 2441 else: 2442 return None 2443 2444 expressions = [] 2445 field = None 2446 2447 if not self._match(TokenType.L_PAREN): 2448 self._retreat(index) 2449 return None 2450 2451 if unpivot: 2452 expressions = self._parse_csv(self._parse_column) 2453 else: 2454 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2455 2456 if not expressions: 2457 self.raise_error("Failed to parse PIVOT's aggregation list") 2458 2459 if not self._match(TokenType.FOR): 2460 self.raise_error("Expecting FOR") 2461 2462 value = self._parse_column() 2463 2464 if not self._match(TokenType.IN): 2465 self.raise_error("Expecting IN") 2466 2467 field = self._parse_in(value) 2468 2469 self._match_r_paren() 2470 2471 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2472 2473 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2474 pivot.set("alias", self._parse_table_alias()) 2475 2476 if not unpivot: 2477 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2478 2479 columns: t.List[exp.Expression] = [] 2480 for fld in pivot.args["field"].expressions: 2481 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2482 for name in names: 2483 if self.PREFIXED_PIVOT_COLUMNS: 2484 name = f"{name}_{field_name}" if name else field_name 2485 else: 2486 name = f"{field_name}_{name}" if name else field_name 2487 2488 columns.append(exp.to_identifier(name)) 2489 2490 pivot.set("columns", columns) 2491 2492 return pivot 2493 2494 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2495 return [agg.alias for agg in 
aggregations] 2496 2497 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2498 if not skip_where_token and not self._match(TokenType.WHERE): 2499 return None 2500 2501 return self.expression( 2502 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2503 ) 2504 2505 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2506 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2507 return None 2508 2509 elements = defaultdict(list) 2510 2511 while True: 2512 expressions = self._parse_csv(self._parse_conjunction) 2513 if expressions: 2514 elements["expressions"].extend(expressions) 2515 2516 grouping_sets = self._parse_grouping_sets() 2517 if grouping_sets: 2518 elements["grouping_sets"].extend(grouping_sets) 2519 2520 rollup = None 2521 cube = None 2522 totals = None 2523 2524 with_ = self._match(TokenType.WITH) 2525 if self._match(TokenType.ROLLUP): 2526 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2527 elements["rollup"].extend(ensure_list(rollup)) 2528 2529 if self._match(TokenType.CUBE): 2530 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2531 elements["cube"].extend(ensure_list(cube)) 2532 2533 if self._match_text_seq("TOTALS"): 2534 totals = True 2535 elements["totals"] = True # type: ignore 2536 2537 if not (grouping_sets or rollup or cube or totals): 2538 break 2539 2540 return self.expression(exp.Group, **elements) # type: ignore 2541 2542 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2543 if not self._match(TokenType.GROUPING_SETS): 2544 return None 2545 2546 return self._parse_wrapped_csv(self._parse_grouping_set) 2547 2548 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2549 if self._match(TokenType.L_PAREN): 2550 grouping_set = self._parse_csv(self._parse_column) 2551 self._match_r_paren() 2552 return self.expression(exp.Tuple, expressions=grouping_set) 2553 2554 return 
self._parse_column() 2555 2556 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2557 if not skip_having_token and not self._match(TokenType.HAVING): 2558 return None 2559 return self.expression(exp.Having, this=self._parse_conjunction()) 2560 2561 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2562 if not self._match(TokenType.QUALIFY): 2563 return None 2564 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2565 2566 def _parse_order( 2567 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2568 ) -> t.Optional[exp.Expression]: 2569 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2570 return this 2571 2572 return self.expression( 2573 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2574 ) 2575 2576 def _parse_sort( 2577 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2578 ) -> t.Optional[exp.Expression]: 2579 if not self._match(token_type): 2580 return None 2581 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2582 2583 def _parse_ordered(self) -> exp.Expression: 2584 this = self._parse_conjunction() 2585 self._match(TokenType.ASC) 2586 is_desc = self._match(TokenType.DESC) 2587 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2588 is_nulls_last = self._match(TokenType.NULLS_LAST) 2589 desc = is_desc or False 2590 asc = not desc 2591 nulls_first = is_nulls_first or False 2592 explicitly_null_ordered = is_nulls_first or is_nulls_last 2593 if ( 2594 not explicitly_null_ordered 2595 and ( 2596 (asc and self.null_ordering == "nulls_are_small") 2597 or (desc and self.null_ordering != "nulls_are_small") 2598 ) 2599 and self.null_ordering != "nulls_are_last" 2600 ): 2601 nulls_first = True 2602 2603 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2604 2605 def _parse_limit( 2606 self, this: t.Optional[exp.Expression] = None, top: bool = False 2607 ) -> 
t.Optional[exp.Expression]: 2608 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2609 limit_paren = self._match(TokenType.L_PAREN) 2610 limit_exp = self.expression( 2611 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2612 ) 2613 2614 if limit_paren: 2615 self._match_r_paren() 2616 2617 return limit_exp 2618 2619 if self._match(TokenType.FETCH): 2620 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2621 direction = self._prev.text if direction else "FIRST" 2622 2623 count = self._parse_number() 2624 percent = self._match(TokenType.PERCENT) 2625 2626 self._match_set((TokenType.ROW, TokenType.ROWS)) 2627 2628 only = self._match(TokenType.ONLY) 2629 with_ties = self._match_text_seq("WITH", "TIES") 2630 2631 if only and with_ties: 2632 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2633 2634 return self.expression( 2635 exp.Fetch, 2636 direction=direction, 2637 count=count, 2638 percent=percent, 2639 with_ties=with_ties, 2640 ) 2641 2642 return this 2643 2644 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2645 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2646 return this 2647 2648 count = self._parse_number() 2649 self._match_set((TokenType.ROW, TokenType.ROWS)) 2650 return self.expression(exp.Offset, this=this, expression=count) 2651 2652 def _parse_locks(self) -> t.List[exp.Expression]: 2653 # Lists are invariant, so we need to use a type hint here 2654 locks: t.List[exp.Expression] = [] 2655 2656 while True: 2657 if self._match_text_seq("FOR", "UPDATE"): 2658 update = True 2659 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2660 "LOCK", "IN", "SHARE", "MODE" 2661 ): 2662 update = False 2663 else: 2664 break 2665 2666 expressions = None 2667 if self._match_text_seq("OF"): 2668 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2669 2670 wait: t.Optional[bool | exp.Expression] 
= None 2671 if self._match_text_seq("NOWAIT"): 2672 wait = True 2673 elif self._match_text_seq("WAIT"): 2674 wait = self._parse_primary() 2675 elif self._match_text_seq("SKIP", "LOCKED"): 2676 wait = False 2677 2678 locks.append( 2679 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2680 ) 2681 2682 return locks 2683 2684 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2685 if not self._match_set(self.SET_OPERATIONS): 2686 return this 2687 2688 token_type = self._prev.token_type 2689 2690 if token_type == TokenType.UNION: 2691 expression = exp.Union 2692 elif token_type == TokenType.EXCEPT: 2693 expression = exp.Except 2694 else: 2695 expression = exp.Intersect 2696 2697 return self.expression( 2698 expression, 2699 this=this, 2700 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2701 expression=self._parse_set_operations(self._parse_select(nested=True)), 2702 ) 2703 2704 def _parse_expression(self) -> t.Optional[exp.Expression]: 2705 return self._parse_alias(self._parse_conjunction()) 2706 2707 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2708 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2709 2710 def _parse_equality(self) -> t.Optional[exp.Expression]: 2711 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2712 2713 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2714 return self._parse_tokens(self._parse_range, self.COMPARISON) 2715 2716 def _parse_range(self) -> t.Optional[exp.Expression]: 2717 this = self._parse_bitwise() 2718 negate = self._match(TokenType.NOT) 2719 2720 if self._match_set(self.RANGE_PARSERS): 2721 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2722 if not expression: 2723 return this 2724 2725 this = expression 2726 elif self._match(TokenType.ISNULL): 2727 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2728 2729 # Postgres supports ISNULL 
and NOTNULL for conditions. 2730 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2731 if self._match(TokenType.NOTNULL): 2732 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2733 this = self.expression(exp.Not, this=this) 2734 2735 if negate: 2736 this = self.expression(exp.Not, this=this) 2737 2738 if self._match(TokenType.IS): 2739 this = self._parse_is(this) 2740 2741 return this 2742 2743 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2744 index = self._index - 1 2745 negate = self._match(TokenType.NOT) 2746 if self._match(TokenType.DISTINCT_FROM): 2747 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2748 return self.expression(klass, this=this, expression=self._parse_expression()) 2749 2750 expression = self._parse_null() or self._parse_boolean() 2751 if not expression: 2752 self._retreat(index) 2753 return None 2754 2755 this = self.expression(exp.Is, this=this, expression=expression) 2756 return self.expression(exp.Not, this=this) if negate else this 2757 2758 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2759 unnest = self._parse_unnest() 2760 if unnest: 2761 this = self.expression(exp.In, this=this, unnest=unnest) 2762 elif self._match(TokenType.L_PAREN): 2763 expressions = self._parse_csv(self._parse_select_or_expression) 2764 2765 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2766 this = self.expression(exp.In, this=this, query=expressions[0]) 2767 else: 2768 this = self.expression(exp.In, this=this, expressions=expressions) 2769 2770 self._match_r_paren(this) 2771 else: 2772 this = self.expression(exp.In, this=this, field=self._parse_field()) 2773 2774 return this 2775 2776 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2777 low = self._parse_bitwise() 2778 self._match(TokenType.AND) 2779 high = self._parse_bitwise() 2780 return self.expression(exp.Between, this=this, low=low, high=high) 2781 2782 def 
_parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2783 if not self._match(TokenType.ESCAPE): 2784 return this 2785 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2786 2787 def _parse_interval(self) -> t.Optional[exp.Expression]: 2788 if not self._match(TokenType.INTERVAL): 2789 return None 2790 2791 this = self._parse_primary() or self._parse_term() 2792 unit = self._parse_function() or self._parse_var() 2793 2794 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2795 # each INTERVAL expression into this canonical form so it's easy to transpile 2796 if this and isinstance(this, exp.Literal): 2797 if this.is_number: 2798 this = exp.Literal.string(this.name) 2799 2800 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2801 parts = this.name.split() 2802 if not unit and len(parts) <= 2: 2803 this = exp.Literal.string(seq_get(parts, 0)) 2804 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2805 2806 return self.expression(exp.Interval, this=this, unit=unit) 2807 2808 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2809 this = self._parse_term() 2810 2811 while True: 2812 if self._match_set(self.BITWISE): 2813 this = self.expression( 2814 self.BITWISE[self._prev.token_type], 2815 this=this, 2816 expression=self._parse_term(), 2817 ) 2818 elif self._match_pair(TokenType.LT, TokenType.LT): 2819 this = self.expression( 2820 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2821 ) 2822 elif self._match_pair(TokenType.GT, TokenType.GT): 2823 this = self.expression( 2824 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2825 ) 2826 else: 2827 break 2828 2829 return this 2830 2831 def _parse_term(self) -> t.Optional[exp.Expression]: 2832 return self._parse_tokens(self._parse_factor, self.TERM) 2833 2834 def _parse_factor(self) -> t.Optional[exp.Expression]: 2835 return self._parse_tokens(self._parse_unary, 
self.FACTOR) 2836 2837 def _parse_unary(self) -> t.Optional[exp.Expression]: 2838 if self._match_set(self.UNARY_PARSERS): 2839 return self.UNARY_PARSERS[self._prev.token_type](self) 2840 return self._parse_at_time_zone(self._parse_type()) 2841 2842 def _parse_type(self) -> t.Optional[exp.Expression]: 2843 interval = self._parse_interval() 2844 if interval: 2845 return interval 2846 2847 index = self._index 2848 data_type = self._parse_types(check_func=True) 2849 this = self._parse_column() 2850 2851 if data_type: 2852 if isinstance(this, exp.Literal): 2853 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2854 if parser: 2855 return parser(self, this, data_type) 2856 return self.expression(exp.Cast, this=this, to=data_type) 2857 if not data_type.expressions: 2858 self._retreat(index) 2859 return self._parse_column() 2860 return data_type 2861 2862 return this 2863 2864 def _parse_type_size(self) -> t.Optional[exp.Expression]: 2865 this = self._parse_type() 2866 if not this: 2867 return None 2868 2869 return self.expression( 2870 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2871 ) 2872 2873 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2874 index = self._index 2875 2876 prefix = self._match_text_seq("SYSUDTLIB", ".") 2877 2878 if not self._match_set(self.TYPE_TOKENS): 2879 return None 2880 2881 type_token = self._prev.token_type 2882 2883 if type_token == TokenType.PSEUDO_TYPE: 2884 return self.expression(exp.PseudoType, this=self._prev.text) 2885 2886 nested = type_token in self.NESTED_TYPE_TOKENS 2887 is_struct = type_token == TokenType.STRUCT 2888 expressions = None 2889 maybe_func = False 2890 2891 if self._match(TokenType.L_PAREN): 2892 if is_struct: 2893 expressions = self._parse_csv(self._parse_struct_types) 2894 elif nested: 2895 expressions = self._parse_csv(self._parse_types) 2896 else: 2897 expressions = self._parse_csv(self._parse_type_size) 2898 2899 if not expressions or not 
self._match(TokenType.R_PAREN): 2900 self._retreat(index) 2901 return None 2902 2903 maybe_func = True 2904 2905 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2906 this = exp.DataType( 2907 this=exp.DataType.Type.ARRAY, 2908 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2909 nested=True, 2910 ) 2911 2912 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2913 this = exp.DataType( 2914 this=exp.DataType.Type.ARRAY, 2915 expressions=[this], 2916 nested=True, 2917 ) 2918 2919 return this 2920 2921 if self._match(TokenType.L_BRACKET): 2922 self._retreat(index) 2923 return None 2924 2925 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2926 if nested and self._match(TokenType.LT): 2927 if is_struct: 2928 expressions = self._parse_csv(self._parse_struct_types) 2929 else: 2930 expressions = self._parse_csv(self._parse_types) 2931 2932 if not self._match(TokenType.GT): 2933 self.raise_error("Expecting >") 2934 2935 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2936 values = self._parse_csv(self._parse_conjunction) 2937 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2938 2939 value: t.Optional[exp.Expression] = None 2940 if type_token in self.TIMESTAMPS: 2941 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2942 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2943 elif ( 2944 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2945 ): 2946 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2947 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2948 if type_token == TokenType.TIME: 2949 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2950 else: 2951 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2952 2953 maybe_func = maybe_func and value is None 2954 2955 if value is None: 2956 value = 
exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2957 elif type_token == TokenType.INTERVAL: 2958 unit = self._parse_var() 2959 2960 if not unit: 2961 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2962 else: 2963 value = self.expression(exp.Interval, unit=unit) 2964 2965 if maybe_func and check_func: 2966 index2 = self._index 2967 peek = self._parse_string() 2968 2969 if not peek: 2970 self._retreat(index) 2971 return None 2972 2973 self._retreat(index2) 2974 2975 if value: 2976 return value 2977 2978 return exp.DataType( 2979 this=exp.DataType.Type[type_token.value.upper()], 2980 expressions=expressions, 2981 nested=nested, 2982 values=values, 2983 prefix=prefix, 2984 ) 2985 2986 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 2987 this = self._parse_type() or self._parse_id_var() 2988 self._match(TokenType.COLON) 2989 return self._parse_column_def(this) 2990 2991 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2992 if not self._match(TokenType.AT_TIME_ZONE): 2993 return this 2994 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2995 2996 def _parse_column(self) -> t.Optional[exp.Expression]: 2997 this = self._parse_field() 2998 if isinstance(this, exp.Identifier): 2999 this = self.expression(exp.Column, this=this) 3000 elif not this: 3001 return self._parse_bracket(this) 3002 this = self._parse_bracket(this) 3003 3004 while self._match_set(self.COLUMN_OPERATORS): 3005 op_token = self._prev.token_type 3006 op = self.COLUMN_OPERATORS.get(op_token) 3007 3008 if op_token == TokenType.DCOLON: 3009 field = self._parse_types() 3010 if not field: 3011 self.raise_error("Expected type") 3012 elif op and self._curr: 3013 self._advance() 3014 value = self._prev.text 3015 field = ( 3016 exp.Literal.number(value) 3017 if self._prev.token_type == TokenType.NUMBER 3018 else exp.Literal.string(value) 3019 ) 3020 else: 3021 field = ( 3022 
self._parse_star() 3023 or self._parse_function(anonymous=True) 3024 or self._parse_id_var() 3025 ) 3026 3027 if isinstance(field, exp.Func): 3028 # bigquery allows function calls like x.y.count(...) 3029 # SAFE.SUBSTR(...) 3030 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3031 this = self._replace_columns_with_dots(this) 3032 3033 if op: 3034 this = op(self, this, field) 3035 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3036 this = self.expression( 3037 exp.Column, 3038 this=field, 3039 table=this.this, 3040 db=this.args.get("table"), 3041 catalog=this.args.get("db"), 3042 ) 3043 else: 3044 this = self.expression(exp.Dot, this=this, expression=field) 3045 this = self._parse_bracket(this) 3046 3047 return this 3048 3049 def _parse_primary(self) -> t.Optional[exp.Expression]: 3050 if self._match_set(self.PRIMARY_PARSERS): 3051 token_type = self._prev.token_type 3052 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3053 3054 if token_type == TokenType.STRING: 3055 expressions = [primary] 3056 while self._match(TokenType.STRING): 3057 expressions.append(exp.Literal.string(self._prev.text)) 3058 if len(expressions) > 1: 3059 return self.expression(exp.Concat, expressions=expressions) 3060 return primary 3061 3062 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3063 return exp.Literal.number(f"0.{self._prev.text}") 3064 3065 if self._match(TokenType.L_PAREN): 3066 comments = self._prev_comments 3067 query = self._parse_select() 3068 3069 if query: 3070 expressions = [query] 3071 else: 3072 expressions = self._parse_csv(self._parse_expression) 3073 3074 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3075 3076 if isinstance(this, exp.Subqueryable): 3077 this = self._parse_set_operations( 3078 self._parse_subquery(this=this, parse_alias=False) 3079 ) 3080 elif len(expressions) > 1: 3081 this = self.expression(exp.Tuple, expressions=expressions) 3082 else: 3083 
this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3084 3085 if this: 3086 this.add_comments(comments) 3087 self._match_r_paren(expression=this) 3088 3089 return this 3090 3091 return None 3092 3093 def _parse_field( 3094 self, 3095 any_token: bool = False, 3096 tokens: t.Optional[t.Collection[TokenType]] = None, 3097 ) -> t.Optional[exp.Expression]: 3098 return ( 3099 self._parse_primary() 3100 or self._parse_function() 3101 or self._parse_id_var(any_token=any_token, tokens=tokens) 3102 ) 3103 3104 def _parse_function( 3105 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3106 ) -> t.Optional[exp.Expression]: 3107 if not self._curr: 3108 return None 3109 3110 token_type = self._curr.token_type 3111 3112 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3113 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3114 3115 if not self._next or self._next.token_type != TokenType.L_PAREN: 3116 if token_type in self.NO_PAREN_FUNCTIONS: 3117 self._advance() 3118 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3119 3120 return None 3121 3122 if token_type not in self.FUNC_TOKENS: 3123 return None 3124 3125 this = self._curr.text 3126 upper = this.upper() 3127 self._advance(2) 3128 3129 parser = self.FUNCTION_PARSERS.get(upper) 3130 3131 if parser and not anonymous: 3132 this = parser(self) 3133 else: 3134 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3135 3136 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3137 this = self.expression(subquery_predicate, this=self._parse_select()) 3138 self._match_r_paren() 3139 return this 3140 3141 if functions is None: 3142 functions = self.FUNCTIONS 3143 3144 function = functions.get(upper) 3145 args = self._parse_csv(self._parse_lambda) 3146 3147 if function and not anonymous: 3148 this = function(args) 3149 self.validate_expression(this, args) 3150 else: 3151 this = self.expression(exp.Anonymous, 
this=this, expressions=args) 3152 3153 self._match_r_paren(this) 3154 return self._parse_window(this) 3155 3156 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3157 return self._parse_column_def(self._parse_id_var()) 3158 3159 def _parse_user_defined_function( 3160 self, kind: t.Optional[TokenType] = None 3161 ) -> t.Optional[exp.Expression]: 3162 this = self._parse_id_var() 3163 3164 while self._match(TokenType.DOT): 3165 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3166 3167 if not self._match(TokenType.L_PAREN): 3168 return this 3169 3170 expressions = self._parse_csv(self._parse_function_parameter) 3171 self._match_r_paren() 3172 return self.expression( 3173 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3174 ) 3175 3176 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3177 literal = self._parse_primary() 3178 if literal: 3179 return self.expression(exp.Introducer, this=token.text, expression=literal) 3180 3181 return self.expression(exp.Identifier, this=token.text) 3182 3183 def _parse_national(self, token: Token) -> exp.Expression: 3184 return self.expression(exp.National, this=exp.Literal.string(token.text)) 3185 3186 def _parse_session_parameter(self) -> exp.Expression: 3187 kind = None 3188 this = self._parse_id_var() or self._parse_primary() 3189 3190 if this and self._match(TokenType.DOT): 3191 kind = this.name 3192 this = self._parse_var() or self._parse_primary() 3193 3194 return self.expression(exp.SessionParameter, this=this, kind=kind) 3195 3196 def _parse_lambda(self) -> t.Optional[exp.Expression]: 3197 index = self._index 3198 3199 if self._match(TokenType.L_PAREN): 3200 expressions = self._parse_csv(self._parse_id_var) 3201 3202 if not self._match(TokenType.R_PAREN): 3203 self._retreat(index) 3204 else: 3205 expressions = [self._parse_id_var()] 3206 3207 if self._match_set(self.LAMBDAS): 3208 return self.LAMBDAS[self._prev.token_type](self, 
expressions) 3209 3210 self._retreat(index) 3211 3212 this: t.Optional[exp.Expression] 3213 3214 if self._match(TokenType.DISTINCT): 3215 this = self.expression( 3216 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3217 ) 3218 else: 3219 this = self._parse_select_or_expression() 3220 3221 if isinstance(this, exp.EQ): 3222 left = this.this 3223 if isinstance(left, exp.Column): 3224 left.replace(exp.Var(this=left.text("this"))) 3225 3226 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3227 3228 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3229 index = self._index 3230 3231 try: 3232 if self._parse_select(nested=True): 3233 return this 3234 except Exception: 3235 pass 3236 finally: 3237 self._retreat(index) 3238 3239 if not self._match(TokenType.L_PAREN): 3240 return this 3241 3242 args = self._parse_csv( 3243 lambda: self._parse_constraint() 3244 or self._parse_column_def(self._parse_field(any_token=True)) 3245 ) 3246 self._match_r_paren() 3247 return self.expression(exp.Schema, this=this, expressions=args) 3248 3249 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3250 # column defs are not really columns, they're identifiers 3251 if isinstance(this, exp.Column): 3252 this = this.this 3253 kind = self._parse_types() 3254 3255 if self._match_text_seq("FOR", "ORDINALITY"): 3256 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3257 3258 constraints = [] 3259 while True: 3260 constraint = self._parse_column_constraint() 3261 if not constraint: 3262 break 3263 constraints.append(constraint) 3264 3265 if not kind and not constraints: 3266 return this 3267 3268 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3269 3270 def _parse_auto_increment(self) -> exp.Expression: 3271 start = None 3272 increment = None 3273 3274 if self._match(TokenType.L_PAREN, advance=False): 
3275 args = self._parse_wrapped_csv(self._parse_bitwise) 3276 start = seq_get(args, 0) 3277 increment = seq_get(args, 1) 3278 elif self._match_text_seq("START"): 3279 start = self._parse_bitwise() 3280 self._match_text_seq("INCREMENT") 3281 increment = self._parse_bitwise() 3282 3283 if start and increment: 3284 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3285 3286 return exp.AutoIncrementColumnConstraint() 3287 3288 def _parse_compress(self) -> exp.Expression: 3289 if self._match(TokenType.L_PAREN, advance=False): 3290 return self.expression( 3291 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3292 ) 3293 3294 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3295 3296 def _parse_generated_as_identity(self) -> exp.Expression: 3297 if self._match(TokenType.BY_DEFAULT): 3298 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3299 this = self.expression( 3300 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3301 ) 3302 else: 3303 self._match_text_seq("ALWAYS") 3304 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3305 3306 self._match_text_seq("AS", "IDENTITY") 3307 if self._match(TokenType.L_PAREN): 3308 if self._match_text_seq("START", "WITH"): 3309 this.set("start", self._parse_bitwise()) 3310 if self._match_text_seq("INCREMENT", "BY"): 3311 this.set("increment", self._parse_bitwise()) 3312 if self._match_text_seq("MINVALUE"): 3313 this.set("minvalue", self._parse_bitwise()) 3314 if self._match_text_seq("MAXVALUE"): 3315 this.set("maxvalue", self._parse_bitwise()) 3316 3317 if self._match_text_seq("CYCLE"): 3318 this.set("cycle", True) 3319 elif self._match_text_seq("NO", "CYCLE"): 3320 this.set("cycle", False) 3321 3322 self._match_r_paren() 3323 3324 return this 3325 3326 def _parse_inline(self) -> t.Optional[exp.Expression]: 3327 self._match_text_seq("LENGTH") 3328 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3329 3330 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3331 if self._match_text_seq("NULL"): 3332 return self.expression(exp.NotNullColumnConstraint) 3333 if self._match_text_seq("CASESPECIFIC"): 3334 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3335 return None 3336 3337 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3338 if self._match(TokenType.CONSTRAINT): 3339 this = self._parse_id_var() 3340 else: 3341 this = None 3342 3343 if self._match_texts(self.CONSTRAINT_PARSERS): 3344 return self.expression( 3345 exp.ColumnConstraint, 3346 this=this, 3347 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3348 ) 3349 3350 return this 3351 3352 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3353 if not self._match(TokenType.CONSTRAINT): 3354 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3355 3356 this = self._parse_id_var() 3357 expressions = [] 3358 3359 while True: 3360 constraint = self._parse_unnamed_constraint() or self._parse_function() 3361 if not constraint: 3362 break 3363 expressions.append(constraint) 3364 3365 return self.expression(exp.Constraint, this=this, expressions=expressions) 3366 3367 def _parse_unnamed_constraint( 3368 self, constraints: t.Optional[t.Collection[str]] = None 3369 ) -> t.Optional[exp.Expression]: 3370 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3371 return None 3372 3373 constraint = self._prev.text.upper() 3374 if constraint not in self.CONSTRAINT_PARSERS: 3375 self.raise_error(f"No parser found for schema constraint {constraint}.") 3376 3377 return self.CONSTRAINT_PARSERS[constraint](self) 3378 3379 def _parse_unique(self) -> exp.Expression: 3380 if not self._match(TokenType.L_PAREN, advance=False): 3381 return self.expression(exp.UniqueColumnConstraint) 3382 return self.expression(exp.Unique, 
# NOTE(review): this chunk opens mid-method; the line below is the tail of a
# call expression whose start lies before this chunk — kept verbatim.
            expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Consume trailing key-constraint options and return them as raw SQL strings.

        Recognizes `ON <event> <action>`, `NOT ENFORCED`, `DEFERRABLE`,
        `INITIALLY DEFERRED`, `NORELY` and `MATCH FULL`; stops at the first
        unrecognized token, leaving it for the caller.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause into an exp.Reference.

        If `match` is True, the REFERENCES keyword must be present, otherwise
        None is returned without consuming anything.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        # Peek (advance=False) for an optional wrapped column list.
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint body (column list, REFERENCES, ON DELETE/UPDATE actions)."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" / "update" — these become exp.ForeignKey kwargs below.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action (e.g. CASCADE / RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY as either a column constraint or a table-level constraint.

        Without a following "(", this is a column constraint (optionally ASC/DESC);
        otherwise a wrapped field list plus key-constraint options follows.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` (subscript/array) or `{...}` (DuckDB struct) following `this`.

        Recurses at the end so chained subscripts like a[1][2] are handled.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading ":" means an open-ended slice, e.g. x[:y].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Slice if a ":" follows (e.g. x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in either function form IF(a, b, c) or statement form IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all — rewind and give up.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
# NOTE(review): the first statements below are the body of _parse_extract,
# whose `def` line sits at the end of the previous chunk.
        # EXTRACT(part FROM expr); some dialects also accept EXTRACT(part, expr).
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type).

        `strict` selects exp.Cast vs exp.TryCast. A comma instead of AS is
        parsed as a cast-to-string-type call (exp.CastToStrType).
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values need IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both operands NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one `[KEY] k [:|VALUE] v` pair of a JSON_OBJECT call."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) arguments: key/value pairs plus optional modifiers."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False
# NOTE(review): continuation of _parse_json_object from the previous line.
        # Optional trailing KEYS of "WITH/WITHOUT UNIQUE [KEYS]".
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...) arguments, honoring dialect flags for argument order and LN default."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH (cols) AGAINST (str [modifier]) into exp.MatchAgainst."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One column of the WITH clause: name, type, optional path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style arguments into exp.StrPosition.

        Supports both POSITION(needle IN haystack) and the comma form; the
        comma form's argument order is controlled by `haystack_first`.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into exp.JoinHint (name upper-cased)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        # Optional LEADING / TRAILING / BOTH prefix.
        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause (list of named windows), if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` if an IGNORE NULLS / RESPECT NULLS modifier follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function trailers after `this`: FILTER, WITHIN GROUP, nulls modifier, OVER (...).

        With alias=True this parses a named-window definition (`name AS (...)`)
        instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
# NOTE(review): continuation of _parse_window from the previous line.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword: not a window expression at all.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <start> AND <end> frame specification.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: UNBOUNDED/CURRENT ROW or an expression, plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or wrapped alias list) after `this`.

        With explicit=True, an alias is only parsed if the AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, allowing keyword tokens when permitted.

        Quoted identifiers win outright; otherwise any non-reserved token
        (any_token=True) or a token from `tokens`/ID_VAR_TOKENS is accepted,
        optionally preceded by `prefix_tokens` which are glued onto the name.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and expose it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) ->
# NOTE(review): first line completes _parse_var's return annotation, whose
# signature starts at the end of the previous line.
    t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it is a reserved keyword; return it."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a var token or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind if none produce a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list (wrapped or bare)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list (wrapped or bare)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found at the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: map matched tokens to expression types."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless optional=True."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a plain (possibly set-op) expression."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS ...)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens (e.g. "READ ONLY"), comma-separated.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed AND [NO] CHAIN is dropped on the ROLLBACK
        # branch (only savepoint is kept) — confirm this is intentional.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
# NOTE(review): continuation of _parse_add_column from the previous line.
        # Optional FIRST / AFTER column-position modifier.
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... DROP [COLUMN], defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse ALTER TABLE ... DROP PARTITION (one or more partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY clause into exp.AddConstraint."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Dispatch ALTER TABLE ... ADD to constraint or column parsing."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT v | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Dispatch ALTER TABLE ... DROP to partition or column parsing."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE, falling back to a raw exp.Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the registered show-parsers trie, defaulting to a bare exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    # NOTE(review): the next method's name and parameters continue on the
    # following line of this chunked source.
    def
# NOTE(review): the `def` keyword for this method sits at the end of the
# previous line of this chunked source.
    _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; rewind and return None on mismatch."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the registered set-parsers trie, defaulting to assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; fall back to a raw command if tokens remain."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the given multi-word options and return it as an exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and return them as an opaque exp.Command.

        The command name is taken from the `start` token's text; everything
        after it becomes the command's expression.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a registered parser; rewind if none matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No prefix match — give up.
                break
            if result == 2:
                # Full key matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token has `token_type`.

        When a match occurs, pending comments are attached to `expression`.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True and (optionally) advance if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True and (optionally) advance by two if the next two tokens match a then b."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require a "(" token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression=None):
        """Require a ")" token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True and (optionally) advance if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewind fully on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            # Peek-only mode: confirm the match but restore the position.
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var form (used for struct access)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Replace column references to lambda parameters inside `node` with bare identifiers/dots."""
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot in a chained access.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (the constructor falls back to IMMEDIATE when no level is given)
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
811 def __init__( 812 self, 813 error_level: t.Optional[ErrorLevel] = None, 814 error_message_context: int = 100, 815 index_offset: int = 0, 816 unnest_column_only: bool = False, 817 alias_post_tablesample: bool = False, 818 max_errors: int = 3, 819 null_ordering: t.Optional[str] = None, 820 ): 821 self.error_level = error_level or ErrorLevel.IMMEDIATE 822 self.error_message_context = error_message_context 823 self.index_offset = index_offset 824 self.unnest_column_only = unnest_column_only 825 self.alias_post_tablesample = alias_post_tablesample 826 self.max_errors = max_errors 827 self.null_ordering = null_ordering 828 self.reset()
840 def parse( 841 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 842 ) -> t.List[t.Optional[exp.Expression]]: 843 """ 844 Parses a list of tokens and returns a list of syntax trees, one tree 845 per parsed SQL statement. 846 847 Args: 848 raw_tokens: the list of tokens. 849 sql: the original SQL string, used to produce helpful debug messages. 850 851 Returns: 852 The list of syntax trees. 853 """ 854 return self._parse( 855 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 856 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
858 def parse_into( 859 self, 860 expression_types: exp.IntoType, 861 raw_tokens: t.List[Token], 862 sql: t.Optional[str] = None, 863 ) -> t.List[t.Optional[exp.Expression]]: 864 """ 865 Parses a list of tokens into a given Expression type. If a collection of Expression 866 types is given instead, this method will try to parse the token list into each one 867 of them, stopping at the first for which the parsing succeeds. 868 869 Args: 870 expression_types: the expression type(s) to try and parse the token list into. 871 raw_tokens: the list of tokens. 872 sql: the original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The target Expression. 876 """ 877 errors = [] 878 for expression_type in ensure_collection(expression_types): 879 parser = self.EXPRESSION_PARSERS.get(expression_type) 880 if not parser: 881 raise TypeError(f"No parser registered for {expression_type}") 882 try: 883 return self._parse(parser, raw_tokens, sql) 884 except ParseError as e: 885 e.errors[0]["into_expression"] = expression_type 886 errors.append(e) 887 raise ParseError( 888 f"Failed to parse into {expression_types}", 889 errors=merge_errors(errors), 890 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
926 def check_errors(self) -> None: 927 """ 928 Logs or raises any found errors, depending on the chosen error level setting. 929 """ 930 if self.error_level == ErrorLevel.WARN: 931 for error in self.errors: 932 logger.error(str(error)) 933 elif self.error_level == ErrorLevel.RAISE and self.errors: 934 raise ParseError( 935 concat_messages(self.errors, self.max_errors), 936 errors=merge_errors(self.errors), 937 )
Logs or raises any found errors, depending on the chosen error level setting.
939 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 940 """ 941 Appends an error in the list of recorded errors or raises it, depending on the chosen 942 error level setting. 943 """ 944 token = token or self._curr or self._prev or Token.string("") 945 start = token.start 946 end = token.end + 1 947 start_context = self.sql[max(start - self.error_message_context, 0) : start] 948 highlight = self.sql[start:end] 949 end_context = self.sql[end : end + self.error_message_context] 950 951 error = ParseError.new( 952 f"{message}. Line {token.line}, Col: {token.col}.\n" 953 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 954 description=message, 955 line=token.line, 956 col=token.col, 957 start_context=start_context, 958 highlight=highlight, 959 end_context=end_context, 960 ) 961 962 if self.error_level == ErrorLevel.IMMEDIATE: 963 raise error 964 965 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
967 def expression( 968 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 969 ) -> E: 970 """ 971 Creates a new, validated Expression. 972 973 Args: 974 exp_class: the expression class to instantiate. 975 comments: an optional list of comments to attach to the expression. 976 kwargs: the arguments to set for the expression along with their respective values. 977 978 Returns: 979 The target expression. 980 """ 981 instance = exp_class(**kwargs) 982 instance.add_comments(comments) if comments else self._add_comments(instance) 983 self.validate_expression(instance) 984 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
991 def validate_expression( 992 self, expression: exp.Expression, args: t.Optional[t.List] = None 993 ) -> None: 994 """ 995 Validates an already instantiated expression, making sure that all its mandatory arguments 996 are set. 997 998 Args: 999 expression: the expression to validate. 1000 args: an optional list of items that was used to instantiate the expression, if it's a Func. 1001 """ 1002 if self.error_level == ErrorLevel.IGNORE: 1003 return 1004 1005 for error_message in expression.error_messages(args): 1006 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.