Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_list, seq_get
  10from sqlglot.time import format_time
  11from sqlglot.tokens import Token, Tokenizer, TokenType
  12from sqlglot.trie import in_trie, new_trie
  13
  14if t.TYPE_CHECKING:
  15    from sqlglot._typing import E
  16
  17logger = logging.getLogger("sqlglot")
  18
  19
  20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
  21    if len(args) == 1 and args[0].is_star:
  22        return exp.StarMap(this=args[0])
  23
  24    keys = []
  25    values = []
  26    for i in range(0, len(args), 2):
  27        keys.append(args[i])
  28        values.append(args[i + 1])
  29
  30    return exp.VarMap(
  31        keys=exp.Array(expressions=keys),
  32        values=exp.Array(expressions=values),
  33    )
  34
  35
  36def parse_like(args: t.List) -> exp.Escape | exp.Like:
  37    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  38    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  39
  40
  41def binary_range_parser(
  42    expr_type: t.Type[exp.Expression],
  43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  44    return lambda self, this: self._parse_escape(
  45        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  46    )
  47
  48
  49class _Parser(type):
  50    def __new__(cls, clsname, bases, attrs):
  51        klass = super().__new__(cls, clsname, bases, attrs)
  52
  53        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  54        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  55
  56        return klass
  57
  58
  59class Parser(metaclass=_Parser):
  60    """
  61    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
  62
  63    Args:
  64        error_level: The desired error level.
  65            Default: ErrorLevel.IMMEDIATE
  66        error_message_context: Determines the amount of context to capture from a
  67            query string when displaying the error message (in number of characters).
  68            Default: 100
  69        max_errors: Maximum number of error messages to include in a raised ParseError.
  70            This is only relevant if error_level is ErrorLevel.RAISE.
  71            Default: 3
  72    """
  73
  74    FUNCTIONS: t.Dict[str, t.Callable] = {
  75        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  76        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  77            this=seq_get(args, 0),
  78            to=exp.DataType(this=exp.DataType.Type.TEXT),
  79        ),
  80        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  81        "LIKE": parse_like,
  82        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  83            this=seq_get(args, 0),
  84            to=exp.DataType(this=exp.DataType.Type.TEXT),
  85        ),
  86        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  87            this=exp.Cast(
  88                this=seq_get(args, 0),
  89                to=exp.DataType(this=exp.DataType.Type.TEXT),
  90            ),
  91            start=exp.Literal.number(1),
  92            length=exp.Literal.number(10),
  93        ),
  94        "VAR_MAP": parse_var_map,
  95    }
  96
  97    NO_PAREN_FUNCTIONS = {
  98        TokenType.CURRENT_DATE: exp.CurrentDate,
  99        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 100        TokenType.CURRENT_TIME: exp.CurrentTime,
 101        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 102        TokenType.CURRENT_USER: exp.CurrentUser,
 103    }
 104
 105    NESTED_TYPE_TOKENS = {
 106        TokenType.ARRAY,
 107        TokenType.MAP,
 108        TokenType.NULLABLE,
 109        TokenType.STRUCT,
 110    }
 111
 112    TYPE_TOKENS = {
 113        TokenType.BIT,
 114        TokenType.BOOLEAN,
 115        TokenType.TINYINT,
 116        TokenType.UTINYINT,
 117        TokenType.SMALLINT,
 118        TokenType.USMALLINT,
 119        TokenType.INT,
 120        TokenType.UINT,
 121        TokenType.BIGINT,
 122        TokenType.UBIGINT,
 123        TokenType.INT128,
 124        TokenType.UINT128,
 125        TokenType.INT256,
 126        TokenType.UINT256,
 127        TokenType.FLOAT,
 128        TokenType.DOUBLE,
 129        TokenType.CHAR,
 130        TokenType.NCHAR,
 131        TokenType.VARCHAR,
 132        TokenType.NVARCHAR,
 133        TokenType.TEXT,
 134        TokenType.MEDIUMTEXT,
 135        TokenType.LONGTEXT,
 136        TokenType.MEDIUMBLOB,
 137        TokenType.LONGBLOB,
 138        TokenType.BINARY,
 139        TokenType.VARBINARY,
 140        TokenType.JSON,
 141        TokenType.JSONB,
 142        TokenType.INTERVAL,
 143        TokenType.TIME,
 144        TokenType.TIMESTAMP,
 145        TokenType.TIMESTAMPTZ,
 146        TokenType.TIMESTAMPLTZ,
 147        TokenType.DATETIME,
 148        TokenType.DATETIME64,
 149        TokenType.DATE,
 150        TokenType.INT4RANGE,
 151        TokenType.INT4MULTIRANGE,
 152        TokenType.INT8RANGE,
 153        TokenType.INT8MULTIRANGE,
 154        TokenType.NUMRANGE,
 155        TokenType.NUMMULTIRANGE,
 156        TokenType.TSRANGE,
 157        TokenType.TSMULTIRANGE,
 158        TokenType.TSTZRANGE,
 159        TokenType.TSTZMULTIRANGE,
 160        TokenType.DATERANGE,
 161        TokenType.DATEMULTIRANGE,
 162        TokenType.DECIMAL,
 163        TokenType.BIGDECIMAL,
 164        TokenType.UUID,
 165        TokenType.GEOGRAPHY,
 166        TokenType.GEOMETRY,
 167        TokenType.HLLSKETCH,
 168        TokenType.HSTORE,
 169        TokenType.PSEUDO_TYPE,
 170        TokenType.SUPER,
 171        TokenType.SERIAL,
 172        TokenType.SMALLSERIAL,
 173        TokenType.BIGSERIAL,
 174        TokenType.XML,
 175        TokenType.UNIQUEIDENTIFIER,
 176        TokenType.MONEY,
 177        TokenType.SMALLMONEY,
 178        TokenType.ROWVERSION,
 179        TokenType.IMAGE,
 180        TokenType.VARIANT,
 181        TokenType.OBJECT,
 182        TokenType.INET,
 183        *NESTED_TYPE_TOKENS,
 184    }
 185
 186    SUBQUERY_PREDICATES = {
 187        TokenType.ANY: exp.Any,
 188        TokenType.ALL: exp.All,
 189        TokenType.EXISTS: exp.Exists,
 190        TokenType.SOME: exp.Any,
 191    }
 192
 193    RESERVED_KEYWORDS = {
 194        *Tokenizer.SINGLE_TOKENS.values(),
 195        TokenType.SELECT,
 196    }
 197
 198    DB_CREATABLES = {
 199        TokenType.DATABASE,
 200        TokenType.SCHEMA,
 201        TokenType.TABLE,
 202        TokenType.VIEW,
 203        TokenType.DICTIONARY,
 204    }
 205
 206    CREATABLES = {
 207        TokenType.COLUMN,
 208        TokenType.FUNCTION,
 209        TokenType.INDEX,
 210        TokenType.PROCEDURE,
 211        *DB_CREATABLES,
 212    }
 213
 214    # Tokens that can represent identifiers
 215    ID_VAR_TOKENS = {
 216        TokenType.VAR,
 217        TokenType.ANTI,
 218        TokenType.APPLY,
 219        TokenType.ASC,
 220        TokenType.AUTO_INCREMENT,
 221        TokenType.BEGIN,
 222        TokenType.CACHE,
 223        TokenType.COLLATE,
 224        TokenType.COMMAND,
 225        TokenType.COMMENT,
 226        TokenType.COMMIT,
 227        TokenType.CONSTRAINT,
 228        TokenType.DEFAULT,
 229        TokenType.DELETE,
 230        TokenType.DESC,
 231        TokenType.DESCRIBE,
 232        TokenType.DICTIONARY,
 233        TokenType.DIV,
 234        TokenType.END,
 235        TokenType.EXECUTE,
 236        TokenType.ESCAPE,
 237        TokenType.FALSE,
 238        TokenType.FIRST,
 239        TokenType.FILTER,
 240        TokenType.FORMAT,
 241        TokenType.FULL,
 242        TokenType.IF,
 243        TokenType.IS,
 244        TokenType.ISNULL,
 245        TokenType.INTERVAL,
 246        TokenType.KEEP,
 247        TokenType.LEFT,
 248        TokenType.LOAD,
 249        TokenType.MERGE,
 250        TokenType.NATURAL,
 251        TokenType.NEXT,
 252        TokenType.OFFSET,
 253        TokenType.ORDINALITY,
 254        TokenType.OVERWRITE,
 255        TokenType.PARTITION,
 256        TokenType.PERCENT,
 257        TokenType.PIVOT,
 258        TokenType.PRAGMA,
 259        TokenType.RANGE,
 260        TokenType.REFERENCES,
 261        TokenType.RIGHT,
 262        TokenType.ROW,
 263        TokenType.ROWS,
 264        TokenType.SEMI,
 265        TokenType.SET,
 266        TokenType.SETTINGS,
 267        TokenType.SHOW,
 268        TokenType.TEMPORARY,
 269        TokenType.TOP,
 270        TokenType.TRUE,
 271        TokenType.UNIQUE,
 272        TokenType.UNPIVOT,
 273        TokenType.VOLATILE,
 274        TokenType.WINDOW,
 275        *CREATABLES,
 276        *SUBQUERY_PREDICATES,
 277        *TYPE_TOKENS,
 278        *NO_PAREN_FUNCTIONS,
 279    }
 280
 281    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 282
 283    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 284        TokenType.APPLY,
 285        TokenType.ASOF,
 286        TokenType.FULL,
 287        TokenType.LEFT,
 288        TokenType.LOCK,
 289        TokenType.NATURAL,
 290        TokenType.OFFSET,
 291        TokenType.RIGHT,
 292        TokenType.WINDOW,
 293    }
 294
 295    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 296
 297    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 298
 299    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 300
 301    FUNC_TOKENS = {
 302        TokenType.COMMAND,
 303        TokenType.CURRENT_DATE,
 304        TokenType.CURRENT_DATETIME,
 305        TokenType.CURRENT_TIMESTAMP,
 306        TokenType.CURRENT_TIME,
 307        TokenType.CURRENT_USER,
 308        TokenType.FILTER,
 309        TokenType.FIRST,
 310        TokenType.FORMAT,
 311        TokenType.GLOB,
 312        TokenType.IDENTIFIER,
 313        TokenType.INDEX,
 314        TokenType.ISNULL,
 315        TokenType.ILIKE,
 316        TokenType.LIKE,
 317        TokenType.MERGE,
 318        TokenType.OFFSET,
 319        TokenType.PRIMARY_KEY,
 320        TokenType.RANGE,
 321        TokenType.REPLACE,
 322        TokenType.ROW,
 323        TokenType.UNNEST,
 324        TokenType.VAR,
 325        TokenType.LEFT,
 326        TokenType.RIGHT,
 327        TokenType.DATE,
 328        TokenType.DATETIME,
 329        TokenType.TABLE,
 330        TokenType.TIMESTAMP,
 331        TokenType.TIMESTAMPTZ,
 332        TokenType.WINDOW,
 333        *TYPE_TOKENS,
 334        *SUBQUERY_PREDICATES,
 335    }
 336
 337    CONJUNCTION = {
 338        TokenType.AND: exp.And,
 339        TokenType.OR: exp.Or,
 340    }
 341
 342    EQUALITY = {
 343        TokenType.EQ: exp.EQ,
 344        TokenType.NEQ: exp.NEQ,
 345        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 346    }
 347
 348    COMPARISON = {
 349        TokenType.GT: exp.GT,
 350        TokenType.GTE: exp.GTE,
 351        TokenType.LT: exp.LT,
 352        TokenType.LTE: exp.LTE,
 353    }
 354
 355    BITWISE = {
 356        TokenType.AMP: exp.BitwiseAnd,
 357        TokenType.CARET: exp.BitwiseXor,
 358        TokenType.PIPE: exp.BitwiseOr,
 359        TokenType.DPIPE: exp.DPipe,
 360    }
 361
 362    TERM = {
 363        TokenType.DASH: exp.Sub,
 364        TokenType.PLUS: exp.Add,
 365        TokenType.MOD: exp.Mod,
 366        TokenType.COLLATE: exp.Collate,
 367    }
 368
 369    FACTOR = {
 370        TokenType.DIV: exp.IntDiv,
 371        TokenType.LR_ARROW: exp.Distance,
 372        TokenType.SLASH: exp.Div,
 373        TokenType.STAR: exp.Mul,
 374    }
 375
 376    TIMESTAMPS = {
 377        TokenType.TIME,
 378        TokenType.TIMESTAMP,
 379        TokenType.TIMESTAMPTZ,
 380        TokenType.TIMESTAMPLTZ,
 381    }
 382
 383    SET_OPERATIONS = {
 384        TokenType.UNION,
 385        TokenType.INTERSECT,
 386        TokenType.EXCEPT,
 387    }
 388
 389    JOIN_METHODS = {
 390        TokenType.NATURAL,
 391        TokenType.ASOF,
 392    }
 393
 394    JOIN_SIDES = {
 395        TokenType.LEFT,
 396        TokenType.RIGHT,
 397        TokenType.FULL,
 398    }
 399
 400    JOIN_KINDS = {
 401        TokenType.INNER,
 402        TokenType.OUTER,
 403        TokenType.CROSS,
 404        TokenType.SEMI,
 405        TokenType.ANTI,
 406    }
 407
 408    JOIN_HINTS: t.Set[str] = set()
 409
 410    LAMBDAS = {
 411        TokenType.ARROW: lambda self, expressions: self.expression(
 412            exp.Lambda,
 413            this=self._replace_lambda(
 414                self._parse_conjunction(),
 415                {node.name for node in expressions},
 416            ),
 417            expressions=expressions,
 418        ),
 419        TokenType.FARROW: lambda self, expressions: self.expression(
 420            exp.Kwarg,
 421            this=exp.var(expressions[0].name),
 422            expression=self._parse_conjunction(),
 423        ),
 424    }
 425
 426    COLUMN_OPERATORS = {
 427        TokenType.DOT: None,
 428        TokenType.DCOLON: lambda self, this, to: self.expression(
 429            exp.Cast if self.STRICT_CAST else exp.TryCast,
 430            this=this,
 431            to=to,
 432        ),
 433        TokenType.ARROW: lambda self, this, path: self.expression(
 434            exp.JSONExtract,
 435            this=this,
 436            expression=path,
 437        ),
 438        TokenType.DARROW: lambda self, this, path: self.expression(
 439            exp.JSONExtractScalar,
 440            this=this,
 441            expression=path,
 442        ),
 443        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 444            exp.JSONBExtract,
 445            this=this,
 446            expression=path,
 447        ),
 448        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 449            exp.JSONBExtractScalar,
 450            this=this,
 451            expression=path,
 452        ),
 453        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 454            exp.JSONBContains,
 455            this=this,
 456            expression=key,
 457        ),
 458    }
 459
 460    EXPRESSION_PARSERS = {
 461        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 462        exp.Column: lambda self: self._parse_column(),
 463        exp.Condition: lambda self: self._parse_conjunction(),
 464        exp.DataType: lambda self: self._parse_types(),
 465        exp.Expression: lambda self: self._parse_statement(),
 466        exp.From: lambda self: self._parse_from(),
 467        exp.Group: lambda self: self._parse_group(),
 468        exp.Having: lambda self: self._parse_having(),
 469        exp.Identifier: lambda self: self._parse_id_var(),
 470        exp.Join: lambda self: self._parse_join(),
 471        exp.Lambda: lambda self: self._parse_lambda(),
 472        exp.Lateral: lambda self: self._parse_lateral(),
 473        exp.Limit: lambda self: self._parse_limit(),
 474        exp.Offset: lambda self: self._parse_offset(),
 475        exp.Order: lambda self: self._parse_order(),
 476        exp.Ordered: lambda self: self._parse_ordered(),
 477        exp.Properties: lambda self: self._parse_properties(),
 478        exp.Qualify: lambda self: self._parse_qualify(),
 479        exp.Returning: lambda self: self._parse_returning(),
 480        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 481        exp.Table: lambda self: self._parse_table_parts(),
 482        exp.TableAlias: lambda self: self._parse_table_alias(),
 483        exp.Where: lambda self: self._parse_where(),
 484        exp.Window: lambda self: self._parse_named_window(),
 485        exp.With: lambda self: self._parse_with(),
 486        "JOIN_TYPE": lambda self: self._parse_join_parts(),
 487    }
 488
 489    STATEMENT_PARSERS = {
 490        TokenType.ALTER: lambda self: self._parse_alter(),
 491        TokenType.BEGIN: lambda self: self._parse_transaction(),
 492        TokenType.CACHE: lambda self: self._parse_cache(),
 493        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 494        TokenType.COMMENT: lambda self: self._parse_comment(),
 495        TokenType.CREATE: lambda self: self._parse_create(),
 496        TokenType.DELETE: lambda self: self._parse_delete(),
 497        TokenType.DESC: lambda self: self._parse_describe(),
 498        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 499        TokenType.DROP: lambda self: self._parse_drop(),
 500        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 501        TokenType.FROM: lambda self: exp.select("*").from_(
 502            t.cast(exp.From, self._parse_from(skip_from_token=True))
 503        ),
 504        TokenType.INSERT: lambda self: self._parse_insert(),
 505        TokenType.LOAD: lambda self: self._parse_load(),
 506        TokenType.MERGE: lambda self: self._parse_merge(),
 507        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 508        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 509        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 510        TokenType.SET: lambda self: self._parse_set(),
 511        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 512        TokenType.UPDATE: lambda self: self._parse_update(),
 513        TokenType.USE: lambda self: self.expression(
 514            exp.Use,
 515            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 516            and exp.var(self._prev.text),
 517            this=self._parse_table(schema=False),
 518        ),
 519    }
 520
 521    UNARY_PARSERS = {
 522        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 523        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 524        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 525        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 526    }
 527
 528    PRIMARY_PARSERS = {
 529        TokenType.STRING: lambda self, token: self.expression(
 530            exp.Literal, this=token.text, is_string=True
 531        ),
 532        TokenType.NUMBER: lambda self, token: self.expression(
 533            exp.Literal, this=token.text, is_string=False
 534        ),
 535        TokenType.STAR: lambda self, _: self.expression(
 536            exp.Star,
 537            **{"except": self._parse_except(), "replace": self._parse_replace()},
 538        ),
 539        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 540        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 541        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 542        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 543        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 544        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 545        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 546        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 547            exp.National, this=token.text
 548        ),
 549        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 550        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 551    }
 552
 553    PLACEHOLDER_PARSERS = {
 554        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 555        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 556        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 557        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 558        else None,
 559    }
 560
 561    RANGE_PARSERS = {
 562        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 563        TokenType.GLOB: binary_range_parser(exp.Glob),
 564        TokenType.ILIKE: binary_range_parser(exp.ILike),
 565        TokenType.IN: lambda self, this: self._parse_in(this),
 566        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 567        TokenType.IS: lambda self, this: self._parse_is(this),
 568        TokenType.LIKE: binary_range_parser(exp.Like),
 569        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 570        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 571        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 572    }
 573
 574    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 575        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 576        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 577        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 578        "CHARACTER SET": lambda self: self._parse_character_set(),
 579        "CHECKSUM": lambda self: self._parse_checksum(),
 580        "CLUSTER": lambda self: self._parse_cluster(),
 581        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 582        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 583        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 584        "DEFINER": lambda self: self._parse_definer(),
 585        "DETERMINISTIC": lambda self: self.expression(
 586            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 587        ),
 588        "DISTKEY": lambda self: self._parse_distkey(),
 589        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 590        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 591        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 592        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 593        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 594        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 595        "FREESPACE": lambda self: self._parse_freespace(),
 596        "IMMUTABLE": lambda self: self.expression(
 597            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 598        ),
 599        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 600        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 601        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
 602        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
 603        "LIKE": lambda self: self._parse_create_like(),
 604        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 605        "LOCK": lambda self: self._parse_locking(),
 606        "LOCKING": lambda self: self._parse_locking(),
 607        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 608        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 609        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 610        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 611        "NO": lambda self: self._parse_no_property(),
 612        "ON": lambda self: self._parse_on_property(),
 613        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 614        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 615        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 616        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 617        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
 618        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
 619        "RETURNS": lambda self: self._parse_returns(),
 620        "ROW": lambda self: self._parse_row(),
 621        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 622        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 623        "SETTINGS": lambda self: self.expression(
 624            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 625        ),
 626        "SORTKEY": lambda self: self._parse_sortkey(),
 627        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
 628        "STABLE": lambda self: self.expression(
 629            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 630        ),
 631        "STORED": lambda self: self._parse_stored(),
 632        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 633        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 634        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 635        "TO": lambda self: self._parse_to_table(),
 636        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 637        "TTL": lambda self: self._parse_ttl(),
 638        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 639        "VOLATILE": lambda self: self._parse_volatile_property(),
 640        "WITH": lambda self: self._parse_with_property(),
 641    }
 642
 643    CONSTRAINT_PARSERS = {
 644        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 645        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 646        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 647        "CHARACTER SET": lambda self: self.expression(
 648            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 649        ),
 650        "CHECK": lambda self: self.expression(
 651            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 652        ),
 653        "COLLATE": lambda self: self.expression(
 654            exp.CollateColumnConstraint, this=self._parse_var()
 655        ),
 656        "COMMENT": lambda self: self.expression(
 657            exp.CommentColumnConstraint, this=self._parse_string()
 658        ),
 659        "COMPRESS": lambda self: self._parse_compress(),
 660        "DEFAULT": lambda self: self.expression(
 661            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 662        ),
 663        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 664        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 665        "FORMAT": lambda self: self.expression(
 666            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 667        ),
 668        "GENERATED": lambda self: self._parse_generated_as_identity(),
 669        "IDENTITY": lambda self: self._parse_auto_increment(),
 670        "INLINE": lambda self: self._parse_inline(),
 671        "LIKE": lambda self: self._parse_create_like(),
 672        "NOT": lambda self: self._parse_not_constraint(),
 673        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 674        "ON": lambda self: self._match(TokenType.UPDATE)
 675        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 676        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 677        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 678        "REFERENCES": lambda self: self._parse_references(match=False),
 679        "TITLE": lambda self: self.expression(
 680            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 681        ),
 682        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 683        "UNIQUE": lambda self: self._parse_unique(),
 684        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 685    }
 686
 687    ALTER_PARSERS = {
 688        "ADD": lambda self: self._parse_alter_table_add(),
 689        "ALTER": lambda self: self._parse_alter_table_alter(),
 690        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 691        "DROP": lambda self: self._parse_alter_table_drop(),
 692        "RENAME": lambda self: self._parse_alter_table_rename(),
 693    }
 694
 695    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 696
 697    NO_PAREN_FUNCTION_PARSERS = {
 698        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 699        TokenType.CASE: lambda self: self._parse_case(),
 700        TokenType.IF: lambda self: self._parse_if(),
 701        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 702            exp.NextValueFor,
 703            this=self._parse_column(),
 704            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 705        ),
 706    }
 707
 708    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 709
 710    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 711        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 712        "CONCAT": lambda self: self._parse_concat(),
 713        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 714        "DECODE": lambda self: self._parse_decode(),
 715        "EXTRACT": lambda self: self._parse_extract(),
 716        "JSON_OBJECT": lambda self: self._parse_json_object(),
 717        "LOG": lambda self: self._parse_logarithm(),
 718        "MATCH": lambda self: self._parse_match_against(),
 719        "OPENJSON": lambda self: self._parse_open_json(),
 720        "POSITION": lambda self: self._parse_position(),
 721        "SAFE_CAST": lambda self: self._parse_cast(False),
 722        "STRING_AGG": lambda self: self._parse_string_agg(),
 723        "SUBSTRING": lambda self: self._parse_substring(),
 724        "TRIM": lambda self: self._parse_trim(),
 725        "TRY_CAST": lambda self: self._parse_cast(False),
 726        "TRY_CONVERT": lambda self: self._parse_convert(False),
 727    }
 728
 729    QUERY_MODIFIER_PARSERS = {
 730        "joins": lambda self: list(iter(self._parse_join, None)),
 731        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 732        "match": lambda self: self._parse_match_recognize(),
 733        "where": lambda self: self._parse_where(),
 734        "group": lambda self: self._parse_group(),
 735        "having": lambda self: self._parse_having(),
 736        "qualify": lambda self: self._parse_qualify(),
 737        "windows": lambda self: self._parse_window_clause(),
 738        "order": lambda self: self._parse_order(),
 739        "limit": lambda self: self._parse_limit(),
 740        "offset": lambda self: self._parse_offset(),
 741        "locks": lambda self: self._parse_locks(),
 742        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 743    }
 744
 745    SET_PARSERS = {
 746        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 747        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 748        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 749        "TRANSACTION": lambda self: self._parse_set_transaction(),
 750    }
 751
 752    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 753
 754    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 755
 756    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 757
 758    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
 759
 760    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
 761
 762    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 763    TRANSACTION_CHARACTERISTICS = {
 764        "ISOLATION LEVEL REPEATABLE READ",
 765        "ISOLATION LEVEL READ COMMITTED",
 766        "ISOLATION LEVEL READ UNCOMMITTED",
 767        "ISOLATION LEVEL SERIALIZABLE",
 768        "READ WRITE",
 769        "READ ONLY",
 770    }
 771
 772    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 773
 774    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 775
 776    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 777    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 778    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
 779
 780    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 781
 782    STRICT_CAST = True
 783
 784    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default
 785
 786    CONVERT_TYPE_FIRST = False
 787
 788    PREFIXED_PIVOT_COLUMNS = False
 789    IDENTIFY_PIVOT_STRINGS = False
 790
 791    LOG_BASE_FIRST = True
 792    LOG_DEFAULTS_TO_LN = False
 793
 794    __slots__ = (
 795        "error_level",
 796        "error_message_context",
 797        "max_errors",
 798        "sql",
 799        "errors",
 800        "_tokens",
 801        "_index",
 802        "_curr",
 803        "_next",
 804        "_prev",
 805        "_prev_comments",
 806    )
 807
 808    # Autofilled
 809    INDEX_OFFSET: int = 0
 810    UNNEST_COLUMN_ONLY: bool = False
 811    ALIAS_POST_TABLESAMPLE: bool = False
 812    STRICT_STRING_CONCAT = False
 813    NULL_ORDERING: str = "nulls_are_small"
 814    SHOW_TRIE: t.Dict = {}
 815    SET_TRIE: t.Dict = {}
 816    FORMAT_MAPPING: t.Dict[str, str] = {}
 817    FORMAT_TRIE: t.Dict = {}
 818    TIME_MAPPING: t.Dict[str, str] = {}
 819    TIME_TRIE: t.Dict = {}
 820
 821    def __init__(
 822        self,
 823        error_level: t.Optional[ErrorLevel] = None,
 824        error_message_context: int = 100,
 825        max_errors: int = 3,
 826    ):
 827        self.error_level = error_level or ErrorLevel.IMMEDIATE
 828        self.error_message_context = error_message_context
 829        self.max_errors = max_errors
 830        self.reset()
 831
 832    def reset(self):
 833        self.sql = ""
 834        self.errors = []
 835        self._tokens = []
 836        self._index = 0
 837        self._curr = None
 838        self._next = None
 839        self._prev = None
 840        self._prev_comments = None
 841
 842    def parse(
 843        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 844    ) -> t.List[t.Optional[exp.Expression]]:
 845        """
 846        Parses a list of tokens and returns a list of syntax trees, one tree
 847        per parsed SQL statement.
 848
 849        Args:
 850            raw_tokens: The list of tokens.
 851            sql: The original SQL string, used to produce helpful debug messages.
 852
 853        Returns:
 854            The list of the produced syntax trees.
 855        """
 856        return self._parse(
 857            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 858        )
 859
 860    def parse_into(
 861        self,
 862        expression_types: exp.IntoType,
 863        raw_tokens: t.List[Token],
 864        sql: t.Optional[str] = None,
 865    ) -> t.List[t.Optional[exp.Expression]]:
 866        """
 867        Parses a list of tokens into a given Expression type. If a collection of Expression
 868        types is given instead, this method will try to parse the token list into each one
 869        of them, stopping at the first for which the parsing succeeds.
 870
 871        Args:
 872            expression_types: The expression type(s) to try and parse the token list into.
 873            raw_tokens: The list of tokens.
 874            sql: The original SQL string, used to produce helpful debug messages.
 875
 876        Returns:
 877            The target Expression.
 878        """
 879        errors = []
 880        for expression_type in ensure_list(expression_types):
 881            parser = self.EXPRESSION_PARSERS.get(expression_type)
 882            if not parser:
 883                raise TypeError(f"No parser registered for {expression_type}")
 884
 885            try:
 886                return self._parse(parser, raw_tokens, sql)
 887            except ParseError as e:
 888                e.errors[0]["into_expression"] = expression_type
 889                errors.append(e)
 890
 891        raise ParseError(
 892            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 893            errors=merge_errors(errors),
 894        ) from errors[-1]
 895
 896    def _parse(
 897        self,
 898        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 899        raw_tokens: t.List[Token],
 900        sql: t.Optional[str] = None,
 901    ) -> t.List[t.Optional[exp.Expression]]:
 902        self.reset()
 903        self.sql = sql or ""
 904
 905        total = len(raw_tokens)
 906        chunks: t.List[t.List[Token]] = [[]]
 907
 908        for i, token in enumerate(raw_tokens):
 909            if token.token_type == TokenType.SEMICOLON:
 910                if i < total - 1:
 911                    chunks.append([])
 912            else:
 913                chunks[-1].append(token)
 914
 915        expressions = []
 916
 917        for tokens in chunks:
 918            self._index = -1
 919            self._tokens = tokens
 920            self._advance()
 921
 922            expressions.append(parse_method(self))
 923
 924            if self._index < len(self._tokens):
 925                self.raise_error("Invalid expression / Unexpected token")
 926
 927            self.check_errors()
 928
 929        return expressions
 930
 931    def check_errors(self) -> None:
 932        """Logs or raises any found errors, depending on the chosen error level setting."""
 933        if self.error_level == ErrorLevel.WARN:
 934            for error in self.errors:
 935                logger.error(str(error))
 936        elif self.error_level == ErrorLevel.RAISE and self.errors:
 937            raise ParseError(
 938                concat_messages(self.errors, self.max_errors),
 939                errors=merge_errors(self.errors),
 940            )
 941
 942    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 943        """
 944        Appends an error in the list of recorded errors or raises it, depending on the chosen
 945        error level setting.
 946        """
 947        token = token or self._curr or self._prev or Token.string("")
 948        start = token.start
 949        end = token.end + 1
 950        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 951        highlight = self.sql[start:end]
 952        end_context = self.sql[end : end + self.error_message_context]
 953
 954        error = ParseError.new(
 955            f"{message}. Line {token.line}, Col: {token.col}.\n"
 956            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 957            description=message,
 958            line=token.line,
 959            col=token.col,
 960            start_context=start_context,
 961            highlight=highlight,
 962            end_context=end_context,
 963        )
 964
 965        if self.error_level == ErrorLevel.IMMEDIATE:
 966            raise error
 967
 968        self.errors.append(error)
 969
 970    def expression(
 971        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 972    ) -> E:
 973        """
 974        Creates a new, validated Expression.
 975
 976        Args:
 977            exp_class: The expression class to instantiate.
 978            comments: An optional list of comments to attach to the expression.
 979            kwargs: The arguments to set for the expression along with their respective values.
 980
 981        Returns:
 982            The target expression.
 983        """
 984        instance = exp_class(**kwargs)
 985        instance.add_comments(comments) if comments else self._add_comments(instance)
 986        return self.validate_expression(instance)
 987
 988    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 989        if expression and self._prev_comments:
 990            expression.add_comments(self._prev_comments)
 991            self._prev_comments = None
 992
 993    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
 994        """
 995        Validates an Expression, making sure that all its mandatory arguments are set.
 996
 997        Args:
 998            expression: The expression to validate.
 999            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1000
1001        Returns:
1002            The validated expression.
1003        """
1004        if self.error_level != ErrorLevel.IGNORE:
1005            for error_message in expression.error_messages(args):
1006                self.raise_error(error_message)
1007
1008        return expression
1009
1010    def _find_sql(self, start: Token, end: Token) -> str:
1011        return self.sql[start.start : end.end + 1]
1012
1013    def _advance(self, times: int = 1) -> None:
1014        self._index += times
1015        self._curr = seq_get(self._tokens, self._index)
1016        self._next = seq_get(self._tokens, self._index + 1)
1017
1018        if self._index > 0:
1019            self._prev = self._tokens[self._index - 1]
1020            self._prev_comments = self._prev.comments
1021        else:
1022            self._prev = None
1023            self._prev_comments = None
1024
1025    def _retreat(self, index: int) -> None:
1026        if index != self._index:
1027            self._advance(index - self._index)
1028
1029    def _parse_command(self) -> exp.Command:
1030        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1031
1032    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1033        start = self._prev
1034        exists = self._parse_exists() if allow_exists else None
1035
1036        self._match(TokenType.ON)
1037
1038        kind = self._match_set(self.CREATABLES) and self._prev
1039        if not kind:
1040            return self._parse_as_command(start)
1041
1042        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1043            this = self._parse_user_defined_function(kind=kind.token_type)
1044        elif kind.token_type == TokenType.TABLE:
1045            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1046        elif kind.token_type == TokenType.COLUMN:
1047            this = self._parse_column()
1048        else:
1049            this = self._parse_id_var()
1050
1051        self._match(TokenType.IS)
1052
1053        return self.expression(
1054            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1055        )
1056
1057    def _parse_to_table(
1058        self,
1059    ) -> exp.ToTableProperty:
1060        table = self._parse_table_parts(schema=True)
1061        return self.expression(exp.ToTableProperty, this=table)
1062
1063    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1064    def _parse_ttl(self) -> exp.Expression:
1065        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1066            this = self._parse_bitwise()
1067
1068            if self._match_text_seq("DELETE"):
1069                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1070            if self._match_text_seq("RECOMPRESS"):
1071                return self.expression(
1072                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1073                )
1074            if self._match_text_seq("TO", "DISK"):
1075                return self.expression(
1076                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1077                )
1078            if self._match_text_seq("TO", "VOLUME"):
1079                return self.expression(
1080                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1081                )
1082
1083            return this
1084
1085        expressions = self._parse_csv(_parse_ttl_action)
1086        where = self._parse_where()
1087        group = self._parse_group()
1088
1089        aggregates = None
1090        if group and self._match(TokenType.SET):
1091            aggregates = self._parse_csv(self._parse_set_item)
1092
1093        return self.expression(
1094            exp.MergeTreeTTL,
1095            expressions=expressions,
1096            where=where,
1097            group=group,
1098            aggregates=aggregates,
1099        )
1100
1101    def _parse_statement(self) -> t.Optional[exp.Expression]:
1102        if self._curr is None:
1103            return None
1104
1105        if self._match_set(self.STATEMENT_PARSERS):
1106            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1107
1108        if self._match_set(Tokenizer.COMMANDS):
1109            return self._parse_command()
1110
1111        expression = self._parse_expression()
1112        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1113        return self._parse_query_modifiers(expression)
1114
1115    def _parse_drop(self) -> exp.Drop | exp.Command:
1116        start = self._prev
1117        temporary = self._match(TokenType.TEMPORARY)
1118        materialized = self._match_text_seq("MATERIALIZED")
1119
1120        kind = self._match_set(self.CREATABLES) and self._prev.text
1121        if not kind:
1122            return self._parse_as_command(start)
1123
1124        return self.expression(
1125            exp.Drop,
1126            exists=self._parse_exists(),
1127            this=self._parse_table(schema=True),
1128            kind=kind,
1129            temporary=temporary,
1130            materialized=materialized,
1131            cascade=self._match_text_seq("CASCADE"),
1132            constraints=self._match_text_seq("CONSTRAINTS"),
1133            purge=self._match_text_seq("PURGE"),
1134        )
1135
1136    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1137        return (
1138            self._match(TokenType.IF)
1139            and (not not_ or self._match(TokenType.NOT))
1140            and self._match(TokenType.EXISTS)
1141        )
1142
1143    def _parse_create(self) -> exp.Create | exp.Command:
1144        # Note: this can't be None because we've matched a statement parser
1145        start = self._prev
1146        replace = start.text.upper() == "REPLACE" or self._match_pair(
1147            TokenType.OR, TokenType.REPLACE
1148        )
1149        unique = self._match(TokenType.UNIQUE)
1150
1151        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1152            self._advance()
1153
1154        properties = None
1155        create_token = self._match_set(self.CREATABLES) and self._prev
1156
1157        if not create_token:
1158            # exp.Properties.Location.POST_CREATE
1159            properties = self._parse_properties()
1160            create_token = self._match_set(self.CREATABLES) and self._prev
1161
1162            if not properties or not create_token:
1163                return self._parse_as_command(start)
1164
1165        exists = self._parse_exists(not_=True)
1166        this = None
1167        expression = None
1168        indexes = None
1169        no_schema_binding = None
1170        begin = None
1171        clone = None
1172
1173        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1174            nonlocal properties
1175            if properties and temp_props:
1176                properties.expressions.extend(temp_props.expressions)
1177            elif temp_props:
1178                properties = temp_props
1179
1180        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1181            this = self._parse_user_defined_function(kind=create_token.token_type)
1182
1183            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1184            extend_props(self._parse_properties())
1185
1186            self._match(TokenType.ALIAS)
1187            begin = self._match(TokenType.BEGIN)
1188            return_ = self._match_text_seq("RETURN")
1189            expression = self._parse_statement()
1190
1191            if return_:
1192                expression = self.expression(exp.Return, this=expression)
1193        elif create_token.token_type == TokenType.INDEX:
1194            this = self._parse_index(index=self._parse_id_var())
1195        elif create_token.token_type in self.DB_CREATABLES:
1196            table_parts = self._parse_table_parts(schema=True)
1197
1198            # exp.Properties.Location.POST_NAME
1199            self._match(TokenType.COMMA)
1200            extend_props(self._parse_properties(before=True))
1201
1202            this = self._parse_schema(this=table_parts)
1203
1204            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1205            extend_props(self._parse_properties())
1206
1207            self._match(TokenType.ALIAS)
1208            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1209                # exp.Properties.Location.POST_ALIAS
1210                extend_props(self._parse_properties())
1211
1212            expression = self._parse_ddl_select()
1213
1214            if create_token.token_type == TokenType.TABLE:
1215                indexes = []
1216                while True:
1217                    index = self._parse_index()
1218
1219                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
1220                    extend_props(self._parse_properties())
1221
1222                    if not index:
1223                        break
1224                    else:
1225                        self._match(TokenType.COMMA)
1226                        indexes.append(index)
1227            elif create_token.token_type == TokenType.VIEW:
1228                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1229                    no_schema_binding = True
1230
1231            if self._match_text_seq("CLONE"):
1232                clone = self._parse_table(schema=True)
1233                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1234                clone_kind = (
1235                    self._match(TokenType.L_PAREN)
1236                    and self._match_texts(self.CLONE_KINDS)
1237                    and self._prev.text.upper()
1238                )
1239                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1240                self._match(TokenType.R_PAREN)
1241                clone = self.expression(
1242                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1243                )
1244
1245        return self.expression(
1246            exp.Create,
1247            this=this,
1248            kind=create_token.text,
1249            replace=replace,
1250            unique=unique,
1251            expression=expression,
1252            exists=exists,
1253            properties=properties,
1254            indexes=indexes,
1255            no_schema_binding=no_schema_binding,
1256            begin=begin,
1257            clone=clone,
1258        )
1259
1260    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1261        # only used for teradata currently
1262        self._match(TokenType.COMMA)
1263
1264        kwargs = {
1265            "no": self._match_text_seq("NO"),
1266            "dual": self._match_text_seq("DUAL"),
1267            "before": self._match_text_seq("BEFORE"),
1268            "default": self._match_text_seq("DEFAULT"),
1269            "local": (self._match_text_seq("LOCAL") and "LOCAL")
1270            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1271            "after": self._match_text_seq("AFTER"),
1272            "minimum": self._match_texts(("MIN", "MINIMUM")),
1273            "maximum": self._match_texts(("MAX", "MAXIMUM")),
1274        }
1275
1276        if self._match_texts(self.PROPERTY_PARSERS):
1277            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1278            try:
1279                return parser(self, **{k: v for k, v in kwargs.items() if v})
1280            except TypeError:
1281                self.raise_error(f"Cannot parse property '{self._prev.text}'")
1282
1283        return None
1284
1285    def _parse_property(self) -> t.Optional[exp.Expression]:
1286        if self._match_texts(self.PROPERTY_PARSERS):
1287            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1288
1289        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1290            return self._parse_character_set(default=True)
1291
1292        if self._match_text_seq("COMPOUND", "SORTKEY"):
1293            return self._parse_sortkey(compound=True)
1294
1295        if self._match_text_seq("SQL", "SECURITY"):
1296            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1297
1298        assignment = self._match_pair(
1299            TokenType.VAR, TokenType.EQ, advance=False
1300        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1301
1302        if assignment:
1303            key = self._parse_var_or_string()
1304            self._match(TokenType.EQ)
1305            return self.expression(exp.Property, this=key, value=self._parse_column())
1306
1307        return None
1308
1309    def _parse_stored(self) -> exp.FileFormatProperty:
1310        self._match(TokenType.ALIAS)
1311
1312        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1313        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1314
1315        return self.expression(
1316            exp.FileFormatProperty,
1317            this=self.expression(
1318                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1319            )
1320            if input_format or output_format
1321            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1322        )
1323
1324    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
1325        self._match(TokenType.EQ)
1326        self._match(TokenType.ALIAS)
1327        return self.expression(exp_class, this=self._parse_field())
1328
1329    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
1330        properties = []
1331        while True:
1332            if before:
1333                prop = self._parse_property_before()
1334            else:
1335                prop = self._parse_property()
1336
1337            if not prop:
1338                break
1339            for p in ensure_list(prop):
1340                properties.append(p)
1341
1342        if properties:
1343            return self.expression(exp.Properties, expressions=properties)
1344
1345        return None
1346
1347    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
1348        return self.expression(
1349            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1350        )
1351
1352    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
1353        if self._index >= 2:
1354            pre_volatile_token = self._tokens[self._index - 2]
1355        else:
1356            pre_volatile_token = None
1357
1358        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
1359            return exp.VolatileProperty()
1360
1361        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1362
1363    def _parse_with_property(
1364        self,
1365    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
1366        self._match(TokenType.WITH)
1367        if self._match(TokenType.L_PAREN, advance=False):
1368            return self._parse_wrapped_csv(self._parse_property)
1369
1370        if self._match_text_seq("JOURNAL"):
1371            return self._parse_withjournaltable()
1372
1373        if self._match_text_seq("DATA"):
1374            return self._parse_withdata(no=False)
1375        elif self._match_text_seq("NO", "DATA"):
1376            return self._parse_withdata(no=True)
1377
1378        if not self._next:
1379            return None
1380
1381        return self._parse_withisolatedloading()
1382
1383    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1384    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
1385        self._match(TokenType.EQ)
1386
1387        user = self._parse_id_var()
1388        self._match(TokenType.PARAMETER)
1389        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1390
1391        if not user or not host:
1392            return None
1393
1394        return exp.DefinerProperty(this=f"{user}@{host}")
1395
1396    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
1397        self._match(TokenType.TABLE)
1398        self._match(TokenType.EQ)
1399        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1400
1401    def _parse_log(self, no: bool = False) -> exp.LogProperty:
1402        return self.expression(exp.LogProperty, no=no)
1403
1404    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
1405        return self.expression(exp.JournalProperty, **kwargs)
1406
1407    def _parse_checksum(self) -> exp.ChecksumProperty:
1408        self._match(TokenType.EQ)
1409
1410        on = None
1411        if self._match(TokenType.ON):
1412            on = True
1413        elif self._match_text_seq("OFF"):
1414            on = False
1415
1416        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
1417
1418    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
1419        if not self._match_text_seq("BY"):
1420            self._retreat(self._index - 1)
1421            return None
1422
1423        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))
1424
1425    def _parse_freespace(self) -> exp.FreespaceProperty:
1426        self._match(TokenType.EQ)
1427        return self.expression(
1428            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1429        )
1430
1431    def _parse_mergeblockratio(
1432        self, no: bool = False, default: bool = False
1433    ) -> exp.MergeBlockRatioProperty:
1434        if self._match(TokenType.EQ):
1435            return self.expression(
1436                exp.MergeBlockRatioProperty,
1437                this=self._parse_number(),
1438                percent=self._match(TokenType.PERCENT),
1439            )
1440
1441        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
1442
1443    def _parse_datablocksize(
1444        self,
1445        default: t.Optional[bool] = None,
1446        minimum: t.Optional[bool] = None,
1447        maximum: t.Optional[bool] = None,
1448    ) -> exp.DataBlocksizeProperty:
1449        self._match(TokenType.EQ)
1450        size = self._parse_number()
1451
1452        units = None
1453        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1454            units = self._prev.text
1455
1456        return self.expression(
1457            exp.DataBlocksizeProperty,
1458            size=size,
1459            units=units,
1460            default=default,
1461            minimum=minimum,
1462            maximum=maximum,
1463        )
1464
1465    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
1466        self._match(TokenType.EQ)
1467        always = self._match_text_seq("ALWAYS")
1468        manual = self._match_text_seq("MANUAL")
1469        never = self._match_text_seq("NEVER")
1470        default = self._match_text_seq("DEFAULT")
1471
1472        autotemp = None
1473        if self._match_text_seq("AUTOTEMP"):
1474            autotemp = self._parse_schema()
1475
1476        return self.expression(
1477            exp.BlockCompressionProperty,
1478            always=always,
1479            manual=manual,
1480            never=never,
1481            default=default,
1482            autotemp=autotemp,
1483        )
1484
1485    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
1486        no = self._match_text_seq("NO")
1487        concurrent = self._match_text_seq("CONCURRENT")
1488        self._match_text_seq("ISOLATED", "LOADING")
1489        for_all = self._match_text_seq("FOR", "ALL")
1490        for_insert = self._match_text_seq("FOR", "INSERT")
1491        for_none = self._match_text_seq("FOR", "NONE")
1492        return self.expression(
1493            exp.IsolatedLoadingProperty,
1494            no=no,
1495            concurrent=concurrent,
1496            for_all=for_all,
1497            for_insert=for_insert,
1498            for_none=for_none,
1499        )
1500
1501    def _parse_locking(self) -> exp.LockingProperty:
1502        if self._match(TokenType.TABLE):
1503            kind = "TABLE"
1504        elif self._match(TokenType.VIEW):
1505            kind = "VIEW"
1506        elif self._match(TokenType.ROW):
1507            kind = "ROW"
1508        elif self._match_text_seq("DATABASE"):
1509            kind = "DATABASE"
1510        else:
1511            kind = None
1512
1513        if kind in ("DATABASE", "TABLE", "VIEW"):
1514            this = self._parse_table_parts()
1515        else:
1516            this = None
1517
1518        if self._match(TokenType.FOR):
1519            for_or_in = "FOR"
1520        elif self._match(TokenType.IN):
1521            for_or_in = "IN"
1522        else:
1523            for_or_in = None
1524
1525        if self._match_text_seq("ACCESS"):
1526            lock_type = "ACCESS"
1527        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1528            lock_type = "EXCLUSIVE"
1529        elif self._match_text_seq("SHARE"):
1530            lock_type = "SHARE"
1531        elif self._match_text_seq("READ"):
1532            lock_type = "READ"
1533        elif self._match_text_seq("WRITE"):
1534            lock_type = "WRITE"
1535        elif self._match_text_seq("CHECKSUM"):
1536            lock_type = "CHECKSUM"
1537        else:
1538            lock_type = None
1539
1540        override = self._match_text_seq("OVERRIDE")
1541
1542        return self.expression(
1543            exp.LockingProperty,
1544            this=this,
1545            kind=kind,
1546            for_or_in=for_or_in,
1547            lock_type=lock_type,
1548            override=override,
1549        )
1550
1551    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1552        if self._match(TokenType.PARTITION_BY):
1553            return self._parse_csv(self._parse_conjunction)
1554        return []
1555
1556    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
1557        self._match(TokenType.EQ)
1558        return self.expression(
1559            exp.PartitionedByProperty,
1560            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1561        )
1562
1563    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
1564        if self._match_text_seq("AND", "STATISTICS"):
1565            statistics = True
1566        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1567            statistics = False
1568        else:
1569            statistics = None
1570
1571        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1572
1573    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
1574        if self._match_text_seq("PRIMARY", "INDEX"):
1575            return exp.NoPrimaryIndexProperty()
1576        return None
1577
1578    def _parse_on_property(self) -> t.Optional[exp.Expression]:
1579        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1580            return exp.OnCommitProperty()
1581        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1582            return exp.OnCommitProperty(delete=True)
1583        return None
1584
1585    def _parse_distkey(self) -> exp.DistKeyProperty:
1586        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1587
1588    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
1589        table = self._parse_table(schema=True)
1590
1591        options = []
1592        while self._match_texts(("INCLUDING", "EXCLUDING")):
1593            this = self._prev.text.upper()
1594
1595            id_var = self._parse_id_var()
1596            if not id_var:
1597                return None
1598
1599            options.append(
1600                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
1601            )
1602
1603        return self.expression(exp.LikeProperty, this=table, expressions=options)
1604
1605    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
1606        return self.expression(
1607            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
1608        )
1609
1610    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
1611        self._match(TokenType.EQ)
1612        return self.expression(
1613            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1614        )
1615
1616    def _parse_returns(self) -> exp.ReturnsProperty:
1617        value: t.Optional[exp.Expression]
1618        is_table = self._match(TokenType.TABLE)
1619
1620        if is_table:
1621            if self._match(TokenType.LT):
1622                value = self.expression(
1623                    exp.Schema,
1624                    this="TABLE",
1625                    expressions=self._parse_csv(self._parse_struct_types),
1626                )
1627                if not self._match(TokenType.GT):
1628                    self.raise_error("Expecting >")
1629            else:
1630                value = self._parse_schema(exp.var("TABLE"))
1631        else:
1632            value = self._parse_types()
1633
1634        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1635
1636    def _parse_describe(self) -> exp.Describe:
1637        kind = self._match_set(self.CREATABLES) and self._prev.text
1638        this = self._parse_table()
1639        return self.expression(exp.Describe, this=this, kind=kind)
1640
1641    def _parse_insert(self) -> exp.Insert:
1642        overwrite = self._match(TokenType.OVERWRITE)
1643        local = self._match_text_seq("LOCAL")
1644        alternative = None
1645
1646        if self._match_text_seq("DIRECTORY"):
1647            this: t.Optional[exp.Expression] = self.expression(
1648                exp.Directory,
1649                this=self._parse_var_or_string(),
1650                local=local,
1651                row_format=self._parse_row_format(match_row=True),
1652            )
1653        else:
1654            if self._match(TokenType.OR):
1655                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1656
1657            self._match(TokenType.INTO)
1658            self._match(TokenType.TABLE)
1659            this = self._parse_table(schema=True)
1660
1661        return self.expression(
1662            exp.Insert,
1663            this=this,
1664            exists=self._parse_exists(),
1665            partition=self._parse_partition(),
1666            expression=self._parse_ddl_select(),
1667            conflict=self._parse_on_conflict(),
1668            returning=self._parse_returning(),
1669            overwrite=overwrite,
1670            alternative=alternative,
1671        )
1672
1673    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
1674        conflict = self._match_text_seq("ON", "CONFLICT")
1675        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1676
1677        if not conflict and not duplicate:
1678            return None
1679
1680        nothing = None
1681        expressions = None
1682        key = None
1683        constraint = None
1684
1685        if conflict:
1686            if self._match_text_seq("ON", "CONSTRAINT"):
1687                constraint = self._parse_id_var()
1688            else:
1689                key = self._parse_csv(self._parse_value)
1690
1691        self._match_text_seq("DO")
1692        if self._match_text_seq("NOTHING"):
1693            nothing = True
1694        else:
1695            self._match(TokenType.UPDATE)
1696            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1697
1698        return self.expression(
1699            exp.OnConflict,
1700            duplicate=duplicate,
1701            expressions=expressions,
1702            nothing=nothing,
1703            key=key,
1704            constraint=constraint,
1705        )
1706
1707    def _parse_returning(self) -> t.Optional[exp.Returning]:
1708        if not self._match(TokenType.RETURNING):
1709            return None
1710
1711        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1712
1713    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
1714        if not self._match(TokenType.FORMAT):
1715            return None
1716        return self._parse_row_format()
1717
1718    def _parse_row_format(
1719        self, match_row: bool = False
1720    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
1721        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1722            return None
1723
1724        if self._match_text_seq("SERDE"):
1725            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1726
1727        self._match_text_seq("DELIMITED")
1728
1729        kwargs = {}
1730
1731        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1732            kwargs["fields"] = self._parse_string()
1733            if self._match_text_seq("ESCAPED", "BY"):
1734                kwargs["escaped"] = self._parse_string()
1735        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1736            kwargs["collection_items"] = self._parse_string()
1737        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1738            kwargs["map_keys"] = self._parse_string()
1739        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1740            kwargs["lines"] = self._parse_string()
1741        if self._match_text_seq("NULL", "DEFINED", "AS"):
1742            kwargs["null"] = self._parse_string()
1743
1744        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1745
1746    def _parse_load(self) -> exp.LoadData | exp.Command:
1747        if self._match_text_seq("DATA"):
1748            local = self._match_text_seq("LOCAL")
1749            self._match_text_seq("INPATH")
1750            inpath = self._parse_string()
1751            overwrite = self._match(TokenType.OVERWRITE)
1752            self._match_pair(TokenType.INTO, TokenType.TABLE)
1753
1754            return self.expression(
1755                exp.LoadData,
1756                this=self._parse_table(schema=True),
1757                local=local,
1758                overwrite=overwrite,
1759                inpath=inpath,
1760                partition=self._parse_partition(),
1761                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1762                serde=self._match_text_seq("SERDE") and self._parse_string(),
1763            )
1764        return self._parse_as_command(self._prev)
1765
1766    def _parse_delete(self) -> exp.Delete:
1767        self._match(TokenType.FROM)
1768
1769        return self.expression(
1770            exp.Delete,
1771            this=self._parse_table(),
1772            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1773            where=self._parse_where(),
1774            returning=self._parse_returning(),
1775        )
1776
1777    def _parse_update(self) -> exp.Update:
1778        return self.expression(
1779            exp.Update,
1780            **{  # type: ignore
1781                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1782                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1783                "from": self._parse_from(modifiers=True),
1784                "where": self._parse_where(),
1785                "returning": self._parse_returning(),
1786            },
1787        )
1788
1789    def _parse_uncache(self) -> exp.Uncache:
1790        if not self._match(TokenType.TABLE):
1791            self.raise_error("Expecting TABLE after UNCACHE")
1792
1793        return self.expression(
1794            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
1795        )
1796
1797    def _parse_cache(self) -> exp.Cache:
1798        lazy = self._match_text_seq("LAZY")
1799        self._match(TokenType.TABLE)
1800        table = self._parse_table(schema=True)
1801
1802        options = []
1803        if self._match_text_seq("OPTIONS"):
1804            self._match_l_paren()
1805            k = self._parse_string()
1806            self._match(TokenType.EQ)
1807            v = self._parse_string()
1808            options = [k, v]
1809            self._match_r_paren()
1810
1811        self._match(TokenType.ALIAS)
1812        return self.expression(
1813            exp.Cache,
1814            this=table,
1815            lazy=lazy,
1816            options=options,
1817            expression=self._parse_select(nested=True),
1818        )
1819
1820    def _parse_partition(self) -> t.Optional[exp.Partition]:
1821        if not self._match(TokenType.PARTITION):
1822            return None
1823
1824        return self.expression(
1825            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1826        )
1827
1828    def _parse_value(self) -> exp.Tuple:
1829        if self._match(TokenType.L_PAREN):
1830            expressions = self._parse_csv(self._parse_conjunction)
1831            self._match_r_paren()
1832            return self.expression(exp.Tuple, expressions=expressions)
1833
1834        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1835        # Source: https://prestodb.io/docs/current/sql/values.html
1836        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1837
1838    def _parse_select(
1839        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
1840    ) -> t.Optional[exp.Expression]:
1841        cte = self._parse_with()
1842        if cte:
1843            this = self._parse_statement()
1844
1845            if not this:
1846                self.raise_error("Failed to parse any statement following CTE")
1847                return cte
1848
1849            if "with" in this.arg_types:
1850                this.set("with", cte)
1851            else:
1852                self.raise_error(f"{this.key} does not support CTE")
1853                this = cte
1854        elif self._match(TokenType.SELECT):
1855            comments = self._prev_comments
1856
1857            hint = self._parse_hint()
1858            all_ = self._match(TokenType.ALL)
1859            distinct = self._match(TokenType.DISTINCT)
1860
1861            kind = (
1862                self._match(TokenType.ALIAS)
1863                and self._match_texts(("STRUCT", "VALUE"))
1864                and self._prev.text
1865            )
1866
1867            if distinct:
1868                distinct = self.expression(
1869                    exp.Distinct,
1870                    on=self._parse_value() if self._match(TokenType.ON) else None,
1871                )
1872
1873            if all_ and distinct:
1874                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
1875
1876            limit = self._parse_limit(top=True)
1877            expressions = self._parse_csv(self._parse_expression)
1878
1879            this = self.expression(
1880                exp.Select,
1881                kind=kind,
1882                hint=hint,
1883                distinct=distinct,
1884                expressions=expressions,
1885                limit=limit,
1886            )
1887            this.comments = comments
1888
1889            into = self._parse_into()
1890            if into:
1891                this.set("into", into)
1892
1893            from_ = self._parse_from()
1894            if from_:
1895                this.set("from", from_)
1896
1897            this = self._parse_query_modifiers(this)
1898        elif (table or nested) and self._match(TokenType.L_PAREN):
1899            if self._match(TokenType.PIVOT):
1900                this = self._parse_simplified_pivot()
1901            elif self._match(TokenType.FROM):
1902                this = exp.select("*").from_(
1903                    t.cast(exp.From, self._parse_from(skip_from_token=True))
1904                )
1905            else:
1906                this = self._parse_table() if table else self._parse_select(nested=True)
1907                this = self._parse_set_operations(self._parse_query_modifiers(this))
1908
1909            self._match_r_paren()
1910
1911            # early return so that subquery unions aren't parsed again
1912            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
1913            # Union ALL should be a property of the top select node, not the subquery
1914            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
1915        elif self._match(TokenType.VALUES):
1916            this = self.expression(
1917                exp.Values,
1918                expressions=self._parse_csv(self._parse_value),
1919                alias=self._parse_table_alias(),
1920            )
1921        else:
1922            this = None
1923
1924        return self._parse_set_operations(this)
1925
1926    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
1927        if not skip_with_token and not self._match(TokenType.WITH):
1928            return None
1929
1930        comments = self._prev_comments
1931        recursive = self._match(TokenType.RECURSIVE)
1932
1933        expressions = []
1934        while True:
1935            expressions.append(self._parse_cte())
1936
1937            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1938                break
1939            else:
1940                self._match(TokenType.WITH)
1941
1942        return self.expression(
1943            exp.With, comments=comments, expressions=expressions, recursive=recursive
1944        )
1945
1946    def _parse_cte(self) -> exp.CTE:
1947        alias = self._parse_table_alias()
1948        if not alias or not alias.this:
1949            self.raise_error("Expected CTE to have alias")
1950
1951        self._match(TokenType.ALIAS)
1952        return self.expression(
1953            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
1954        )
1955
1956    def _parse_table_alias(
1957        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1958    ) -> t.Optional[exp.TableAlias]:
1959        any_token = self._match(TokenType.ALIAS)
1960        alias = (
1961            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1962            or self._parse_string_as_identifier()
1963        )
1964
1965        index = self._index
1966        if self._match(TokenType.L_PAREN):
1967            columns = self._parse_csv(self._parse_function_parameter)
1968            self._match_r_paren() if columns else self._retreat(index)
1969        else:
1970            columns = None
1971
1972        if not alias and not columns:
1973            return None
1974
1975        return self.expression(exp.TableAlias, this=alias, columns=columns)
1976
1977    def _parse_subquery(
1978        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1979    ) -> t.Optional[exp.Subquery]:
1980        if not this:
1981            return None
1982
1983        return self.expression(
1984            exp.Subquery,
1985            this=this,
1986            pivots=self._parse_pivots(),
1987            alias=self._parse_table_alias() if parse_alias else None,
1988        )
1989
1990    def _parse_query_modifiers(
1991        self, this: t.Optional[exp.Expression]
1992    ) -> t.Optional[exp.Expression]:
1993        if isinstance(this, self.MODIFIABLES):
1994            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1995                expression = parser(self)
1996
1997                if expression:
1998                    this.set(key, expression)
1999        return this
2000
2001    def _parse_hint(self) -> t.Optional[exp.Hint]:
2002        if self._match(TokenType.HINT):
2003            hints = self._parse_csv(self._parse_function)
2004
2005            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2006                self.raise_error("Expected */ after HINT")
2007
2008            return self.expression(exp.Hint, expressions=hints)
2009
2010        return None
2011
2012    def _parse_into(self) -> t.Optional[exp.Into]:
2013        if not self._match(TokenType.INTO):
2014            return None
2015
2016        temp = self._match(TokenType.TEMPORARY)
2017        unlogged = self._match_text_seq("UNLOGGED")
2018        self._match(TokenType.TABLE)
2019
2020        return self.expression(
2021            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2022        )
2023
2024    def _parse_from(
2025        self, modifiers: bool = False, skip_from_token: bool = False
2026    ) -> t.Optional[exp.From]:
2027        if not skip_from_token and not self._match(TokenType.FROM):
2028            return None
2029
2030        comments = self._prev_comments
2031        this = self._parse_table()
2032
2033        return self.expression(
2034            exp.From,
2035            comments=comments,
2036            this=self._parse_query_modifiers(this) if modifiers else this,
2037        )
2038
2039    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
2040        if not self._match(TokenType.MATCH_RECOGNIZE):
2041            return None
2042
2043        self._match_l_paren()
2044
2045        partition = self._parse_partition_by()
2046        order = self._parse_order()
2047        measures = (
2048            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2049        )
2050
2051        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2052            rows = exp.var("ONE ROW PER MATCH")
2053        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2054            text = "ALL ROWS PER MATCH"
2055            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2056                text += f" SHOW EMPTY MATCHES"
2057            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2058                text += f" OMIT EMPTY MATCHES"
2059            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2060                text += f" WITH UNMATCHED ROWS"
2061            rows = exp.var(text)
2062        else:
2063            rows = None
2064
2065        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2066            text = "AFTER MATCH SKIP"
2067            if self._match_text_seq("PAST", "LAST", "ROW"):
2068                text += f" PAST LAST ROW"
2069            elif self._match_text_seq("TO", "NEXT", "ROW"):
2070                text += f" TO NEXT ROW"
2071            elif self._match_text_seq("TO", "FIRST"):
2072                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2073            elif self._match_text_seq("TO", "LAST"):
2074                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2075            after = exp.var(text)
2076        else:
2077            after = None
2078
2079        if self._match_text_seq("PATTERN"):
2080            self._match_l_paren()
2081
2082            if not self._curr:
2083                self.raise_error("Expecting )", self._curr)
2084
2085            paren = 1
2086            start = self._curr
2087
2088            while self._curr and paren > 0:
2089                if self._curr.token_type == TokenType.L_PAREN:
2090                    paren += 1
2091                if self._curr.token_type == TokenType.R_PAREN:
2092                    paren -= 1
2093
2094                end = self._prev
2095                self._advance()
2096
2097            if paren > 0:
2098                self.raise_error("Expecting )", self._curr)
2099
2100            pattern = exp.var(self._find_sql(start, end))
2101        else:
2102            pattern = None
2103
2104        define = (
2105            self._parse_csv(
2106                lambda: self.expression(
2107                    exp.Alias,
2108                    alias=self._parse_id_var(any_token=True),
2109                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2110                )
2111            )
2112            if self._match_text_seq("DEFINE")
2113            else None
2114        )
2115
2116        self._match_r_paren()
2117
2118        return self.expression(
2119            exp.MatchRecognize,
2120            partition_by=partition,
2121            order=order,
2122            measures=measures,
2123            rows=rows,
2124            after=after,
2125            pattern=pattern,
2126            define=define,
2127            alias=self._parse_table_alias(),
2128        )
2129
2130    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
2131        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2132        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2133
2134        if outer_apply or cross_apply:
2135            this = self._parse_select(table=True)
2136            view = None
2137            outer = not cross_apply
2138        elif self._match(TokenType.LATERAL):
2139            this = self._parse_select(table=True)
2140            view = self._match(TokenType.VIEW)
2141            outer = self._match(TokenType.OUTER)
2142        else:
2143            return None
2144
2145        if not this:
2146            this = self._parse_function() or self._parse_id_var(any_token=False)
2147            while self._match(TokenType.DOT):
2148                this = exp.Dot(
2149                    this=this,
2150                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2151                )
2152
2153        if view:
2154            table = self._parse_id_var(any_token=False)
2155            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2156            table_alias: t.Optional[exp.TableAlias] = self.expression(
2157                exp.TableAlias, this=table, columns=columns
2158            )
2159        else:
2160            table_alias = self._parse_table_alias()
2161
2162        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
2163
2164    def _parse_join_parts(
2165        self,
2166    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2167        return (
2168            self._match_set(self.JOIN_METHODS) and self._prev,
2169            self._match_set(self.JOIN_SIDES) and self._prev,
2170            self._match_set(self.JOIN_KINDS) and self._prev,
2171        )
2172
2173    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
2174        if self._match(TokenType.COMMA):
2175            return self.expression(exp.Join, this=self._parse_table())
2176
2177        index = self._index
2178        method, side, kind = self._parse_join_parts()
2179        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
2180        join = self._match(TokenType.JOIN)
2181
2182        if not skip_join_token and not join:
2183            self._retreat(index)
2184            kind = None
2185            method = None
2186            side = None
2187
2188        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
2189        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
2190
2191        if not skip_join_token and not join and not outer_apply and not cross_apply:
2192            return None
2193
2194        if outer_apply:
2195            side = Token(TokenType.LEFT, "LEFT")
2196
2197        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}
2198
2199        if method:
2200            kwargs["method"] = method.text
2201        if side:
2202            kwargs["side"] = side.text
2203        if kind:
2204            kwargs["kind"] = kind.text
2205        if hint:
2206            kwargs["hint"] = hint
2207
2208        if self._match(TokenType.ON):
2209            kwargs["on"] = self._parse_conjunction()
2210        elif self._match(TokenType.USING):
2211            kwargs["using"] = self._parse_wrapped_id_vars()
2212
2213        return self.expression(exp.Join, **kwargs)
2214
2215    def _parse_index(
2216        self,
2217        index: t.Optional[exp.Expression] = None,
2218    ) -> t.Optional[exp.Index]:
2219        if index:
2220            unique = None
2221            primary = None
2222            amp = None
2223
2224            self._match(TokenType.ON)
2225            self._match(TokenType.TABLE)  # hive
2226            table = self._parse_table_parts(schema=True)
2227        else:
2228            unique = self._match(TokenType.UNIQUE)
2229            primary = self._match_text_seq("PRIMARY")
2230            amp = self._match_text_seq("AMP")
2231
2232            if not self._match(TokenType.INDEX):
2233                return None
2234
2235            index = self._parse_id_var()
2236            table = None
2237
2238        using = self._parse_field() if self._match(TokenType.USING) else None
2239
2240        if self._match(TokenType.L_PAREN, advance=False):
2241            columns = self._parse_wrapped_csv(self._parse_ordered)
2242        else:
2243            columns = None
2244
2245        return self.expression(
2246            exp.Index,
2247            this=index,
2248            table=table,
2249            using=using,
2250            columns=columns,
2251            unique=unique,
2252            primary=primary,
2253            amp=amp,
2254            partition_by=self._parse_partition_by(),
2255        )
2256
2257    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2258        return (
2259            (not schema and self._parse_function())
2260            or self._parse_id_var(any_token=False)
2261            or self._parse_string_as_identifier()
2262            or self._parse_placeholder()
2263        )
2264
2265    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2266        catalog = None
2267        db = None
2268        table = self._parse_table_part(schema=schema)
2269
2270        while self._match(TokenType.DOT):
2271            if catalog:
2272                # This allows nesting the table in arbitrarily many dot expressions if needed
2273                table = self.expression(
2274                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2275                )
2276            else:
2277                catalog = db
2278                db = table
2279                table = self._parse_table_part(schema=schema)
2280
2281        if not table:
2282            self.raise_error(f"Expected table name but got {self._curr}")
2283
2284        return self.expression(
2285            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2286        )
2287
2288    def _parse_table(
2289        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
2290    ) -> t.Optional[exp.Expression]:
2291        lateral = self._parse_lateral()
2292        if lateral:
2293            return lateral
2294
2295        unnest = self._parse_unnest()
2296        if unnest:
2297            return unnest
2298
2299        values = self._parse_derived_table_values()
2300        if values:
2301            return values
2302
2303        subquery = self._parse_select(table=True)
2304        if subquery:
2305            if not subquery.args.get("pivots"):
2306                subquery.set("pivots", self._parse_pivots())
2307            return subquery
2308
2309        this: exp.Expression = self._parse_table_parts(schema=schema)
2310
2311        if schema:
2312            return self._parse_schema(this=this)
2313
2314        if self.ALIAS_POST_TABLESAMPLE:
2315            table_sample = self._parse_table_sample()
2316
2317        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2318        if alias:
2319            this.set("alias", alias)
2320
2321        if not this.args.get("pivots"):
2322            this.set("pivots", self._parse_pivots())
2323
2324        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
2325            this.set(
2326                "hints",
2327                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
2328            )
2329            self._match_r_paren()
2330
2331        if not self.ALIAS_POST_TABLESAMPLE:
2332            table_sample = self._parse_table_sample()
2333
2334        if table_sample:
2335            table_sample.set("this", this)
2336            this = table_sample
2337
2338        return this
2339
2340    def _parse_unnest(self) -> t.Optional[exp.Unnest]:
2341        if not self._match(TokenType.UNNEST):
2342            return None
2343
2344        expressions = self._parse_wrapped_csv(self._parse_type)
2345        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2346        alias = self._parse_table_alias()
2347
2348        if alias and self.UNNEST_COLUMN_ONLY:
2349            if alias.args.get("columns"):
2350                self.raise_error("Unexpected extra column alias in unnest.")
2351
2352            alias.set("columns", [alias.this])
2353            alias.set("this", None)
2354
2355        offset = None
2356        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2357            self._match(TokenType.ALIAS)
2358            offset = self._parse_id_var() or exp.to_identifier("offset")
2359
2360        return self.expression(
2361            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
2362        )
2363
2364    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
2365        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2366        if not is_derived and not self._match(TokenType.VALUES):
2367            return None
2368
2369        expressions = self._parse_csv(self._parse_value)
2370
2371        if is_derived:
2372            self._match_r_paren()
2373
2374        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2375
2376    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
2377        if not self._match(TokenType.TABLE_SAMPLE) and not (
2378            as_modifier and self._match_text_seq("USING", "SAMPLE")
2379        ):
2380            return None
2381
2382        bucket_numerator = None
2383        bucket_denominator = None
2384        bucket_field = None
2385        percent = None
2386        rows = None
2387        size = None
2388        seed = None
2389
2390        kind = (
2391            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2392        )
2393        method = self._parse_var(tokens=(TokenType.ROW,))
2394
2395        self._match(TokenType.L_PAREN)
2396
2397        num = self._parse_number()
2398
2399        if self._match_text_seq("BUCKET"):
2400            bucket_numerator = self._parse_number()
2401            self._match_text_seq("OUT", "OF")
2402            bucket_denominator = bucket_denominator = self._parse_number()
2403            self._match(TokenType.ON)
2404            bucket_field = self._parse_field()
2405        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2406            percent = num
2407        elif self._match(TokenType.ROWS):
2408            rows = num
2409        else:
2410            size = num
2411
2412        self._match(TokenType.R_PAREN)
2413
2414        if self._match(TokenType.L_PAREN):
2415            method = self._parse_var()
2416            seed = self._match(TokenType.COMMA) and self._parse_number()
2417            self._match_r_paren()
2418        elif self._match_texts(("SEED", "REPEATABLE")):
2419            seed = self._parse_wrapped(self._parse_number)
2420
2421        return self.expression(
2422            exp.TableSample,
2423            method=method,
2424            bucket_numerator=bucket_numerator,
2425            bucket_denominator=bucket_denominator,
2426            bucket_field=bucket_field,
2427            percent=percent,
2428            rows=rows,
2429            size=size,
2430            seed=seed,
2431            kind=kind,
2432        )
2433
2434    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2435        return list(iter(self._parse_pivot, None))
2436
2437    # https://duckdb.org/docs/sql/statements/pivot
2438    def _parse_simplified_pivot(self) -> exp.Pivot:
2439        def _parse_on() -> t.Optional[exp.Expression]:
2440            this = self._parse_bitwise()
2441            return self._parse_in(this) if self._match(TokenType.IN) else this
2442
2443        this = self._parse_table()
2444        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2445        using = self._match(TokenType.USING) and self._parse_csv(
2446            lambda: self._parse_alias(self._parse_function())
2447        )
2448        group = self._parse_group()
2449        return self.expression(
2450            exp.Pivot, this=this, expressions=expressions, using=using, group=group
2451        )
2452
2453    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
2454        index = self._index
2455
2456        if self._match(TokenType.PIVOT):
2457            unpivot = False
2458        elif self._match(TokenType.UNPIVOT):
2459            unpivot = True
2460        else:
2461            return None
2462
2463        expressions = []
2464        field = None
2465
2466        if not self._match(TokenType.L_PAREN):
2467            self._retreat(index)
2468            return None
2469
2470        if unpivot:
2471            expressions = self._parse_csv(self._parse_column)
2472        else:
2473            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
2474
2475        if not expressions:
2476            self.raise_error("Failed to parse PIVOT's aggregation list")
2477
2478        if not self._match(TokenType.FOR):
2479            self.raise_error("Expecting FOR")
2480
2481        value = self._parse_column()
2482
2483        if not self._match(TokenType.IN):
2484            self.raise_error("Expecting IN")
2485
2486        field = self._parse_in(value, alias=True)
2487
2488        self._match_r_paren()
2489
2490        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2491
2492        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
2493            pivot.set("alias", self._parse_table_alias())
2494
2495        if not unpivot:
2496            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
2497
2498            columns: t.List[exp.Expression] = []
2499            for fld in pivot.args["field"].expressions:
2500                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
2501                for name in names:
2502                    if self.PREFIXED_PIVOT_COLUMNS:
2503                        name = f"{name}_{field_name}" if name else field_name
2504                    else:
2505                        name = f"{field_name}_{name}" if name else field_name
2506
2507                    columns.append(exp.to_identifier(name))
2508
2509            pivot.set("columns", columns)
2510
2511        return pivot
2512
2513    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2514        return [agg.alias for agg in aggregations]
2515
2516    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
2517        if not skip_where_token and not self._match(TokenType.WHERE):
2518            return None
2519
2520        return self.expression(
2521            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2522        )
2523
2524    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
2525        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
2526            return None
2527
2528        elements = defaultdict(list)
2529
2530        while True:
2531            expressions = self._parse_csv(self._parse_conjunction)
2532            if expressions:
2533                elements["expressions"].extend(expressions)
2534
2535            grouping_sets = self._parse_grouping_sets()
2536            if grouping_sets:
2537                elements["grouping_sets"].extend(grouping_sets)
2538
2539            rollup = None
2540            cube = None
2541            totals = None
2542
2543            with_ = self._match(TokenType.WITH)
2544            if self._match(TokenType.ROLLUP):
2545                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
2546                elements["rollup"].extend(ensure_list(rollup))
2547
2548            if self._match(TokenType.CUBE):
2549                cube = with_ or self._parse_wrapped_csv(self._parse_column)
2550                elements["cube"].extend(ensure_list(cube))
2551
2552            if self._match_text_seq("TOTALS"):
2553                totals = True
2554                elements["totals"] = True  # type: ignore
2555
2556            if not (grouping_sets or rollup or cube or totals):
2557                break
2558
2559        return self.expression(exp.Group, **elements)  # type: ignore
2560
2561    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2562        if not self._match(TokenType.GROUPING_SETS):
2563            return None
2564
2565        return self._parse_wrapped_csv(self._parse_grouping_set)
2566
2567    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2568        if self._match(TokenType.L_PAREN):
2569            grouping_set = self._parse_csv(self._parse_column)
2570            self._match_r_paren()
2571            return self.expression(exp.Tuple, expressions=grouping_set)
2572
2573        return self._parse_column()
2574
2575    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
2576        if not skip_having_token and not self._match(TokenType.HAVING):
2577            return None
2578        return self.expression(exp.Having, this=self._parse_conjunction())
2579
2580    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
2581        if not self._match(TokenType.QUALIFY):
2582            return None
2583        return self.expression(exp.Qualify, this=self._parse_conjunction())
2584
2585    def _parse_order(
2586        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2587    ) -> t.Optional[exp.Expression]:
2588        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2589            return this
2590
2591        return self.expression(
2592            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2593        )
2594
2595    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
2596        if not self._match_text_seq(*texts):
2597            return None
2598        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2599
2600    def _parse_ordered(self) -> exp.Ordered:
2601        this = self._parse_conjunction()
2602        self._match(TokenType.ASC)
2603
2604        is_desc = self._match(TokenType.DESC)
2605        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2606        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2607        desc = is_desc or False
2608        asc = not desc
2609        nulls_first = is_nulls_first or False
2610        explicitly_null_ordered = is_nulls_first or is_nulls_last
2611
2612        if (
2613            not explicitly_null_ordered
2614            and (
2615                (asc and self.NULL_ORDERING == "nulls_are_small")
2616                or (desc and self.NULL_ORDERING != "nulls_are_small")
2617            )
2618            and self.NULL_ORDERING != "nulls_are_last"
2619        ):
2620            nulls_first = True
2621
2622        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2623
2624    def _parse_limit(
2625        self, this: t.Optional[exp.Expression] = None, top: bool = False
2626    ) -> t.Optional[exp.Expression]:
2627        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2628            limit_paren = self._match(TokenType.L_PAREN)
2629            limit_exp = self.expression(
2630                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2631            )
2632
2633            if limit_paren:
2634                self._match_r_paren()
2635
2636            return limit_exp
2637
2638        if self._match(TokenType.FETCH):
2639            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2640            direction = self._prev.text if direction else "FIRST"
2641
2642            count = self._parse_number()
2643            percent = self._match(TokenType.PERCENT)
2644
2645            self._match_set((TokenType.ROW, TokenType.ROWS))
2646
2647            only = self._match_text_seq("ONLY")
2648            with_ties = self._match_text_seq("WITH", "TIES")
2649
2650            if only and with_ties:
2651                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2652
2653            return self.expression(
2654                exp.Fetch,
2655                direction=direction,
2656                count=count,
2657                percent=percent,
2658                with_ties=with_ties,
2659            )
2660
2661        return this
2662
2663    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2664        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2665            return this
2666
2667        count = self._parse_number()
2668        self._match_set((TokenType.ROW, TokenType.ROWS))
2669        return self.expression(exp.Offset, this=this, expression=count)
2670
2671    def _parse_locks(self) -> t.List[exp.Lock]:
2672        locks = []
2673        while True:
2674            if self._match_text_seq("FOR", "UPDATE"):
2675                update = True
2676            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2677                "LOCK", "IN", "SHARE", "MODE"
2678            ):
2679                update = False
2680            else:
2681                break
2682
2683            expressions = None
2684            if self._match_text_seq("OF"):
2685                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2686
2687            wait: t.Optional[bool | exp.Expression] = None
2688            if self._match_text_seq("NOWAIT"):
2689                wait = True
2690            elif self._match_text_seq("WAIT"):
2691                wait = self._parse_primary()
2692            elif self._match_text_seq("SKIP", "LOCKED"):
2693                wait = False
2694
2695            locks.append(
2696                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2697            )
2698
2699        return locks
2700
2701    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2702        if not self._match_set(self.SET_OPERATIONS):
2703            return this
2704
2705        token_type = self._prev.token_type
2706
2707        if token_type == TokenType.UNION:
2708            expression = exp.Union
2709        elif token_type == TokenType.EXCEPT:
2710            expression = exp.Except
2711        else:
2712            expression = exp.Intersect
2713
2714        return self.expression(
2715            expression,
2716            this=this,
2717            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2718            expression=self._parse_set_operations(self._parse_select(nested=True)),
2719        )
2720
2721    def _parse_expression(self) -> t.Optional[exp.Expression]:
2722        return self._parse_alias(self._parse_conjunction())
2723
2724    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2725        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2726
2727    def _parse_equality(self) -> t.Optional[exp.Expression]:
2728        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2729
2730    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2731        return self._parse_tokens(self._parse_range, self.COMPARISON)
2732
2733    def _parse_range(self) -> t.Optional[exp.Expression]:
2734        this = self._parse_bitwise()
2735        negate = self._match(TokenType.NOT)
2736
2737        if self._match_set(self.RANGE_PARSERS):
2738            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
2739            if not expression:
2740                return this
2741
2742            this = expression
2743        elif self._match(TokenType.ISNULL):
2744            this = self.expression(exp.Is, this=this, expression=exp.Null())
2745
2746        # Postgres supports ISNULL and NOTNULL for conditions.
2747        # https://blog.andreiavram.ro/postgresql-null-composite-type/
2748        if self._match(TokenType.NOTNULL):
2749            this = self.expression(exp.Is, this=this, expression=exp.Null())
2750            this = self.expression(exp.Not, this=this)
2751
2752        if negate:
2753            this = self.expression(exp.Not, this=this)
2754
2755        if self._match(TokenType.IS):
2756            this = self._parse_is(this)
2757
2758        return this
2759
2760    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2761        index = self._index - 1
2762        negate = self._match(TokenType.NOT)
2763
2764        if self._match_text_seq("DISTINCT", "FROM"):
2765            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2766            return self.expression(klass, this=this, expression=self._parse_expression())
2767
2768        expression = self._parse_null() or self._parse_boolean()
2769        if not expression:
2770            self._retreat(index)
2771            return None
2772
2773        this = self.expression(exp.Is, this=this, expression=expression)
2774        return self.expression(exp.Not, this=this) if negate else this
2775
2776    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
2777        unnest = self._parse_unnest()
2778        if unnest:
2779            this = self.expression(exp.In, this=this, unnest=unnest)
2780        elif self._match(TokenType.L_PAREN):
2781            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2782
2783            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2784                this = self.expression(exp.In, this=this, query=expressions[0])
2785            else:
2786                this = self.expression(exp.In, this=this, expressions=expressions)
2787
2788            self._match_r_paren(this)
2789        else:
2790            this = self.expression(exp.In, this=this, field=self._parse_field())
2791
2792        return this
2793
2794    def _parse_between(self, this: exp.Expression) -> exp.Between:
2795        low = self._parse_bitwise()
2796        self._match(TokenType.AND)
2797        high = self._parse_bitwise()
2798        return self.expression(exp.Between, this=this, low=low, high=high)
2799
2800    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2801        if not self._match(TokenType.ESCAPE):
2802            return this
2803        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2804
2805    def _parse_interval(self) -> t.Optional[exp.Interval]:
2806        if not self._match(TokenType.INTERVAL):
2807            return None
2808
2809        this = self._parse_primary() or self._parse_term()
2810        unit = self._parse_function() or self._parse_var()
2811
2812        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
2813        # each INTERVAL expression into this canonical form so it's easy to transpile
2814        if this and this.is_number:
2815            this = exp.Literal.string(this.name)
2816        elif this and this.is_string:
2817            parts = this.name.split()
2818
2819            if len(parts) == 2:
2820                if unit:
2821                    # this is not actually a unit, it's something else
2822                    unit = None
2823                    self._retreat(self._index - 1)
2824                else:
2825                    this = exp.Literal.string(parts[0])
2826                    unit = self.expression(exp.Var, this=parts[1])
2827
2828        return self.expression(exp.Interval, this=this, unit=unit)
2829
2830    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2831        this = self._parse_term()
2832
2833        while True:
2834            if self._match_set(self.BITWISE):
2835                this = self.expression(
2836                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
2837                )
2838            elif self._match_pair(TokenType.LT, TokenType.LT):
2839                this = self.expression(
2840                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2841                )
2842            elif self._match_pair(TokenType.GT, TokenType.GT):
2843                this = self.expression(
2844                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2845                )
2846            else:
2847                break
2848
2849        return this
2850
2851    def _parse_term(self) -> t.Optional[exp.Expression]:
2852        return self._parse_tokens(self._parse_factor, self.TERM)
2853
2854    def _parse_factor(self) -> t.Optional[exp.Expression]:
2855        return self._parse_tokens(self._parse_unary, self.FACTOR)
2856
2857    def _parse_unary(self) -> t.Optional[exp.Expression]:
2858        if self._match_set(self.UNARY_PARSERS):
2859            return self.UNARY_PARSERS[self._prev.token_type](self)
2860        return self._parse_at_time_zone(self._parse_type())
2861
2862    def _parse_type(self) -> t.Optional[exp.Expression]:
2863        interval = self._parse_interval()
2864        if interval:
2865            return interval
2866
2867        index = self._index
2868        data_type = self._parse_types(check_func=True)
2869        this = self._parse_column()
2870
2871        if data_type:
2872            if isinstance(this, exp.Literal):
2873                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
2874                if parser:
2875                    return parser(self, this, data_type)
2876                return self.expression(exp.Cast, this=this, to=data_type)
2877            if not data_type.expressions:
2878                self._retreat(index)
2879                return self._parse_column()
2880            return self._parse_column_ops(data_type)
2881
2882        return this
2883
2884    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
2885        this = self._parse_type()
2886        if not this:
2887            return None
2888
2889        return self.expression(
2890            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2891        )
2892
2893    def _parse_types(
2894        self, check_func: bool = False, schema: bool = False
2895    ) -> t.Optional[exp.Expression]:
2896        index = self._index
2897
2898        prefix = self._match_text_seq("SYSUDTLIB", ".")
2899
2900        if not self._match_set(self.TYPE_TOKENS):
2901            return None
2902
2903        type_token = self._prev.token_type
2904
2905        if type_token == TokenType.PSEUDO_TYPE:
2906            return self.expression(exp.PseudoType, this=self._prev.text)
2907
2908        nested = type_token in self.NESTED_TYPE_TOKENS
2909        is_struct = type_token == TokenType.STRUCT
2910        expressions = None
2911        maybe_func = False
2912
2913        if self._match(TokenType.L_PAREN):
2914            if is_struct:
2915                expressions = self._parse_csv(self._parse_struct_types)
2916            elif nested:
2917                expressions = self._parse_csv(
2918                    lambda: self._parse_types(check_func=check_func, schema=schema)
2919                )
2920            else:
2921                expressions = self._parse_csv(self._parse_type_size)
2922
2923            if not expressions or not self._match(TokenType.R_PAREN):
2924                self._retreat(index)
2925                return None
2926
2927            maybe_func = True
2928
2929        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
2930            this = exp.DataType(
2931                this=exp.DataType.Type.ARRAY,
2932                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
2933                nested=True,
2934            )
2935
2936            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
2937                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)
2938
2939            return this
2940
2941        if self._match(TokenType.L_BRACKET):
2942            self._retreat(index)
2943            return None
2944
2945        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
2946        if nested and self._match(TokenType.LT):
2947            if is_struct:
2948                expressions = self._parse_csv(self._parse_struct_types)
2949            else:
2950                expressions = self._parse_csv(
2951                    lambda: self._parse_types(check_func=check_func, schema=schema)
2952                )
2953
2954            if not self._match(TokenType.GT):
2955                self.raise_error("Expecting >")
2956
2957            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
2958                values = self._parse_csv(self._parse_conjunction)
2959                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))
2960
2961        value: t.Optional[exp.Expression] = None
2962        if type_token in self.TIMESTAMPS:
2963            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
2964                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
2965            elif (
2966                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
2967                or type_token == TokenType.TIMESTAMPLTZ
2968            ):
2969                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
2970            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
2971                if type_token == TokenType.TIME:
2972                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
2973                else:
2974                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
2975
2976            maybe_func = maybe_func and value is None
2977
2978            if value is None:
2979                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
2980        elif type_token == TokenType.INTERVAL:
2981            unit = self._parse_var()
2982
2983            if not unit:
2984                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
2985            else:
2986                value = self.expression(exp.Interval, unit=unit)
2987
2988        if maybe_func and check_func:
2989            index2 = self._index
2990            peek = self._parse_string()
2991
2992            if not peek:
2993                self._retreat(index)
2994                return None
2995
2996            self._retreat(index2)
2997
2998        if value:
2999            return value
3000
3001        return exp.DataType(
3002            this=exp.DataType.Type[type_token.value.upper()],
3003            expressions=expressions,
3004            nested=nested,
3005            values=values,
3006            prefix=prefix,
3007        )
3008
3009    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3010        this = self._parse_type() or self._parse_id_var()
3011        self._match(TokenType.COLON)
3012        return self._parse_column_def(this)
3013
3014    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3015        if not self._match_text_seq("AT", "TIME", "ZONE"):
3016            return this
3017        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3018
3019    def _parse_column(self) -> t.Optional[exp.Expression]:
3020        this = self._parse_field()
3021        if isinstance(this, exp.Identifier):
3022            this = self.expression(exp.Column, this=this)
3023        elif not this:
3024            return self._parse_bracket(this)
3025        return self._parse_column_ops(this)
3026
3027    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3028        this = self._parse_bracket(this)
3029
3030        while self._match_set(self.COLUMN_OPERATORS):
3031            op_token = self._prev.token_type
3032            op = self.COLUMN_OPERATORS.get(op_token)
3033
3034            if op_token == TokenType.DCOLON:
3035                field = self._parse_types()
3036                if not field:
3037                    self.raise_error("Expected type")
3038            elif op and self._curr:
3039                self._advance()
3040                value = self._prev.text
3041                field = (
3042                    exp.Literal.number(value)
3043                    if self._prev.token_type == TokenType.NUMBER
3044                    else exp.Literal.string(value)
3045                )
3046            else:
3047                field = self._parse_field(anonymous_func=True)
3048
3049            if isinstance(field, exp.Func):
3050                # bigquery allows function calls like x.y.count(...)
3051                # SAFE.SUBSTR(...)
3052                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
3053                this = self._replace_columns_with_dots(this)
3054
3055            if op:
3056                this = op(self, this, field)
3057            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
3058                this = self.expression(
3059                    exp.Column,
3060                    this=field,
3061                    table=this.this,
3062                    db=this.args.get("table"),
3063                    catalog=this.args.get("db"),
3064                )
3065            else:
3066                this = self.expression(exp.Dot, this=this, expression=field)
3067            this = self._parse_bracket(this)
3068        return this
3069
3070    def _parse_primary(self) -> t.Optional[exp.Expression]:
3071        if self._match_set(self.PRIMARY_PARSERS):
3072            token_type = self._prev.token_type
3073            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)
3074
3075            if token_type == TokenType.STRING:
3076                expressions = [primary]
3077                while self._match(TokenType.STRING):
3078                    expressions.append(exp.Literal.string(self._prev.text))
3079
3080                if len(expressions) > 1:
3081                    return self.expression(exp.Concat, expressions=expressions)
3082
3083            return primary
3084
3085        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
3086            return exp.Literal.number(f"0.{self._prev.text}")
3087
3088        if self._match(TokenType.L_PAREN):
3089            comments = self._prev_comments
3090            query = self._parse_select()
3091
3092            if query:
3093                expressions = [query]
3094            else:
3095                expressions = self._parse_csv(self._parse_expression)
3096
3097            this = self._parse_query_modifiers(seq_get(expressions, 0))
3098
3099            if isinstance(this, exp.Subqueryable):
3100                this = self._parse_set_operations(
3101                    self._parse_subquery(this=this, parse_alias=False)
3102                )
3103            elif len(expressions) > 1:
3104                this = self.expression(exp.Tuple, expressions=expressions)
3105            else:
3106                this = self.expression(exp.Paren, this=self._parse_set_operations(this))
3107
3108            if this:
3109                this.add_comments(comments)
3110
3111            self._match_r_paren(expression=this)
3112            return this
3113
3114        return None
3115
3116    def _parse_field(
3117        self,
3118        any_token: bool = False,
3119        tokens: t.Optional[t.Collection[TokenType]] = None,
3120        anonymous_func: bool = False,
3121    ) -> t.Optional[exp.Expression]:
3122        return (
3123            self._parse_primary()
3124            or self._parse_function(anonymous=anonymous_func)
3125            or self._parse_id_var(any_token=any_token, tokens=tokens)
3126        )
3127
3128    def _parse_function(
3129        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
3130    ) -> t.Optional[exp.Expression]:
3131        if not self._curr:
3132            return None
3133
3134        token_type = self._curr.token_type
3135
3136        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
3137            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)
3138
3139        if not self._next or self._next.token_type != TokenType.L_PAREN:
3140            if token_type in self.NO_PAREN_FUNCTIONS:
3141                self._advance()
3142                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])
3143
3144            return None
3145
3146        if token_type not in self.FUNC_TOKENS:
3147            return None
3148
3149        this = self._curr.text
3150        upper = this.upper()
3151        self._advance(2)
3152
3153        parser = self.FUNCTION_PARSERS.get(upper)
3154
3155        if parser and not anonymous:
3156            this = parser(self)
3157        else:
3158            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
3159
3160            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
3161                this = self.expression(subquery_predicate, this=self._parse_select())
3162                self._match_r_paren()
3163                return this
3164
3165            if functions is None:
3166                functions = self.FUNCTIONS
3167
3168            function = functions.get(upper)
3169
3170            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
3171            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))
3172
3173            if function and not anonymous:
3174                this = self.validate_expression(function(args), args)
3175            else:
3176                this = self.expression(exp.Anonymous, this=this, expressions=args)
3177
3178        self._match_r_paren(this)
3179        return self._parse_window(this)
3180
3181    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3182        return self._parse_column_def(self._parse_id_var())
3183
3184    def _parse_user_defined_function(
3185        self, kind: t.Optional[TokenType] = None
3186    ) -> t.Optional[exp.Expression]:
3187        this = self._parse_id_var()
3188
3189        while self._match(TokenType.DOT):
3190            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3191
3192        if not self._match(TokenType.L_PAREN):
3193            return this
3194
3195        expressions = self._parse_csv(self._parse_function_parameter)
3196        self._match_r_paren()
3197        return self.expression(
3198            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3199        )
3200
3201    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
3202        literal = self._parse_primary()
3203        if literal:
3204            return self.expression(exp.Introducer, this=token.text, expression=literal)
3205
3206        return self.expression(exp.Identifier, this=token.text)
3207
3208    def _parse_session_parameter(self) -> exp.SessionParameter:
3209        kind = None
3210        this = self._parse_id_var() or self._parse_primary()
3211
3212        if this and self._match(TokenType.DOT):
3213            kind = this.name
3214            this = self._parse_var() or self._parse_primary()
3215
3216        return self.expression(exp.SessionParameter, this=this, kind=kind)
3217
3218    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
3219        index = self._index
3220
3221        if self._match(TokenType.L_PAREN):
3222            expressions = self._parse_csv(self._parse_id_var)
3223
3224            if not self._match(TokenType.R_PAREN):
3225                self._retreat(index)
3226        else:
3227            expressions = [self._parse_id_var()]
3228
3229        if self._match_set(self.LAMBDAS):
3230            return self.LAMBDAS[self._prev.token_type](self, expressions)
3231
3232        self._retreat(index)
3233
3234        this: t.Optional[exp.Expression]
3235
3236        if self._match(TokenType.DISTINCT):
3237            this = self.expression(
3238                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
3239            )
3240        else:
3241            this = self._parse_select_or_expression(alias=alias)
3242
3243            if isinstance(this, exp.EQ):
3244                left = this.this
3245                if isinstance(left, exp.Column):
3246                    left.replace(exp.var(left.text("this")))
3247
3248        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3249
3250    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
3251        index = self._index
3252
3253        if not self.errors:
3254            try:
3255                if self._parse_select(nested=True):
3256                    return this
3257            except ParseError:
3258                pass
3259            finally:
3260                self.errors.clear()
3261                self._retreat(index)
3262
3263        if not self._match(TokenType.L_PAREN):
3264            return this
3265
3266        args = self._parse_csv(
3267            lambda: self._parse_constraint()
3268            or self._parse_column_def(self._parse_field(any_token=True))
3269        )
3270
3271        self._match_r_paren()
3272        return self.expression(exp.Schema, this=this, expressions=args)
3273
3274    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3275        # column defs are not really columns, they're identifiers
3276        if isinstance(this, exp.Column):
3277            this = this.this
3278
3279        kind = self._parse_types(schema=True)
3280
3281        if self._match_text_seq("FOR", "ORDINALITY"):
3282            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3283
3284        constraints = []
3285        while True:
3286            constraint = self._parse_column_constraint()
3287            if not constraint:
3288                break
3289            constraints.append(constraint)
3290
3291        if not kind and not constraints:
3292            return this
3293
3294        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3295
3296    def _parse_auto_increment(
3297        self,
3298    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
3299        start = None
3300        increment = None
3301
3302        if self._match(TokenType.L_PAREN, advance=False):
3303            args = self._parse_wrapped_csv(self._parse_bitwise)
3304            start = seq_get(args, 0)
3305            increment = seq_get(args, 1)
3306        elif self._match_text_seq("START"):
3307            start = self._parse_bitwise()
3308            self._match_text_seq("INCREMENT")
3309            increment = self._parse_bitwise()
3310
3311        if start and increment:
3312            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3313
3314        return exp.AutoIncrementColumnConstraint()
3315
3316    def _parse_compress(self) -> exp.CompressColumnConstraint:
3317        if self._match(TokenType.L_PAREN, advance=False):
3318            return self.expression(
3319                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3320            )
3321
3322        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3323
3324    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
3325        if self._match_text_seq("BY", "DEFAULT"):
3326            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
3327            this = self.expression(
3328                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
3329            )
3330        else:
3331            self._match_text_seq("ALWAYS")
3332            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
3333
3334        self._match(TokenType.ALIAS)
3335        identity = self._match_text_seq("IDENTITY")
3336
3337        if self._match(TokenType.L_PAREN):
3338            if self._match_text_seq("START", "WITH"):
3339                this.set("start", self._parse_bitwise())
3340            if self._match_text_seq("INCREMENT", "BY"):
3341                this.set("increment", self._parse_bitwise())
3342            if self._match_text_seq("MINVALUE"):
3343                this.set("minvalue", self._parse_bitwise())
3344            if self._match_text_seq("MAXVALUE"):
3345                this.set("maxvalue", self._parse_bitwise())
3346
3347            if self._match_text_seq("CYCLE"):
3348                this.set("cycle", True)
3349            elif self._match_text_seq("NO", "CYCLE"):
3350                this.set("cycle", False)
3351
3352            if not identity:
3353                this.set("expression", self._parse_bitwise())
3354
3355            self._match_r_paren()
3356
3357        return this
3358
3359    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
3360        self._match_text_seq("LENGTH")
3361        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3362
3363    def _parse_not_constraint(
3364        self,
3365    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
3366        if self._match_text_seq("NULL"):
3367            return self.expression(exp.NotNullColumnConstraint)
3368        if self._match_text_seq("CASESPECIFIC"):
3369            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3370        return None
3371
3372    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3373        if self._match(TokenType.CONSTRAINT):
3374            this = self._parse_id_var()
3375        else:
3376            this = None
3377
3378        if self._match_texts(self.CONSTRAINT_PARSERS):
3379            return self.expression(
3380                exp.ColumnConstraint,
3381                this=this,
3382                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3383            )
3384
3385        return this
3386
3387    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3388        if not self._match(TokenType.CONSTRAINT):
3389            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3390
3391        this = self._parse_id_var()
3392        expressions = []
3393
3394        while True:
3395            constraint = self._parse_unnamed_constraint() or self._parse_function()
3396            if not constraint:
3397                break
3398            expressions.append(constraint)
3399
3400        return self.expression(exp.Constraint, this=this, expressions=expressions)
3401
3402    def _parse_unnamed_constraint(
3403        self, constraints: t.Optional[t.Collection[str]] = None
3404    ) -> t.Optional[exp.Expression]:
3405        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3406            return None
3407
3408        constraint = self._prev.text.upper()
3409        if constraint not in self.CONSTRAINT_PARSERS:
3410            self.raise_error(f"No parser found for schema constraint {constraint}.")
3411
3412        return self.CONSTRAINT_PARSERS[constraint](self)
3413
3414    def _parse_unique(self) -> exp.UniqueColumnConstraint:
3415        self._match_text_seq("KEY")
3416        return self.expression(
3417            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3418        )
3419
3420    def _parse_key_constraint_options(self) -> t.List[str]:
3421        options = []
3422        while True:
3423            if not self._curr:
3424                break
3425
3426            if self._match(TokenType.ON):
3427                action = None
3428                on = self._advance_any() and self._prev.text
3429
3430                if self._match_text_seq("NO", "ACTION"):
3431                    action = "NO ACTION"
3432                elif self._match_text_seq("CASCADE"):
3433                    action = "CASCADE"
3434                elif self._match_pair(TokenType.SET, TokenType.NULL):
3435                    action = "SET NULL"
3436                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3437                    action = "SET DEFAULT"
3438                else:
3439                    self.raise_error("Invalid key constraint")
3440
3441                options.append(f"ON {on} {action}")
3442            elif self._match_text_seq("NOT", "ENFORCED"):
3443                options.append("NOT ENFORCED")
3444            elif self._match_text_seq("DEFERRABLE"):
3445                options.append("DEFERRABLE")
3446            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3447                options.append("INITIALLY DEFERRED")
3448            elif self._match_text_seq("NORELY"):
3449                options.append("NORELY")
3450            elif self._match_text_seq("MATCH", "FULL"):
3451                options.append("MATCH FULL")
3452            else:
3453                break
3454
3455        return options
3456
3457    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
3458        if match and not self._match(TokenType.REFERENCES):
3459            return None
3460
3461        expressions = None
3462        this = self._parse_id_var()
3463
3464        if self._match(TokenType.L_PAREN, advance=False):
3465            expressions = self._parse_wrapped_id_vars()
3466
3467        options = self._parse_key_constraint_options()
3468        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3469
3470    def _parse_foreign_key(self) -> exp.ForeignKey:
3471        expressions = self._parse_wrapped_id_vars()
3472        reference = self._parse_references()
3473        options = {}
3474
3475        while self._match(TokenType.ON):
3476            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3477                self.raise_error("Expected DELETE or UPDATE")
3478
3479            kind = self._prev.text.lower()
3480
3481            if self._match_text_seq("NO", "ACTION"):
3482                action = "NO ACTION"
3483            elif self._match(TokenType.SET):
3484                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3485                action = "SET " + self._prev.text.upper()
3486            else:
3487                self._advance()
3488                action = self._prev.text.upper()
3489
3490            options[kind] = action
3491
3492        return self.expression(
3493            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3494        )
3495
3496    def _parse_primary_key(
3497        self, wrapped_optional: bool = False, in_props: bool = False
3498    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
3499        desc = (
3500            self._match_set((TokenType.ASC, TokenType.DESC))
3501            and self._prev.token_type == TokenType.DESC
3502        )
3503
3504        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
3505            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3506
3507        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
3508        options = self._parse_key_constraint_options()
3509        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3510
3511    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3512        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
3513            return this
3514
3515        bracket_kind = self._prev.token_type
3516
3517        if self._match(TokenType.COLON):
3518            expressions: t.List[t.Optional[exp.Expression]] = [
3519                self.expression(exp.Slice, expression=self._parse_conjunction())
3520            ]
3521        else:
3522            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
3523
3524        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
3525        if bracket_kind == TokenType.L_BRACE:
3526            this = self.expression(exp.Struct, expressions=expressions)
3527        elif not this or this.name.upper() == "ARRAY":
3528            this = self.expression(exp.Array, expressions=expressions)
3529        else:
3530            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
3531            this = self.expression(exp.Bracket, this=this, expressions=expressions)
3532
3533        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
3534            self.raise_error("Expected ]")
3535        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
3536            self.raise_error("Expected }")
3537
3538        self._add_comments(this)
3539        return self._parse_bracket(this)
3540
3541    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3542        if self._match(TokenType.COLON):
3543            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3544        return this
3545
3546    def _parse_case(self) -> t.Optional[exp.Expression]:
3547        ifs = []
3548        default = None
3549
3550        expression = self._parse_conjunction()
3551
3552        while self._match(TokenType.WHEN):
3553            this = self._parse_conjunction()
3554            self._match(TokenType.THEN)
3555            then = self._parse_conjunction()
3556            ifs.append(self.expression(exp.If, this=this, true=then))
3557
3558        if self._match(TokenType.ELSE):
3559            default = self._parse_conjunction()
3560
3561        if not self._match(TokenType.END):
3562            self.raise_error("Expected END after CASE", self._prev)
3563
3564        return self._parse_window(
3565            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3566        )
3567
3568    def _parse_if(self) -> t.Optional[exp.Expression]:
3569        if self._match(TokenType.L_PAREN):
3570            args = self._parse_csv(self._parse_conjunction)
3571            this = self.validate_expression(exp.If.from_arg_list(args), args)
3572            self._match_r_paren()
3573        else:
3574            index = self._index - 1
3575            condition = self._parse_conjunction()
3576
3577            if not condition:
3578                self._retreat(index)
3579                return None
3580
3581            self._match(TokenType.THEN)
3582            true = self._parse_conjunction()
3583            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3584            self._match(TokenType.END)
3585            this = self.expression(exp.If, this=condition, true=true, false=false)
3586
3587        return self._parse_window(this)
3588
3589    def _parse_extract(self) -> exp.Extract:
3590        this = self._parse_function() or self._parse_var() or self._parse_type()
3591
3592        if self._match(TokenType.FROM):
3593            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3594
3595        if not self._match(TokenType.COMMA):
3596            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3597
3598        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3599
3600    def _parse_cast(self, strict: bool) -> exp.Expression:
3601        this = self._parse_conjunction()
3602
3603        if not self._match(TokenType.ALIAS):
3604            if self._match(TokenType.COMMA):
3605                return self.expression(
3606                    exp.CastToStrType, this=this, expression=self._parse_string()
3607                )
3608            else:
3609                self.raise_error("Expected AS after CAST")
3610
3611        to = self._parse_types()
3612
3613        if not to:
3614            self.raise_error("Expected TYPE after CAST")
3615        elif to.this == exp.DataType.Type.CHAR:
3616            if self._match(TokenType.CHARACTER_SET):
3617                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3618        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
3619            fmt = self._parse_string()
3620
3621            return self.expression(
3622                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
3623                this=this,
3624                format=exp.Literal.string(
3625                    format_time(
3626                        fmt.this if fmt else "",
3627                        self.FORMAT_MAPPING or self.TIME_MAPPING,
3628                        self.FORMAT_TRIE or self.TIME_TRIE,
3629                    )
3630                ),
3631            )
3632
3633        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3634
3635    def _parse_concat(self) -> t.Optional[exp.Expression]:
3636        args = self._parse_csv(self._parse_conjunction)
3637        if self.CONCAT_NULL_OUTPUTS_STRING:
3638            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]
3639
3640        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
3641        # we find such a call we replace it with its argument.
3642        if len(args) == 1:
3643            return args[0]
3644
3645        return self.expression(
3646            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
3647        )
3648
3649    def _parse_string_agg(self) -> exp.Expression:
3650        expression: t.Optional[exp.Expression]
3651
3652        if self._match(TokenType.DISTINCT):
3653            args = self._parse_csv(self._parse_conjunction)
3654            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
3655        else:
3656            args = self._parse_csv(self._parse_conjunction)
3657            expression = seq_get(args, 0)
3658
3659        index = self._index
3660        if not self._match(TokenType.R_PAREN):
3661            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
3662            order = self._parse_order(this=expression)
3663            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3664
3665        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
3666        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
3667        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
3668        if not self._match_text_seq("WITHIN", "GROUP"):
3669            self._retreat(index)
3670            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
3671
3672        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
3673        order = self._parse_order(this=expression)
3674        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3675
3676    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3677        to: t.Optional[exp.Expression]
3678        this = self._parse_bitwise()
3679
3680        if self._match(TokenType.USING):
3681            to = self.expression(exp.CharacterSet, this=self._parse_var())
3682        elif self._match(TokenType.COMMA):
3683            to = self._parse_bitwise()
3684        else:
3685            to = None
3686
3687        # Swap the argument order if needed to produce the correct AST
3688        if self.CONVERT_TYPE_FIRST:
3689            this, to = to, this
3690
3691        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3692
3693    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
3694        """
3695        There are generally two variants of the DECODE function:
3696
3697        - DECODE(bin, charset)
3698        - DECODE(expression, search, result [, search, result] ... [, default])
3699
3700        The second variant will always be parsed into a CASE expression. Note that NULL
3701        needs special treatment, since we need to explicitly check for it with `IS NULL`,
3702        instead of relying on pattern matching.
3703        """
3704        args = self._parse_csv(self._parse_conjunction)
3705
3706        if len(args) < 3:
3707            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
3708
3709        expression, *expressions = args
3710        if not expression:
3711            return None
3712
3713        ifs = []
3714        for search, result in zip(expressions[::2], expressions[1::2]):
3715            if not search or not result:
3716                return None
3717
3718            if isinstance(search, exp.Literal):
3719                ifs.append(
3720                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
3721                )
3722            elif isinstance(search, exp.Null):
3723                ifs.append(
3724                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
3725                )
3726            else:
3727                cond = exp.or_(
3728                    exp.EQ(this=expression.copy(), expression=search),
3729                    exp.and_(
3730                        exp.Is(this=expression.copy(), expression=exp.Null()),
3731                        exp.Is(this=search.copy(), expression=exp.Null()),
3732                        copy=False,
3733                    ),
3734                    copy=False,
3735                )
3736                ifs.append(exp.If(this=cond, true=result))
3737
3738        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3739
3740    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
3741        self._match_text_seq("KEY")
3742        key = self._parse_field()
3743        self._match(TokenType.COLON)
3744        self._match_text_seq("VALUE")
3745        value = self._parse_field()
3746
3747        if not key and not value:
3748            return None
3749        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3750
3751    def _parse_json_object(self) -> exp.JSONObject:
3752        expressions = self._parse_csv(self._parse_json_key_value)
3753
3754        null_handling = None
3755        if self._match_text_seq("NULL", "ON", "NULL"):
3756            null_handling = "NULL ON NULL"
3757        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3758            null_handling = "ABSENT ON NULL"
3759
3760        unique_keys = None
3761        if self._match_text_seq("WITH", "UNIQUE"):
3762            unique_keys = True
3763        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3764            unique_keys = False
3765
3766        self._match_text_seq("KEYS")
3767
3768        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3769        format_json = self._match_text_seq("FORMAT", "JSON")
3770        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3771
3772        return self.expression(
3773            exp.JSONObject,
3774            expressions=expressions,
3775            null_handling=null_handling,
3776            unique_keys=unique_keys,
3777            return_type=return_type,
3778            format_json=format_json,
3779            encoding=encoding,
3780        )
3781
3782    def _parse_logarithm(self) -> exp.Func:
3783        # Default argument order is base, expression
3784        args = self._parse_csv(self._parse_range)
3785
3786        if len(args) > 1:
3787            if not self.LOG_BASE_FIRST:
3788                args.reverse()
3789            return exp.Log.from_arg_list(args)
3790
3791        return self.expression(
3792            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3793        )
3794
3795    def _parse_match_against(self) -> exp.MatchAgainst:
3796        expressions = self._parse_csv(self._parse_column)
3797
3798        self._match_text_seq(")", "AGAINST", "(")
3799
3800        this = self._parse_string()
3801
3802        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3803            modifier = "IN NATURAL LANGUAGE MODE"
3804            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3805                modifier = f"{modifier} WITH QUERY EXPANSION"
3806        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3807            modifier = "IN BOOLEAN MODE"
3808        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3809            modifier = "WITH QUERY EXPANSION"
3810        else:
3811            modifier = None
3812
3813        return self.expression(
3814            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3815        )
3816
3817    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3818    def _parse_open_json(self) -> exp.OpenJSON:
3819        this = self._parse_bitwise()
3820        path = self._match(TokenType.COMMA) and self._parse_string()
3821
3822        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
3823            this = self._parse_field(any_token=True)
3824            kind = self._parse_types()
3825            path = self._parse_string()
3826            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3827
3828            return self.expression(
3829                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3830            )
3831
3832        expressions = None
3833        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3834            self._match_l_paren()
3835            expressions = self._parse_csv(_parse_open_json_column_def)
3836
3837        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3838
3839    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
3840        args = self._parse_csv(self._parse_bitwise)
3841
3842        if self._match(TokenType.IN):
3843            return self.expression(
3844                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3845            )
3846
3847        if haystack_first:
3848            haystack = seq_get(args, 0)
3849            needle = seq_get(args, 1)
3850        else:
3851            needle = seq_get(args, 0)
3852            haystack = seq_get(args, 1)
3853
3854        return self.expression(
3855            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
3856        )
3857
3858    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
3859        args = self._parse_csv(self._parse_table)
3860        return exp.JoinHint(this=func_name.upper(), expressions=args)
3861
3862    def _parse_substring(self) -> exp.Substring:
3863        # Postgres supports the form: substring(string [from int] [for int])
3864        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3865
3866        args = self._parse_csv(self._parse_bitwise)
3867
3868        if self._match(TokenType.FROM):
3869            args.append(self._parse_bitwise())
3870            if self._match(TokenType.FOR):
3871                args.append(self._parse_bitwise())
3872
3873        return self.validate_expression(exp.Substring.from_arg_list(args), args)
3874
3875    def _parse_trim(self) -> exp.Trim:
3876        # https://www.w3resource.com/sql/character-functions/trim.php
3877        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3878
3879        position = None
3880        collation = None
3881
3882        if self._match_texts(self.TRIM_TYPES):
3883            position = self._prev.text.upper()
3884
3885        expression = self._parse_bitwise()
3886        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3887            this = self._parse_bitwise()
3888        else:
3889            this = expression
3890            expression = None
3891
3892        if self._match(TokenType.COLLATE):
3893            collation = self._parse_bitwise()
3894
3895        return self.expression(
3896            exp.Trim, this=this, position=position, expression=expression, collation=collation
3897        )
3898
3899    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3900        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3901
3902    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3903        return self._parse_window(self._parse_id_var(), alias=True)
3904
3905    def _parse_respect_or_ignore_nulls(
3906        self, this: t.Optional[exp.Expression]
3907    ) -> t.Optional[exp.Expression]:
3908        if self._match_text_seq("IGNORE", "NULLS"):
3909            return self.expression(exp.IgnoreNulls, this=this)
3910        if self._match_text_seq("RESPECT", "NULLS"):
3911            return self.expression(exp.RespectNulls, this=this)
3912        return this
3913
3914    def _parse_window(
3915        self, this: t.Optional[exp.Expression], alias: bool = False
3916    ) -> t.Optional[exp.Expression]:
3917        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
3918            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
3919            self._match_r_paren()
3920
3921        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
3922        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
3923        if self._match_text_seq("WITHIN", "GROUP"):
3924            order = self._parse_wrapped(self._parse_order)
3925            this = self.expression(exp.WithinGroup, this=this, expression=order)
3926
3927        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
3928        # Some dialects choose to implement and some do not.
3929        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
3930
3931        # There is some code above in _parse_lambda that handles
3932        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
3933
3934        # The below changes handle
3935        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
3936
3937        # Oracle allows both formats
3938        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
3939        #   and Snowflake chose to do the same for familiarity
3940        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
3941        this = self._parse_respect_or_ignore_nulls(this)
3942
3943        # bigquery select from window x AS (partition by ...)
3944        if alias:
3945            over = None
3946            self._match(TokenType.ALIAS)
3947        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
3948            return this
3949        else:
3950            over = self._prev.text.upper()
3951
3952        if not self._match(TokenType.L_PAREN):
3953            return self.expression(
3954                exp.Window, this=this, alias=self._parse_id_var(False), over=over
3955            )
3956
3957        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
3958
3959        first = self._match(TokenType.FIRST)
3960        if self._match_text_seq("LAST"):
3961            first = False
3962
3963        partition = self._parse_partition_by()
3964        order = self._parse_order()
3965        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
3966
3967        if kind:
3968            self._match(TokenType.BETWEEN)
3969            start = self._parse_window_spec()
3970            self._match(TokenType.AND)
3971            end = self._parse_window_spec()
3972
3973            spec = self.expression(
3974                exp.WindowSpec,
3975                kind=kind,
3976                start=start["value"],
3977                start_side=start["side"],
3978                end=end["value"],
3979                end_side=end["side"],
3980            )
3981        else:
3982            spec = None
3983
3984        self._match_r_paren()
3985
3986        return self.expression(
3987            exp.Window,
3988            this=this,
3989            partition_by=partition,
3990            order=order,
3991            spec=spec,
3992            alias=window_alias,
3993            over=over,
3994            first=first,
3995        )
3996
3997    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3998        self._match(TokenType.BETWEEN)
3999
4000        return {
4001            "value": (
4002                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
4003                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
4004                or self._parse_bitwise()
4005            ),
4006            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
4007        }
4008
4009    def _parse_alias(
4010        self, this: t.Optional[exp.Expression], explicit: bool = False
4011    ) -> t.Optional[exp.Expression]:
4012        any_token = self._match(TokenType.ALIAS)
4013
4014        if explicit and not any_token:
4015            return this
4016
4017        if self._match(TokenType.L_PAREN):
4018            aliases = self.expression(
4019                exp.Aliases,
4020                this=this,
4021                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
4022            )
4023            self._match_r_paren(aliases)
4024            return aliases
4025
4026        alias = self._parse_id_var(any_token)
4027
4028        if alias:
4029            return self.expression(exp.Alias, this=this, alias=alias)
4030
4031        return this
4032
4033    def _parse_id_var(
4034        self,
4035        any_token: bool = True,
4036        tokens: t.Optional[t.Collection[TokenType]] = None,
4037    ) -> t.Optional[exp.Expression]:
4038        identifier = self._parse_identifier()
4039
4040        if identifier:
4041            return identifier
4042
4043        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4044            quoted = self._prev.token_type == TokenType.STRING
4045            return exp.Identifier(this=self._prev.text, quoted=quoted)
4046
4047        return None
4048
4049    def _parse_string(self) -> t.Optional[exp.Expression]:
4050        if self._match(TokenType.STRING):
4051            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4052        return self._parse_placeholder()
4053
4054    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
4055        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4056
4057    def _parse_number(self) -> t.Optional[exp.Expression]:
4058        if self._match(TokenType.NUMBER):
4059            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4060        return self._parse_placeholder()
4061
4062    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4063        if self._match(TokenType.IDENTIFIER):
4064            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4065        return self._parse_placeholder()
4066
4067    def _parse_var(
4068        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4069    ) -> t.Optional[exp.Expression]:
4070        if (
4071            (any_token and self._advance_any())
4072            or self._match(TokenType.VAR)
4073            or (self._match_set(tokens) if tokens else False)
4074        ):
4075            return self.expression(exp.Var, this=self._prev.text)
4076        return self._parse_placeholder()
4077
4078    def _advance_any(self) -> t.Optional[Token]:
4079        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4080            self._advance()
4081            return self._prev
4082        return None
4083
4084    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4085        return self._parse_var() or self._parse_string()
4086
4087    def _parse_null(self) -> t.Optional[exp.Expression]:
4088        if self._match(TokenType.NULL):
4089            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4090        return None
4091
4092    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4093        if self._match(TokenType.TRUE):
4094            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4095        if self._match(TokenType.FALSE):
4096            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4097        return None
4098
4099    def _parse_star(self) -> t.Optional[exp.Expression]:
4100        if self._match(TokenType.STAR):
4101            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4102        return None
4103
4104    def _parse_parameter(self) -> exp.Parameter:
4105        wrapped = self._match(TokenType.L_BRACE)
4106        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4107        self._match(TokenType.R_BRACE)
4108        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4109
4110    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4111        if self._match_set(self.PLACEHOLDER_PARSERS):
4112            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4113            if placeholder:
4114                return placeholder
4115            self._advance(-1)
4116        return None
4117
4118    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4119        if not self._match(TokenType.EXCEPT):
4120            return None
4121        if self._match(TokenType.L_PAREN, advance=False):
4122            return self._parse_wrapped_csv(self._parse_column)
4123        return self._parse_csv(self._parse_column)
4124
4125    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4126        if not self._match(TokenType.REPLACE):
4127            return None
4128        if self._match(TokenType.L_PAREN, advance=False):
4129            return self._parse_wrapped_csv(self._parse_expression)
4130        return self._parse_csv(self._parse_expression)
4131
4132    def _parse_csv(
4133        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4134    ) -> t.List[t.Optional[exp.Expression]]:
4135        parse_result = parse_method()
4136        items = [parse_result] if parse_result is not None else []
4137
4138        while self._match(sep):
4139            self._add_comments(parse_result)
4140            parse_result = parse_method()
4141            if parse_result is not None:
4142                items.append(parse_result)
4143
4144        return items
4145
4146    def _parse_tokens(
4147        self, parse_method: t.Callable, expressions: t.Dict
4148    ) -> t.Optional[exp.Expression]:
4149        this = parse_method()
4150
4151        while self._match_set(expressions):
4152            this = self.expression(
4153                expressions[self._prev.token_type],
4154                this=this,
4155                comments=self._prev_comments,
4156                expression=parse_method(),
4157            )
4158
4159        return this
4160
4161    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4162        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4163
4164    def _parse_wrapped_csv(
4165        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4166    ) -> t.List[t.Optional[exp.Expression]]:
4167        return self._parse_wrapped(
4168            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4169        )
4170
4171    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4172        wrapped = self._match(TokenType.L_PAREN)
4173        if not wrapped and not optional:
4174            self.raise_error("Expecting (")
4175        parse_result = parse_method()
4176        if wrapped:
4177            self._match_r_paren()
4178        return parse_result
4179
4180    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4181        return self._parse_select() or self._parse_set_operations(
4182            self._parse_expression() if alias else self._parse_conjunction()
4183        )
4184
4185    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4186        return self._parse_query_modifiers(
4187            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
4188        )
4189
4190    def _parse_transaction(self) -> exp.Transaction:
4191        this = None
4192        if self._match_texts(self.TRANSACTION_KIND):
4193            this = self._prev.text
4194
4195        self._match_texts({"TRANSACTION", "WORK"})
4196
4197        modes = []
4198        while True:
4199            mode = []
4200            while self._match(TokenType.VAR):
4201                mode.append(self._prev.text)
4202
4203            if mode:
4204                modes.append(" ".join(mode))
4205            if not self._match(TokenType.COMMA):
4206                break
4207
4208        return self.expression(exp.Transaction, this=this, modes=modes)
4209
4210    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
4211        chain = None
4212        savepoint = None
4213        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4214
4215        self._match_texts({"TRANSACTION", "WORK"})
4216
4217        if self._match_text_seq("TO"):
4218            self._match_text_seq("SAVEPOINT")
4219            savepoint = self._parse_id_var()
4220
4221        if self._match(TokenType.AND):
4222            chain = not self._match_text_seq("NO")
4223            self._match_text_seq("CHAIN")
4224
4225        if is_rollback:
4226            return self.expression(exp.Rollback, savepoint=savepoint)
4227
4228        return self.expression(exp.Commit, chain=chain)
4229
4230    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4231        if not self._match_text_seq("ADD"):
4232            return None
4233
4234        self._match(TokenType.COLUMN)
4235        exists_column = self._parse_exists(not_=True)
4236        expression = self._parse_column_def(self._parse_field(any_token=True))
4237
4238        if expression:
4239            expression.set("exists", exists_column)
4240
4241            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4242            if self._match_texts(("FIRST", "AFTER")):
4243                position = self._prev.text
4244                column_position = self.expression(
4245                    exp.ColumnPosition, this=self._parse_column(), position=position
4246                )
4247                expression.set("position", column_position)
4248
4249        return expression
4250
4251    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
4252        drop = self._match(TokenType.DROP) and self._parse_drop()
4253        if drop and not isinstance(drop, exp.Command):
4254            drop.set("kind", drop.args.get("kind", "COLUMN"))
4255        return drop
4256
4257    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4258    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
4259        return self.expression(
4260            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4261        )
4262
4263    def _parse_add_constraint(self) -> exp.AddConstraint:
4264        this = None
4265        kind = self._prev.token_type
4266
4267        if kind == TokenType.CONSTRAINT:
4268            this = self._parse_id_var()
4269
4270            if self._match_text_seq("CHECK"):
4271                expression = self._parse_wrapped(self._parse_conjunction)
4272                enforced = self._match_text_seq("ENFORCED")
4273
4274                return self.expression(
4275                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4276                )
4277
4278        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4279            expression = self._parse_foreign_key()
4280        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4281            expression = self._parse_primary_key()
4282        else:
4283            expression = None
4284
4285        return self.expression(exp.AddConstraint, this=this, expression=expression)
4286
4287    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4288        index = self._index - 1
4289
4290        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4291            return self._parse_csv(self._parse_add_constraint)
4292
4293        self._retreat(index)
4294        return self._parse_csv(self._parse_add_column)
4295
4296    def _parse_alter_table_alter(self) -> exp.AlterColumn:
4297        self._match(TokenType.COLUMN)
4298        column = self._parse_field(any_token=True)
4299
4300        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4301            return self.expression(exp.AlterColumn, this=column, drop=True)
4302        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4303            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4304
4305        self._match_text_seq("SET", "DATA")
4306        return self.expression(
4307            exp.AlterColumn,
4308            this=column,
4309            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4310            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4311            using=self._match(TokenType.USING) and self._parse_conjunction(),
4312        )
4313
4314    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4315        index = self._index - 1
4316
4317        partition_exists = self._parse_exists()
4318        if self._match(TokenType.PARTITION, advance=False):
4319            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4320
4321        self._retreat(index)
4322        return self._parse_csv(self._parse_drop_column)
4323
4324    def _parse_alter_table_rename(self) -> exp.RenameTable:
4325        self._match_text_seq("TO")
4326        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4327
4328    def _parse_alter(self) -> exp.AlterTable | exp.Command:
4329        start = self._prev
4330
4331        if not self._match(TokenType.TABLE):
4332            return self._parse_as_command(start)
4333
4334        exists = self._parse_exists()
4335        this = self._parse_table(schema=True)
4336
4337        if self._next:
4338            self._advance()
4339        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4340
4341        if parser:
4342            actions = ensure_list(parser(self))
4343
4344            if not self._curr:
4345                return self.expression(
4346                    exp.AlterTable,
4347                    this=this,
4348                    exists=exists,
4349                    actions=actions,
4350                )
4351        return self._parse_as_command(start)
4352
4353    def _parse_merge(self) -> exp.Merge:
4354        self._match(TokenType.INTO)
4355        target = self._parse_table()
4356
4357        self._match(TokenType.USING)
4358        using = self._parse_table()
4359
4360        self._match(TokenType.ON)
4361        on = self._parse_conjunction()
4362
4363        whens = []
4364        while self._match(TokenType.WHEN):
4365            matched = not self._match(TokenType.NOT)
4366            self._match_text_seq("MATCHED")
4367            source = (
4368                False
4369                if self._match_text_seq("BY", "TARGET")
4370                else self._match_text_seq("BY", "SOURCE")
4371            )
4372            condition = self._parse_conjunction() if self._match(TokenType.AND) else None
4373
4374            self._match(TokenType.THEN)
4375
4376            if self._match(TokenType.INSERT):
4377                _this = self._parse_star()
4378                if _this:
4379                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
4380                else:
4381                    then = self.expression(
4382                        exp.Insert,
4383                        this=self._parse_value(),
4384                        expression=self._match(TokenType.VALUES) and self._parse_value(),
4385                    )
4386            elif self._match(TokenType.UPDATE):
4387                expressions = self._parse_star()
4388                if expressions:
4389                    then = self.expression(exp.Update, expressions=expressions)
4390                else:
4391                    then = self.expression(
4392                        exp.Update,
4393                        expressions=self._match(TokenType.SET)
4394                        and self._parse_csv(self._parse_equality),
4395                    )
4396            elif self._match(TokenType.DELETE):
4397                then = self.expression(exp.Var, this=self._prev.text)
4398            else:
4399                then = None
4400
4401            whens.append(
4402                self.expression(
4403                    exp.When,
4404                    matched=matched,
4405                    source=source,
4406                    condition=condition,
4407                    then=then,
4408                )
4409            )
4410
4411        return self.expression(
4412            exp.Merge,
4413            this=target,
4414            using=using,
4415            on=on,
4416            expressions=whens,
4417        )
4418
4419    def _parse_show(self) -> t.Optional[exp.Expression]:
4420        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
4421        if parser:
4422            return parser(self)
4423        self._advance()
4424        return self.expression(exp.Show, this=self._prev.text.upper())
4425
4426    def _parse_set_item_assignment(
4427        self, kind: t.Optional[str] = None
4428    ) -> t.Optional[exp.Expression]:
4429        index = self._index
4430
4431        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4432            return self._parse_set_transaction(global_=kind == "GLOBAL")
4433
4434        left = self._parse_primary() or self._parse_id_var()
4435
4436        if not self._match_texts(("=", "TO")):
4437            self._retreat(index)
4438            return None
4439
4440        right = self._parse_statement() or self._parse_id_var()
4441        this = self.expression(exp.EQ, this=left, expression=right)
4442
4443        return self.expression(exp.SetItem, this=this, kind=kind)
4444
4445    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4446        self._match_text_seq("TRANSACTION")
4447        characteristics = self._parse_csv(
4448            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4449        )
4450        return self.expression(
4451            exp.SetItem,
4452            expressions=characteristics,
4453            kind="TRANSACTION",
4454            **{"global": global_},  # type: ignore
4455        )
4456
4457    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4458        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
4459        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4460
4461    def _parse_set(self) -> exp.Set | exp.Command:
4462        index = self._index
4463        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4464
4465        if self._curr:
4466            self._retreat(index)
4467            return self._parse_as_command(self._prev)
4468
4469        return set_
4470
4471    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
4472        for option in options:
4473            if self._match_text_seq(*option.split(" ")):
4474                return exp.var(option)
4475        return None
4476
4477    def _parse_as_command(self, start: Token) -> exp.Command:
4478        while self._curr:
4479            self._advance()
4480        text = self._find_sql(start, self._prev)
4481        size = len(start.text)
4482        return exp.Command(this=text[:size], expression=text[size:])
4483
4484    def _parse_dict_property(self, this: str) -> exp.DictProperty:
4485        settings = []
4486
4487        self._match_l_paren()
4488        kind = self._parse_id_var()
4489
4490        if self._match(TokenType.L_PAREN):
4491            while True:
4492                key = self._parse_id_var()
4493                value = self._parse_primary()
4494
4495                if not key and value is None:
4496                    break
4497                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
4498            self._match(TokenType.R_PAREN)
4499
4500        self._match_r_paren()
4501
4502        return self.expression(
4503            exp.DictProperty,
4504            this=this,
4505            kind=kind.this if kind else None,
4506            settings=settings,
4507        )
4508
4509    def _parse_dict_range(self, this: str) -> exp.DictRange:
4510        self._match_l_paren()
4511        has_min = self._match_text_seq("MIN")
4512        if has_min:
4513            min = self._parse_var() or self._parse_primary()
4514            self._match_text_seq("MAX")
4515            max = self._parse_var() or self._parse_primary()
4516        else:
4517            max = self._parse_var() or self._parse_primary()
4518            min = exp.Literal.number(0)
4519        self._match_r_paren()
4520        return self.expression(exp.DictRange, this=this, min=min, max=max)
4521
4522    def _find_parser(
4523        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4524    ) -> t.Optional[t.Callable]:
4525        if not self._curr:
4526            return None
4527
4528        index = self._index
4529        this = []
4530        while True:
4531            # The current token might be multiple words
4532            curr = self._curr.text.upper()
4533            key = curr.split(" ")
4534            this.append(curr)
4535            self._advance()
4536            result, trie = in_trie(trie, key)
4537            if result == 0:
4538                break
4539            if result == 2:
4540                subparser = parsers[" ".join(this)]
4541                return subparser
4542        self._retreat(index)
4543        return None
4544
4545    def _match(self, token_type, advance=True, expression=None):
4546        if not self._curr:
4547            return None
4548
4549        if self._curr.token_type == token_type:
4550            if advance:
4551                self._advance()
4552            self._add_comments(expression)
4553            return True
4554
4555        return None
4556
4557    def _match_set(self, types, advance=True):
4558        if not self._curr:
4559            return None
4560
4561        if self._curr.token_type in types:
4562            if advance:
4563                self._advance()
4564            return True
4565
4566        return None
4567
4568    def _match_pair(self, token_type_a, token_type_b, advance=True):
4569        if not self._curr or not self._next:
4570            return None
4571
4572        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4573            if advance:
4574                self._advance(2)
4575            return True
4576
4577        return None
4578
4579    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4580        if not self._match(TokenType.L_PAREN, expression=expression):
4581            self.raise_error("Expecting (")
4582
4583    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4584        if not self._match(TokenType.R_PAREN, expression=expression):
4585            self.raise_error("Expecting )")
4586
4587    def _match_texts(self, texts, advance=True):
4588        if self._curr and self._curr.text.upper() in texts:
4589            if advance:
4590                self._advance()
4591            return True
4592        return False
4593
4594    def _match_text_seq(self, *texts, advance=True):
4595        index = self._index
4596        for text in texts:
4597            if self._curr and self._curr.text.upper() == text:
4598                self._advance()
4599            else:
4600                self._retreat(index)
4601                return False
4602
4603        if not advance:
4604            self._retreat(index)
4605
4606        return True
4607
4608    @t.overload
4609    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
4610        ...
4611
4612    @t.overload
4613    def _replace_columns_with_dots(
4614        self, this: t.Optional[exp.Expression]
4615    ) -> t.Optional[exp.Expression]:
4616        ...
4617
4618    def _replace_columns_with_dots(self, this):
4619        if isinstance(this, exp.Dot):
4620            exp.replace_children(this, self._replace_columns_with_dots)
4621        elif isinstance(this, exp.Column):
4622            exp.replace_children(this, self._replace_columns_with_dots)
4623            table = this.args.get("table")
4624            this = (
4625                self.expression(exp.Dot, this=table, expression=this.this)
4626                if table
4627                else self.expression(exp.Var, this=this.name)
4628            )
4629        elif isinstance(this, exp.Identifier):
4630            this = self.expression(exp.Var, this=this.name)
4631
4632        return this
4633
4634    def _replace_lambda(
4635        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
4636    ) -> t.Optional[exp.Expression]:
4637        if not node:
4638            return node
4639
4640        for column in node.find_all(exp.Column):
4641            if column.parts[0].name in lambda_variables:
4642                dot_or_id = column.to_dot() if column.table else column.this
4643                parent = column.parent
4644
4645                while isinstance(parent, exp.Dot):
4646                    if not isinstance(parent.parent, exp.Dot):
4647                        parent.replace(dot_or_id)
4648                        break
4649                    parent = parent.parent
4650                else:
4651                    if column is node:
4652                        node = dot_or_id
4653                    else:
4654                        column.replace(dot_or_id)
4655        return node
def parse_var_map(args: List) -> sqlglot.expressions.StarMap | sqlglot.expressions.VarMap:
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
22    if len(args) == 1 and args[0].is_star:
23        return exp.StarMap(this=args[0])
24
25    keys = []
26    values = []
27    for i in range(0, len(args), 2):
28        keys.append(args[i])
29        values.append(args[i + 1])
30
31    return exp.VarMap(
32        keys=exp.Array(expressions=keys),
33        values=exp.Array(expressions=values),
34    )
def parse_like(args: List) -> sqlglot.expressions.Escape | sqlglot.expressions.Like:
37def parse_like(args: t.List) -> exp.Escape | exp.Like:
38    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
39    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
42def binary_range_parser(
43    expr_type: t.Type[exp.Expression],
44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
45    return lambda self, this: self._parse_escape(
46        self.expression(expr_type, this=this, expression=self._parse_bitwise())
47    )
class Parser:
  60class Parser(metaclass=_Parser):
  61    """
  62    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
  63
  64    Args:
  65        error_level: The desired error level.
  66            Default: ErrorLevel.IMMEDIATE
  67        error_message_context: Determines the amount of context to capture from a
  68            query string when displaying the error message (in number of characters).
  69            Default: 100
  70        max_errors: Maximum number of error messages to include in a raised ParseError.
  71            This is only relevant if error_level is ErrorLevel.RAISE.
  72            Default: 3
  73    """
  74
  75    FUNCTIONS: t.Dict[str, t.Callable] = {
  76        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  77        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  78            this=seq_get(args, 0),
  79            to=exp.DataType(this=exp.DataType.Type.TEXT),
  80        ),
  81        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  82        "LIKE": parse_like,
  83        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  84            this=seq_get(args, 0),
  85            to=exp.DataType(this=exp.DataType.Type.TEXT),
  86        ),
  87        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  88            this=exp.Cast(
  89                this=seq_get(args, 0),
  90                to=exp.DataType(this=exp.DataType.Type.TEXT),
  91            ),
  92            start=exp.Literal.number(1),
  93            length=exp.Literal.number(10),
  94        ),
  95        "VAR_MAP": parse_var_map,
  96    }
  97
  98    NO_PAREN_FUNCTIONS = {
  99        TokenType.CURRENT_DATE: exp.CurrentDate,
 100        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 101        TokenType.CURRENT_TIME: exp.CurrentTime,
 102        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 103        TokenType.CURRENT_USER: exp.CurrentUser,
 104    }
 105
 106    NESTED_TYPE_TOKENS = {
 107        TokenType.ARRAY,
 108        TokenType.MAP,
 109        TokenType.NULLABLE,
 110        TokenType.STRUCT,
 111    }
 112
 113    TYPE_TOKENS = {
 114        TokenType.BIT,
 115        TokenType.BOOLEAN,
 116        TokenType.TINYINT,
 117        TokenType.UTINYINT,
 118        TokenType.SMALLINT,
 119        TokenType.USMALLINT,
 120        TokenType.INT,
 121        TokenType.UINT,
 122        TokenType.BIGINT,
 123        TokenType.UBIGINT,
 124        TokenType.INT128,
 125        TokenType.UINT128,
 126        TokenType.INT256,
 127        TokenType.UINT256,
 128        TokenType.FLOAT,
 129        TokenType.DOUBLE,
 130        TokenType.CHAR,
 131        TokenType.NCHAR,
 132        TokenType.VARCHAR,
 133        TokenType.NVARCHAR,
 134        TokenType.TEXT,
 135        TokenType.MEDIUMTEXT,
 136        TokenType.LONGTEXT,
 137        TokenType.MEDIUMBLOB,
 138        TokenType.LONGBLOB,
 139        TokenType.BINARY,
 140        TokenType.VARBINARY,
 141        TokenType.JSON,
 142        TokenType.JSONB,
 143        TokenType.INTERVAL,
 144        TokenType.TIME,
 145        TokenType.TIMESTAMP,
 146        TokenType.TIMESTAMPTZ,
 147        TokenType.TIMESTAMPLTZ,
 148        TokenType.DATETIME,
 149        TokenType.DATETIME64,
 150        TokenType.DATE,
 151        TokenType.INT4RANGE,
 152        TokenType.INT4MULTIRANGE,
 153        TokenType.INT8RANGE,
 154        TokenType.INT8MULTIRANGE,
 155        TokenType.NUMRANGE,
 156        TokenType.NUMMULTIRANGE,
 157        TokenType.TSRANGE,
 158        TokenType.TSMULTIRANGE,
 159        TokenType.TSTZRANGE,
 160        TokenType.TSTZMULTIRANGE,
 161        TokenType.DATERANGE,
 162        TokenType.DATEMULTIRANGE,
 163        TokenType.DECIMAL,
 164        TokenType.BIGDECIMAL,
 165        TokenType.UUID,
 166        TokenType.GEOGRAPHY,
 167        TokenType.GEOMETRY,
 168        TokenType.HLLSKETCH,
 169        TokenType.HSTORE,
 170        TokenType.PSEUDO_TYPE,
 171        TokenType.SUPER,
 172        TokenType.SERIAL,
 173        TokenType.SMALLSERIAL,
 174        TokenType.BIGSERIAL,
 175        TokenType.XML,
 176        TokenType.UNIQUEIDENTIFIER,
 177        TokenType.MONEY,
 178        TokenType.SMALLMONEY,
 179        TokenType.ROWVERSION,
 180        TokenType.IMAGE,
 181        TokenType.VARIANT,
 182        TokenType.OBJECT,
 183        TokenType.INET,
 184        *NESTED_TYPE_TOKENS,
 185    }
 186
 187    SUBQUERY_PREDICATES = {
 188        TokenType.ANY: exp.Any,
 189        TokenType.ALL: exp.All,
 190        TokenType.EXISTS: exp.Exists,
 191        TokenType.SOME: exp.Any,
 192    }
 193
 194    RESERVED_KEYWORDS = {
 195        *Tokenizer.SINGLE_TOKENS.values(),
 196        TokenType.SELECT,
 197    }
 198
 199    DB_CREATABLES = {
 200        TokenType.DATABASE,
 201        TokenType.SCHEMA,
 202        TokenType.TABLE,
 203        TokenType.VIEW,
 204        TokenType.DICTIONARY,
 205    }
 206
 207    CREATABLES = {
 208        TokenType.COLUMN,
 209        TokenType.FUNCTION,
 210        TokenType.INDEX,
 211        TokenType.PROCEDURE,
 212        *DB_CREATABLES,
 213    }
 214
 215    # Tokens that can represent identifiers
 216    ID_VAR_TOKENS = {
 217        TokenType.VAR,
 218        TokenType.ANTI,
 219        TokenType.APPLY,
 220        TokenType.ASC,
 221        TokenType.AUTO_INCREMENT,
 222        TokenType.BEGIN,
 223        TokenType.CACHE,
 224        TokenType.COLLATE,
 225        TokenType.COMMAND,
 226        TokenType.COMMENT,
 227        TokenType.COMMIT,
 228        TokenType.CONSTRAINT,
 229        TokenType.DEFAULT,
 230        TokenType.DELETE,
 231        TokenType.DESC,
 232        TokenType.DESCRIBE,
 233        TokenType.DICTIONARY,
 234        TokenType.DIV,
 235        TokenType.END,
 236        TokenType.EXECUTE,
 237        TokenType.ESCAPE,
 238        TokenType.FALSE,
 239        TokenType.FIRST,
 240        TokenType.FILTER,
 241        TokenType.FORMAT,
 242        TokenType.FULL,
 243        TokenType.IF,
 244        TokenType.IS,
 245        TokenType.ISNULL,
 246        TokenType.INTERVAL,
 247        TokenType.KEEP,
 248        TokenType.LEFT,
 249        TokenType.LOAD,
 250        TokenType.MERGE,
 251        TokenType.NATURAL,
 252        TokenType.NEXT,
 253        TokenType.OFFSET,
 254        TokenType.ORDINALITY,
 255        TokenType.OVERWRITE,
 256        TokenType.PARTITION,
 257        TokenType.PERCENT,
 258        TokenType.PIVOT,
 259        TokenType.PRAGMA,
 260        TokenType.RANGE,
 261        TokenType.REFERENCES,
 262        TokenType.RIGHT,
 263        TokenType.ROW,
 264        TokenType.ROWS,
 265        TokenType.SEMI,
 266        TokenType.SET,
 267        TokenType.SETTINGS,
 268        TokenType.SHOW,
 269        TokenType.TEMPORARY,
 270        TokenType.TOP,
 271        TokenType.TRUE,
 272        TokenType.UNIQUE,
 273        TokenType.UNPIVOT,
 274        TokenType.VOLATILE,
 275        TokenType.WINDOW,
 276        *CREATABLES,
 277        *SUBQUERY_PREDICATES,
 278        *TYPE_TOKENS,
 279        *NO_PAREN_FUNCTIONS,
 280    }
 281
 282    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 283
 284    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 285        TokenType.APPLY,
 286        TokenType.ASOF,
 287        TokenType.FULL,
 288        TokenType.LEFT,
 289        TokenType.LOCK,
 290        TokenType.NATURAL,
 291        TokenType.OFFSET,
 292        TokenType.RIGHT,
 293        TokenType.WINDOW,
 294    }
 295
 296    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 297
 298    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 299
 300    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 301
 302    FUNC_TOKENS = {
 303        TokenType.COMMAND,
 304        TokenType.CURRENT_DATE,
 305        TokenType.CURRENT_DATETIME,
 306        TokenType.CURRENT_TIMESTAMP,
 307        TokenType.CURRENT_TIME,
 308        TokenType.CURRENT_USER,
 309        TokenType.FILTER,
 310        TokenType.FIRST,
 311        TokenType.FORMAT,
 312        TokenType.GLOB,
 313        TokenType.IDENTIFIER,
 314        TokenType.INDEX,
 315        TokenType.ISNULL,
 316        TokenType.ILIKE,
 317        TokenType.LIKE,
 318        TokenType.MERGE,
 319        TokenType.OFFSET,
 320        TokenType.PRIMARY_KEY,
 321        TokenType.RANGE,
 322        TokenType.REPLACE,
 323        TokenType.ROW,
 324        TokenType.UNNEST,
 325        TokenType.VAR,
 326        TokenType.LEFT,
 327        TokenType.RIGHT,
 328        TokenType.DATE,
 329        TokenType.DATETIME,
 330        TokenType.TABLE,
 331        TokenType.TIMESTAMP,
 332        TokenType.TIMESTAMPTZ,
 333        TokenType.WINDOW,
 334        *TYPE_TOKENS,
 335        *SUBQUERY_PREDICATES,
 336    }
 337
 338    CONJUNCTION = {
 339        TokenType.AND: exp.And,
 340        TokenType.OR: exp.Or,
 341    }
 342
 343    EQUALITY = {
 344        TokenType.EQ: exp.EQ,
 345        TokenType.NEQ: exp.NEQ,
 346        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 347    }
 348
 349    COMPARISON = {
 350        TokenType.GT: exp.GT,
 351        TokenType.GTE: exp.GTE,
 352        TokenType.LT: exp.LT,
 353        TokenType.LTE: exp.LTE,
 354    }
 355
 356    BITWISE = {
 357        TokenType.AMP: exp.BitwiseAnd,
 358        TokenType.CARET: exp.BitwiseXor,
 359        TokenType.PIPE: exp.BitwiseOr,
 360        TokenType.DPIPE: exp.DPipe,
 361    }
 362
 363    TERM = {
 364        TokenType.DASH: exp.Sub,
 365        TokenType.PLUS: exp.Add,
 366        TokenType.MOD: exp.Mod,
 367        TokenType.COLLATE: exp.Collate,
 368    }
 369
 370    FACTOR = {
 371        TokenType.DIV: exp.IntDiv,
 372        TokenType.LR_ARROW: exp.Distance,
 373        TokenType.SLASH: exp.Div,
 374        TokenType.STAR: exp.Mul,
 375    }
 376
 377    TIMESTAMPS = {
 378        TokenType.TIME,
 379        TokenType.TIMESTAMP,
 380        TokenType.TIMESTAMPTZ,
 381        TokenType.TIMESTAMPLTZ,
 382    }
 383
 384    SET_OPERATIONS = {
 385        TokenType.UNION,
 386        TokenType.INTERSECT,
 387        TokenType.EXCEPT,
 388    }
 389
 390    JOIN_METHODS = {
 391        TokenType.NATURAL,
 392        TokenType.ASOF,
 393    }
 394
 395    JOIN_SIDES = {
 396        TokenType.LEFT,
 397        TokenType.RIGHT,
 398        TokenType.FULL,
 399    }
 400
 401    JOIN_KINDS = {
 402        TokenType.INNER,
 403        TokenType.OUTER,
 404        TokenType.CROSS,
 405        TokenType.SEMI,
 406        TokenType.ANTI,
 407    }
 408
 409    JOIN_HINTS: t.Set[str] = set()
 410
 411    LAMBDAS = {
 412        TokenType.ARROW: lambda self, expressions: self.expression(
 413            exp.Lambda,
 414            this=self._replace_lambda(
 415                self._parse_conjunction(),
 416                {node.name for node in expressions},
 417            ),
 418            expressions=expressions,
 419        ),
 420        TokenType.FARROW: lambda self, expressions: self.expression(
 421            exp.Kwarg,
 422            this=exp.var(expressions[0].name),
 423            expression=self._parse_conjunction(),
 424        ),
 425    }
 426
 427    COLUMN_OPERATORS = {
 428        TokenType.DOT: None,
 429        TokenType.DCOLON: lambda self, this, to: self.expression(
 430            exp.Cast if self.STRICT_CAST else exp.TryCast,
 431            this=this,
 432            to=to,
 433        ),
 434        TokenType.ARROW: lambda self, this, path: self.expression(
 435            exp.JSONExtract,
 436            this=this,
 437            expression=path,
 438        ),
 439        TokenType.DARROW: lambda self, this, path: self.expression(
 440            exp.JSONExtractScalar,
 441            this=this,
 442            expression=path,
 443        ),
 444        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 445            exp.JSONBExtract,
 446            this=this,
 447            expression=path,
 448        ),
 449        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 450            exp.JSONBExtractScalar,
 451            this=this,
 452            expression=path,
 453        ),
 454        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 455            exp.JSONBContains,
 456            this=this,
 457            expression=key,
 458        ),
 459    }
 460
 461    EXPRESSION_PARSERS = {
 462        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 463        exp.Column: lambda self: self._parse_column(),
 464        exp.Condition: lambda self: self._parse_conjunction(),
 465        exp.DataType: lambda self: self._parse_types(),
 466        exp.Expression: lambda self: self._parse_statement(),
 467        exp.From: lambda self: self._parse_from(),
 468        exp.Group: lambda self: self._parse_group(),
 469        exp.Having: lambda self: self._parse_having(),
 470        exp.Identifier: lambda self: self._parse_id_var(),
 471        exp.Join: lambda self: self._parse_join(),
 472        exp.Lambda: lambda self: self._parse_lambda(),
 473        exp.Lateral: lambda self: self._parse_lateral(),
 474        exp.Limit: lambda self: self._parse_limit(),
 475        exp.Offset: lambda self: self._parse_offset(),
 476        exp.Order: lambda self: self._parse_order(),
 477        exp.Ordered: lambda self: self._parse_ordered(),
 478        exp.Properties: lambda self: self._parse_properties(),
 479        exp.Qualify: lambda self: self._parse_qualify(),
 480        exp.Returning: lambda self: self._parse_returning(),
 481        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 482        exp.Table: lambda self: self._parse_table_parts(),
 483        exp.TableAlias: lambda self: self._parse_table_alias(),
 484        exp.Where: lambda self: self._parse_where(),
 485        exp.Window: lambda self: self._parse_named_window(),
 486        exp.With: lambda self: self._parse_with(),
 487        "JOIN_TYPE": lambda self: self._parse_join_parts(),
 488    }
 489
 490    STATEMENT_PARSERS = {
 491        TokenType.ALTER: lambda self: self._parse_alter(),
 492        TokenType.BEGIN: lambda self: self._parse_transaction(),
 493        TokenType.CACHE: lambda self: self._parse_cache(),
 494        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 495        TokenType.COMMENT: lambda self: self._parse_comment(),
 496        TokenType.CREATE: lambda self: self._parse_create(),
 497        TokenType.DELETE: lambda self: self._parse_delete(),
 498        TokenType.DESC: lambda self: self._parse_describe(),
 499        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 500        TokenType.DROP: lambda self: self._parse_drop(),
 501        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 502        TokenType.FROM: lambda self: exp.select("*").from_(
 503            t.cast(exp.From, self._parse_from(skip_from_token=True))
 504        ),
 505        TokenType.INSERT: lambda self: self._parse_insert(),
 506        TokenType.LOAD: lambda self: self._parse_load(),
 507        TokenType.MERGE: lambda self: self._parse_merge(),
 508        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 509        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 510        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 511        TokenType.SET: lambda self: self._parse_set(),
 512        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 513        TokenType.UPDATE: lambda self: self._parse_update(),
 514        TokenType.USE: lambda self: self.expression(
 515            exp.Use,
 516            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 517            and exp.var(self._prev.text),
 518            this=self._parse_table(schema=False),
 519        ),
 520    }
 521
 522    UNARY_PARSERS = {
 523        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 524        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 525        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 526        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 527    }
 528
 529    PRIMARY_PARSERS = {
 530        TokenType.STRING: lambda self, token: self.expression(
 531            exp.Literal, this=token.text, is_string=True
 532        ),
 533        TokenType.NUMBER: lambda self, token: self.expression(
 534            exp.Literal, this=token.text, is_string=False
 535        ),
 536        TokenType.STAR: lambda self, _: self.expression(
 537            exp.Star,
 538            **{"except": self._parse_except(), "replace": self._parse_replace()},
 539        ),
 540        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 541        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 542        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 543        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 544        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 545        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 546        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 547        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 548            exp.National, this=token.text
 549        ),
 550        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 551        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 552    }
 553
 554    PLACEHOLDER_PARSERS = {
 555        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 556        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 557        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 558        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 559        else None,
 560    }
 561
 562    RANGE_PARSERS = {
 563        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 564        TokenType.GLOB: binary_range_parser(exp.Glob),
 565        TokenType.ILIKE: binary_range_parser(exp.ILike),
 566        TokenType.IN: lambda self, this: self._parse_in(this),
 567        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 568        TokenType.IS: lambda self, this: self._parse_is(this),
 569        TokenType.LIKE: binary_range_parser(exp.Like),
 570        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 571        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 572        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 573    }
 574
 575    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 576        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 577        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 578        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 579        "CHARACTER SET": lambda self: self._parse_character_set(),
 580        "CHECKSUM": lambda self: self._parse_checksum(),
 581        "CLUSTER": lambda self: self._parse_cluster(),
 582        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 583        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 584        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 585        "DEFINER": lambda self: self._parse_definer(),
 586        "DETERMINISTIC": lambda self: self.expression(
 587            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 588        ),
 589        "DISTKEY": lambda self: self._parse_distkey(),
 590        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 591        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 592        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 593        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 594        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 595        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 596        "FREESPACE": lambda self: self._parse_freespace(),
 597        "IMMUTABLE": lambda self: self.expression(
 598            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 599        ),
 600        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 601        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 602        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
 603        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
 604        "LIKE": lambda self: self._parse_create_like(),
 605        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 606        "LOCK": lambda self: self._parse_locking(),
 607        "LOCKING": lambda self: self._parse_locking(),
 608        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 609        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 610        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 611        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 612        "NO": lambda self: self._parse_no_property(),
 613        "ON": lambda self: self._parse_on_property(),
 614        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 615        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 616        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 617        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 618        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
 619        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
 620        "RETURNS": lambda self: self._parse_returns(),
 621        "ROW": lambda self: self._parse_row(),
 622        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 623        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 624        "SETTINGS": lambda self: self.expression(
 625            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 626        ),
 627        "SORTKEY": lambda self: self._parse_sortkey(),
 628        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
 629        "STABLE": lambda self: self.expression(
 630            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 631        ),
 632        "STORED": lambda self: self._parse_stored(),
 633        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 634        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 635        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 636        "TO": lambda self: self._parse_to_table(),
 637        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 638        "TTL": lambda self: self._parse_ttl(),
 639        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 640        "VOLATILE": lambda self: self._parse_volatile_property(),
 641        "WITH": lambda self: self._parse_with_property(),
 642    }
 643
 644    CONSTRAINT_PARSERS = {
 645        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 646        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 647        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 648        "CHARACTER SET": lambda self: self.expression(
 649            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 650        ),
 651        "CHECK": lambda self: self.expression(
 652            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 653        ),
 654        "COLLATE": lambda self: self.expression(
 655            exp.CollateColumnConstraint, this=self._parse_var()
 656        ),
 657        "COMMENT": lambda self: self.expression(
 658            exp.CommentColumnConstraint, this=self._parse_string()
 659        ),
 660        "COMPRESS": lambda self: self._parse_compress(),
 661        "DEFAULT": lambda self: self.expression(
 662            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 663        ),
 664        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 665        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 666        "FORMAT": lambda self: self.expression(
 667            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 668        ),
 669        "GENERATED": lambda self: self._parse_generated_as_identity(),
 670        "IDENTITY": lambda self: self._parse_auto_increment(),
 671        "INLINE": lambda self: self._parse_inline(),
 672        "LIKE": lambda self: self._parse_create_like(),
 673        "NOT": lambda self: self._parse_not_constraint(),
 674        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 675        "ON": lambda self: self._match(TokenType.UPDATE)
 676        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 677        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 678        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 679        "REFERENCES": lambda self: self._parse_references(match=False),
 680        "TITLE": lambda self: self.expression(
 681            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 682        ),
 683        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 684        "UNIQUE": lambda self: self._parse_unique(),
 685        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 686    }
 687
 688    ALTER_PARSERS = {
 689        "ADD": lambda self: self._parse_alter_table_add(),
 690        "ALTER": lambda self: self._parse_alter_table_alter(),
 691        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 692        "DROP": lambda self: self._parse_alter_table_drop(),
 693        "RENAME": lambda self: self._parse_alter_table_rename(),
 694    }
 695
 696    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 697
 698    NO_PAREN_FUNCTION_PARSERS = {
 699        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 700        TokenType.CASE: lambda self: self._parse_case(),
 701        TokenType.IF: lambda self: self._parse_if(),
 702        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 703            exp.NextValueFor,
 704            this=self._parse_column(),
 705            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 706        ),
 707    }
 708
 709    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 710
 711    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 712        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 713        "CONCAT": lambda self: self._parse_concat(),
 714        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 715        "DECODE": lambda self: self._parse_decode(),
 716        "EXTRACT": lambda self: self._parse_extract(),
 717        "JSON_OBJECT": lambda self: self._parse_json_object(),
 718        "LOG": lambda self: self._parse_logarithm(),
 719        "MATCH": lambda self: self._parse_match_against(),
 720        "OPENJSON": lambda self: self._parse_open_json(),
 721        "POSITION": lambda self: self._parse_position(),
 722        "SAFE_CAST": lambda self: self._parse_cast(False),
 723        "STRING_AGG": lambda self: self._parse_string_agg(),
 724        "SUBSTRING": lambda self: self._parse_substring(),
 725        "TRIM": lambda self: self._parse_trim(),
 726        "TRY_CAST": lambda self: self._parse_cast(False),
 727        "TRY_CONVERT": lambda self: self._parse_convert(False),
 728    }
 729
 730    QUERY_MODIFIER_PARSERS = {
 731        "joins": lambda self: list(iter(self._parse_join, None)),
 732        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 733        "match": lambda self: self._parse_match_recognize(),
 734        "where": lambda self: self._parse_where(),
 735        "group": lambda self: self._parse_group(),
 736        "having": lambda self: self._parse_having(),
 737        "qualify": lambda self: self._parse_qualify(),
 738        "windows": lambda self: self._parse_window_clause(),
 739        "order": lambda self: self._parse_order(),
 740        "limit": lambda self: self._parse_limit(),
 741        "offset": lambda self: self._parse_offset(),
 742        "locks": lambda self: self._parse_locks(),
 743        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 744    }
 745
 746    SET_PARSERS = {
 747        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 748        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 749        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 750        "TRANSACTION": lambda self: self._parse_set_transaction(),
 751    }
 752
 753    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 754
 755    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 756
 757    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 758
 759    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
 760
 761    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
 762
 763    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 764    TRANSACTION_CHARACTERISTICS = {
 765        "ISOLATION LEVEL REPEATABLE READ",
 766        "ISOLATION LEVEL READ COMMITTED",
 767        "ISOLATION LEVEL READ UNCOMMITTED",
 768        "ISOLATION LEVEL SERIALIZABLE",
 769        "READ WRITE",
 770        "READ ONLY",
 771    }
 772
 773    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 774
 775    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 776
 777    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 778    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 779    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
 780
 781    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 782
 783    STRICT_CAST = True
 784
 785    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default
 786
 787    CONVERT_TYPE_FIRST = False
 788
 789    PREFIXED_PIVOT_COLUMNS = False
 790    IDENTIFY_PIVOT_STRINGS = False
 791
 792    LOG_BASE_FIRST = True
 793    LOG_DEFAULTS_TO_LN = False
 794
 795    __slots__ = (
 796        "error_level",
 797        "error_message_context",
 798        "max_errors",
 799        "sql",
 800        "errors",
 801        "_tokens",
 802        "_index",
 803        "_curr",
 804        "_next",
 805        "_prev",
 806        "_prev_comments",
 807    )
 808
 809    # Autofilled
 810    INDEX_OFFSET: int = 0
 811    UNNEST_COLUMN_ONLY: bool = False
 812    ALIAS_POST_TABLESAMPLE: bool = False
 813    STRICT_STRING_CONCAT = False
 814    NULL_ORDERING: str = "nulls_are_small"
 815    SHOW_TRIE: t.Dict = {}
 816    SET_TRIE: t.Dict = {}
 817    FORMAT_MAPPING: t.Dict[str, str] = {}
 818    FORMAT_TRIE: t.Dict = {}
 819    TIME_MAPPING: t.Dict[str, str] = {}
 820    TIME_TRIE: t.Dict = {}
 821
 822    def __init__(
 823        self,
 824        error_level: t.Optional[ErrorLevel] = None,
 825        error_message_context: int = 100,
 826        max_errors: int = 3,
 827    ):
 828        self.error_level = error_level or ErrorLevel.IMMEDIATE
 829        self.error_message_context = error_message_context
 830        self.max_errors = max_errors
 831        self.reset()
 832
 833    def reset(self):
 834        self.sql = ""
 835        self.errors = []
 836        self._tokens = []
 837        self._index = 0
 838        self._curr = None
 839        self._next = None
 840        self._prev = None
 841        self._prev_comments = None
 842
 843    def parse(
 844        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 845    ) -> t.List[t.Optional[exp.Expression]]:
 846        """
 847        Parses a list of tokens and returns a list of syntax trees, one tree
 848        per parsed SQL statement.
 849
 850        Args:
 851            raw_tokens: The list of tokens.
 852            sql: The original SQL string, used to produce helpful debug messages.
 853
 854        Returns:
 855            The list of the produced syntax trees.
 856        """
 857        return self._parse(
 858            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 859        )
 860
 861    def parse_into(
 862        self,
 863        expression_types: exp.IntoType,
 864        raw_tokens: t.List[Token],
 865        sql: t.Optional[str] = None,
 866    ) -> t.List[t.Optional[exp.Expression]]:
 867        """
 868        Parses a list of tokens into a given Expression type. If a collection of Expression
 869        types is given instead, this method will try to parse the token list into each one
 870        of them, stopping at the first for which the parsing succeeds.
 871
 872        Args:
 873            expression_types: The expression type(s) to try and parse the token list into.
 874            raw_tokens: The list of tokens.
 875            sql: The original SQL string, used to produce helpful debug messages.
 876
 877        Returns:
 878            The target Expression.
 879        """
 880        errors = []
 881        for expression_type in ensure_list(expression_types):
 882            parser = self.EXPRESSION_PARSERS.get(expression_type)
 883            if not parser:
 884                raise TypeError(f"No parser registered for {expression_type}")
 885
 886            try:
 887                return self._parse(parser, raw_tokens, sql)
 888            except ParseError as e:
 889                e.errors[0]["into_expression"] = expression_type
 890                errors.append(e)
 891
 892        raise ParseError(
 893            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 894            errors=merge_errors(errors),
 895        ) from errors[-1]
 896
 897    def _parse(
 898        self,
 899        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 900        raw_tokens: t.List[Token],
 901        sql: t.Optional[str] = None,
 902    ) -> t.List[t.Optional[exp.Expression]]:
 903        self.reset()
 904        self.sql = sql or ""
 905
 906        total = len(raw_tokens)
 907        chunks: t.List[t.List[Token]] = [[]]
 908
 909        for i, token in enumerate(raw_tokens):
 910            if token.token_type == TokenType.SEMICOLON:
 911                if i < total - 1:
 912                    chunks.append([])
 913            else:
 914                chunks[-1].append(token)
 915
 916        expressions = []
 917
 918        for tokens in chunks:
 919            self._index = -1
 920            self._tokens = tokens
 921            self._advance()
 922
 923            expressions.append(parse_method(self))
 924
 925            if self._index < len(self._tokens):
 926                self.raise_error("Invalid expression / Unexpected token")
 927
 928            self.check_errors()
 929
 930        return expressions
 931
 932    def check_errors(self) -> None:
 933        """Logs or raises any found errors, depending on the chosen error level setting."""
 934        if self.error_level == ErrorLevel.WARN:
 935            for error in self.errors:
 936                logger.error(str(error))
 937        elif self.error_level == ErrorLevel.RAISE and self.errors:
 938            raise ParseError(
 939                concat_messages(self.errors, self.max_errors),
 940                errors=merge_errors(self.errors),
 941            )
 942
 943    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 944        """
 945        Appends an error in the list of recorded errors or raises it, depending on the chosen
 946        error level setting.
 947        """
 948        token = token or self._curr or self._prev or Token.string("")
 949        start = token.start
 950        end = token.end + 1
 951        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 952        highlight = self.sql[start:end]
 953        end_context = self.sql[end : end + self.error_message_context]
 954
 955        error = ParseError.new(
 956            f"{message}. Line {token.line}, Col: {token.col}.\n"
 957            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 958            description=message,
 959            line=token.line,
 960            col=token.col,
 961            start_context=start_context,
 962            highlight=highlight,
 963            end_context=end_context,
 964        )
 965
 966        if self.error_level == ErrorLevel.IMMEDIATE:
 967            raise error
 968
 969        self.errors.append(error)
 970
 971    def expression(
 972        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 973    ) -> E:
 974        """
 975        Creates a new, validated Expression.
 976
 977        Args:
 978            exp_class: The expression class to instantiate.
 979            comments: An optional list of comments to attach to the expression.
 980            kwargs: The arguments to set for the expression along with their respective values.
 981
 982        Returns:
 983            The target expression.
 984        """
 985        instance = exp_class(**kwargs)
 986        instance.add_comments(comments) if comments else self._add_comments(instance)
 987        return self.validate_expression(instance)
 988
 989    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 990        if expression and self._prev_comments:
 991            expression.add_comments(self._prev_comments)
 992            self._prev_comments = None
 993
 994    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
 995        """
 996        Validates an Expression, making sure that all its mandatory arguments are set.
 997
 998        Args:
 999            expression: The expression to validate.
1000            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1001
1002        Returns:
1003            The validated expression.
1004        """
1005        if self.error_level != ErrorLevel.IGNORE:
1006            for error_message in expression.error_messages(args):
1007                self.raise_error(error_message)
1008
1009        return expression
1010
1011    def _find_sql(self, start: Token, end: Token) -> str:
1012        return self.sql[start.start : end.end + 1]
1013
1014    def _advance(self, times: int = 1) -> None:
1015        self._index += times
1016        self._curr = seq_get(self._tokens, self._index)
1017        self._next = seq_get(self._tokens, self._index + 1)
1018
1019        if self._index > 0:
1020            self._prev = self._tokens[self._index - 1]
1021            self._prev_comments = self._prev.comments
1022        else:
1023            self._prev = None
1024            self._prev_comments = None
1025
1026    def _retreat(self, index: int) -> None:
1027        if index != self._index:
1028            self._advance(index - self._index)
1029
1030    def _parse_command(self) -> exp.Command:
1031        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1032
1033    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1034        start = self._prev
1035        exists = self._parse_exists() if allow_exists else None
1036
1037        self._match(TokenType.ON)
1038
1039        kind = self._match_set(self.CREATABLES) and self._prev
1040        if not kind:
1041            return self._parse_as_command(start)
1042
1043        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1044            this = self._parse_user_defined_function(kind=kind.token_type)
1045        elif kind.token_type == TokenType.TABLE:
1046            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1047        elif kind.token_type == TokenType.COLUMN:
1048            this = self._parse_column()
1049        else:
1050            this = self._parse_id_var()
1051
1052        self._match(TokenType.IS)
1053
1054        return self.expression(
1055            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1056        )
1057
1058    def _parse_to_table(
1059        self,
1060    ) -> exp.ToTableProperty:
1061        table = self._parse_table_parts(schema=True)
1062        return self.expression(exp.ToTableProperty, this=table)
1063
1064    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1065    def _parse_ttl(self) -> exp.Expression:
1066        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1067            this = self._parse_bitwise()
1068
1069            if self._match_text_seq("DELETE"):
1070                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1071            if self._match_text_seq("RECOMPRESS"):
1072                return self.expression(
1073                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1074                )
1075            if self._match_text_seq("TO", "DISK"):
1076                return self.expression(
1077                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1078                )
1079            if self._match_text_seq("TO", "VOLUME"):
1080                return self.expression(
1081                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1082                )
1083
1084            return this
1085
1086        expressions = self._parse_csv(_parse_ttl_action)
1087        where = self._parse_where()
1088        group = self._parse_group()
1089
1090        aggregates = None
1091        if group and self._match(TokenType.SET):
1092            aggregates = self._parse_csv(self._parse_set_item)
1093
1094        return self.expression(
1095            exp.MergeTreeTTL,
1096            expressions=expressions,
1097            where=where,
1098            group=group,
1099            aggregates=aggregates,
1100        )
1101
1102    def _parse_statement(self) -> t.Optional[exp.Expression]:
1103        if self._curr is None:
1104            return None
1105
1106        if self._match_set(self.STATEMENT_PARSERS):
1107            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1108
1109        if self._match_set(Tokenizer.COMMANDS):
1110            return self._parse_command()
1111
1112        expression = self._parse_expression()
1113        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1114        return self._parse_query_modifiers(expression)
1115
1116    def _parse_drop(self) -> exp.Drop | exp.Command:
1117        start = self._prev
1118        temporary = self._match(TokenType.TEMPORARY)
1119        materialized = self._match_text_seq("MATERIALIZED")
1120
1121        kind = self._match_set(self.CREATABLES) and self._prev.text
1122        if not kind:
1123            return self._parse_as_command(start)
1124
1125        return self.expression(
1126            exp.Drop,
1127            exists=self._parse_exists(),
1128            this=self._parse_table(schema=True),
1129            kind=kind,
1130            temporary=temporary,
1131            materialized=materialized,
1132            cascade=self._match_text_seq("CASCADE"),
1133            constraints=self._match_text_seq("CONSTRAINTS"),
1134            purge=self._match_text_seq("PURGE"),
1135        )
1136
1137    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1138        return (
1139            self._match(TokenType.IF)
1140            and (not not_ or self._match(TokenType.NOT))
1141            and self._match(TokenType.EXISTS)
1142        )
1143
1144    def _parse_create(self) -> exp.Create | exp.Command:
1145        # Note: this can't be None because we've matched a statement parser
1146        start = self._prev
1147        replace = start.text.upper() == "REPLACE" or self._match_pair(
1148            TokenType.OR, TokenType.REPLACE
1149        )
1150        unique = self._match(TokenType.UNIQUE)
1151
1152        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1153            self._advance()
1154
1155        properties = None
1156        create_token = self._match_set(self.CREATABLES) and self._prev
1157
1158        if not create_token:
1159            # exp.Properties.Location.POST_CREATE
1160            properties = self._parse_properties()
1161            create_token = self._match_set(self.CREATABLES) and self._prev
1162
1163            if not properties or not create_token:
1164                return self._parse_as_command(start)
1165
1166        exists = self._parse_exists(not_=True)
1167        this = None
1168        expression = None
1169        indexes = None
1170        no_schema_binding = None
1171        begin = None
1172        clone = None
1173
1174        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1175            nonlocal properties
1176            if properties and temp_props:
1177                properties.expressions.extend(temp_props.expressions)
1178            elif temp_props:
1179                properties = temp_props
1180
1181        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1182            this = self._parse_user_defined_function(kind=create_token.token_type)
1183
1184            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1185            extend_props(self._parse_properties())
1186
1187            self._match(TokenType.ALIAS)
1188            begin = self._match(TokenType.BEGIN)
1189            return_ = self._match_text_seq("RETURN")
1190            expression = self._parse_statement()
1191
1192            if return_:
1193                expression = self.expression(exp.Return, this=expression)
1194        elif create_token.token_type == TokenType.INDEX:
1195            this = self._parse_index(index=self._parse_id_var())
1196        elif create_token.token_type in self.DB_CREATABLES:
1197            table_parts = self._parse_table_parts(schema=True)
1198
1199            # exp.Properties.Location.POST_NAME
1200            self._match(TokenType.COMMA)
1201            extend_props(self._parse_properties(before=True))
1202
1203            this = self._parse_schema(this=table_parts)
1204
1205            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1206            extend_props(self._parse_properties())
1207
1208            self._match(TokenType.ALIAS)
1209            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1210                # exp.Properties.Location.POST_ALIAS
1211                extend_props(self._parse_properties())
1212
1213            expression = self._parse_ddl_select()
1214
1215            if create_token.token_type == TokenType.TABLE:
1216                indexes = []
1217                while True:
1218                    index = self._parse_index()
1219
1220                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
1221                    extend_props(self._parse_properties())
1222
1223                    if not index:
1224                        break
1225                    else:
1226                        self._match(TokenType.COMMA)
1227                        indexes.append(index)
1228            elif create_token.token_type == TokenType.VIEW:
1229                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1230                    no_schema_binding = True
1231
1232            if self._match_text_seq("CLONE"):
1233                clone = self._parse_table(schema=True)
1234                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1235                clone_kind = (
1236                    self._match(TokenType.L_PAREN)
1237                    and self._match_texts(self.CLONE_KINDS)
1238                    and self._prev.text.upper()
1239                )
1240                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1241                self._match(TokenType.R_PAREN)
1242                clone = self.expression(
1243                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1244                )
1245
1246        return self.expression(
1247            exp.Create,
1248            this=this,
1249            kind=create_token.text,
1250            replace=replace,
1251            unique=unique,
1252            expression=expression,
1253            exists=exists,
1254            properties=properties,
1255            indexes=indexes,
1256            no_schema_binding=no_schema_binding,
1257            begin=begin,
1258            clone=clone,
1259        )
1260
1261    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1262        # only used for teradata currently
1263        self._match(TokenType.COMMA)
1264
1265        kwargs = {
1266            "no": self._match_text_seq("NO"),
1267            "dual": self._match_text_seq("DUAL"),
1268            "before": self._match_text_seq("BEFORE"),
1269            "default": self._match_text_seq("DEFAULT"),
1270            "local": (self._match_text_seq("LOCAL") and "LOCAL")
1271            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1272            "after": self._match_text_seq("AFTER"),
1273            "minimum": self._match_texts(("MIN", "MINIMUM")),
1274            "maximum": self._match_texts(("MAX", "MAXIMUM")),
1275        }
1276
1277        if self._match_texts(self.PROPERTY_PARSERS):
1278            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1279            try:
1280                return parser(self, **{k: v for k, v in kwargs.items() if v})
1281            except TypeError:
1282                self.raise_error(f"Cannot parse property '{self._prev.text}'")
1283
1284        return None
1285
1286    def _parse_property(self) -> t.Optional[exp.Expression]:
1287        if self._match_texts(self.PROPERTY_PARSERS):
1288            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1289
1290        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1291            return self._parse_character_set(default=True)
1292
1293        if self._match_text_seq("COMPOUND", "SORTKEY"):
1294            return self._parse_sortkey(compound=True)
1295
1296        if self._match_text_seq("SQL", "SECURITY"):
1297            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1298
1299        assignment = self._match_pair(
1300            TokenType.VAR, TokenType.EQ, advance=False
1301        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1302
1303        if assignment:
1304            key = self._parse_var_or_string()
1305            self._match(TokenType.EQ)
1306            return self.expression(exp.Property, this=key, value=self._parse_column())
1307
1308        return None
1309
1310    def _parse_stored(self) -> exp.FileFormatProperty:
1311        self._match(TokenType.ALIAS)
1312
1313        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1314        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1315
1316        return self.expression(
1317            exp.FileFormatProperty,
1318            this=self.expression(
1319                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1320            )
1321            if input_format or output_format
1322            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1323        )
1324
1325    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
1326        self._match(TokenType.EQ)
1327        self._match(TokenType.ALIAS)
1328        return self.expression(exp_class, this=self._parse_field())
1329
1330    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
1331        properties = []
1332        while True:
1333            if before:
1334                prop = self._parse_property_before()
1335            else:
1336                prop = self._parse_property()
1337
1338            if not prop:
1339                break
1340            for p in ensure_list(prop):
1341                properties.append(p)
1342
1343        if properties:
1344            return self.expression(exp.Properties, expressions=properties)
1345
1346        return None
1347
1348    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
1349        return self.expression(
1350            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1351        )
1352
1353    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
1354        if self._index >= 2:
1355            pre_volatile_token = self._tokens[self._index - 2]
1356        else:
1357            pre_volatile_token = None
1358
1359        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
1360            return exp.VolatileProperty()
1361
1362        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1363
1364    def _parse_with_property(
1365        self,
1366    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
1367        self._match(TokenType.WITH)
1368        if self._match(TokenType.L_PAREN, advance=False):
1369            return self._parse_wrapped_csv(self._parse_property)
1370
1371        if self._match_text_seq("JOURNAL"):
1372            return self._parse_withjournaltable()
1373
1374        if self._match_text_seq("DATA"):
1375            return self._parse_withdata(no=False)
1376        elif self._match_text_seq("NO", "DATA"):
1377            return self._parse_withdata(no=True)
1378
1379        if not self._next:
1380            return None
1381
1382        return self._parse_withisolatedloading()
1383
1384    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1385    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
1386        self._match(TokenType.EQ)
1387
1388        user = self._parse_id_var()
1389        self._match(TokenType.PARAMETER)
1390        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1391
1392        if not user or not host:
1393            return None
1394
1395        return exp.DefinerProperty(this=f"{user}@{host}")
1396
1397    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
1398        self._match(TokenType.TABLE)
1399        self._match(TokenType.EQ)
1400        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1401
1402    def _parse_log(self, no: bool = False) -> exp.LogProperty:
1403        return self.expression(exp.LogProperty, no=no)
1404
1405    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
1406        return self.expression(exp.JournalProperty, **kwargs)
1407
1408    def _parse_checksum(self) -> exp.ChecksumProperty:
1409        self._match(TokenType.EQ)
1410
1411        on = None
1412        if self._match(TokenType.ON):
1413            on = True
1414        elif self._match_text_seq("OFF"):
1415            on = False
1416
1417        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
1418
1419    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
1420        if not self._match_text_seq("BY"):
1421            self._retreat(self._index - 1)
1422            return None
1423
1424        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))
1425
1426    def _parse_freespace(self) -> exp.FreespaceProperty:
1427        self._match(TokenType.EQ)
1428        return self.expression(
1429            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1430        )
1431
1432    def _parse_mergeblockratio(
1433        self, no: bool = False, default: bool = False
1434    ) -> exp.MergeBlockRatioProperty:
1435        if self._match(TokenType.EQ):
1436            return self.expression(
1437                exp.MergeBlockRatioProperty,
1438                this=self._parse_number(),
1439                percent=self._match(TokenType.PERCENT),
1440            )
1441
1442        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
1443
1444    def _parse_datablocksize(
1445        self,
1446        default: t.Optional[bool] = None,
1447        minimum: t.Optional[bool] = None,
1448        maximum: t.Optional[bool] = None,
1449    ) -> exp.DataBlocksizeProperty:
1450        self._match(TokenType.EQ)
1451        size = self._parse_number()
1452
1453        units = None
1454        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1455            units = self._prev.text
1456
1457        return self.expression(
1458            exp.DataBlocksizeProperty,
1459            size=size,
1460            units=units,
1461            default=default,
1462            minimum=minimum,
1463            maximum=maximum,
1464        )
1465
1466    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
1467        self._match(TokenType.EQ)
1468        always = self._match_text_seq("ALWAYS")
1469        manual = self._match_text_seq("MANUAL")
1470        never = self._match_text_seq("NEVER")
1471        default = self._match_text_seq("DEFAULT")
1472
1473        autotemp = None
1474        if self._match_text_seq("AUTOTEMP"):
1475            autotemp = self._parse_schema()
1476
1477        return self.expression(
1478            exp.BlockCompressionProperty,
1479            always=always,
1480            manual=manual,
1481            never=never,
1482            default=default,
1483            autotemp=autotemp,
1484        )
1485
1486    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
1487        no = self._match_text_seq("NO")
1488        concurrent = self._match_text_seq("CONCURRENT")
1489        self._match_text_seq("ISOLATED", "LOADING")
1490        for_all = self._match_text_seq("FOR", "ALL")
1491        for_insert = self._match_text_seq("FOR", "INSERT")
1492        for_none = self._match_text_seq("FOR", "NONE")
1493        return self.expression(
1494            exp.IsolatedLoadingProperty,
1495            no=no,
1496            concurrent=concurrent,
1497            for_all=for_all,
1498            for_insert=for_insert,
1499            for_none=for_none,
1500        )
1501
1502    def _parse_locking(self) -> exp.LockingProperty:
1503        if self._match(TokenType.TABLE):
1504            kind = "TABLE"
1505        elif self._match(TokenType.VIEW):
1506            kind = "VIEW"
1507        elif self._match(TokenType.ROW):
1508            kind = "ROW"
1509        elif self._match_text_seq("DATABASE"):
1510            kind = "DATABASE"
1511        else:
1512            kind = None
1513
1514        if kind in ("DATABASE", "TABLE", "VIEW"):
1515            this = self._parse_table_parts()
1516        else:
1517            this = None
1518
1519        if self._match(TokenType.FOR):
1520            for_or_in = "FOR"
1521        elif self._match(TokenType.IN):
1522            for_or_in = "IN"
1523        else:
1524            for_or_in = None
1525
1526        if self._match_text_seq("ACCESS"):
1527            lock_type = "ACCESS"
1528        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1529            lock_type = "EXCLUSIVE"
1530        elif self._match_text_seq("SHARE"):
1531            lock_type = "SHARE"
1532        elif self._match_text_seq("READ"):
1533            lock_type = "READ"
1534        elif self._match_text_seq("WRITE"):
1535            lock_type = "WRITE"
1536        elif self._match_text_seq("CHECKSUM"):
1537            lock_type = "CHECKSUM"
1538        else:
1539            lock_type = None
1540
1541        override = self._match_text_seq("OVERRIDE")
1542
1543        return self.expression(
1544            exp.LockingProperty,
1545            this=this,
1546            kind=kind,
1547            for_or_in=for_or_in,
1548            lock_type=lock_type,
1549            override=override,
1550        )
1551
1552    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1553        if self._match(TokenType.PARTITION_BY):
1554            return self._parse_csv(self._parse_conjunction)
1555        return []
1556
1557    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
1558        self._match(TokenType.EQ)
1559        return self.expression(
1560            exp.PartitionedByProperty,
1561            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1562        )
1563
1564    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
1565        if self._match_text_seq("AND", "STATISTICS"):
1566            statistics = True
1567        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1568            statistics = False
1569        else:
1570            statistics = None
1571
1572        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1573
1574    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
1575        if self._match_text_seq("PRIMARY", "INDEX"):
1576            return exp.NoPrimaryIndexProperty()
1577        return None
1578
1579    def _parse_on_property(self) -> t.Optional[exp.Expression]:
1580        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1581            return exp.OnCommitProperty()
1582        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1583            return exp.OnCommitProperty(delete=True)
1584        return None
1585
1586    def _parse_distkey(self) -> exp.DistKeyProperty:
1587        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1588
1589    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
1590        table = self._parse_table(schema=True)
1591
1592        options = []
1593        while self._match_texts(("INCLUDING", "EXCLUDING")):
1594            this = self._prev.text.upper()
1595
1596            id_var = self._parse_id_var()
1597            if not id_var:
1598                return None
1599
1600            options.append(
1601                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
1602            )
1603
1604        return self.expression(exp.LikeProperty, this=table, expressions=options)
1605
1606    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
1607        return self.expression(
1608            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
1609        )
1610
1611    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
1612        self._match(TokenType.EQ)
1613        return self.expression(
1614            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1615        )
1616
1617    def _parse_returns(self) -> exp.ReturnsProperty:
1618        value: t.Optional[exp.Expression]
1619        is_table = self._match(TokenType.TABLE)
1620
1621        if is_table:
1622            if self._match(TokenType.LT):
1623                value = self.expression(
1624                    exp.Schema,
1625                    this="TABLE",
1626                    expressions=self._parse_csv(self._parse_struct_types),
1627                )
1628                if not self._match(TokenType.GT):
1629                    self.raise_error("Expecting >")
1630            else:
1631                value = self._parse_schema(exp.var("TABLE"))
1632        else:
1633            value = self._parse_types()
1634
1635        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1636
1637    def _parse_describe(self) -> exp.Describe:
1638        kind = self._match_set(self.CREATABLES) and self._prev.text
1639        this = self._parse_table()
1640        return self.expression(exp.Describe, this=this, kind=kind)
1641
1642    def _parse_insert(self) -> exp.Insert:
1643        overwrite = self._match(TokenType.OVERWRITE)
1644        local = self._match_text_seq("LOCAL")
1645        alternative = None
1646
1647        if self._match_text_seq("DIRECTORY"):
1648            this: t.Optional[exp.Expression] = self.expression(
1649                exp.Directory,
1650                this=self._parse_var_or_string(),
1651                local=local,
1652                row_format=self._parse_row_format(match_row=True),
1653            )
1654        else:
1655            if self._match(TokenType.OR):
1656                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1657
1658            self._match(TokenType.INTO)
1659            self._match(TokenType.TABLE)
1660            this = self._parse_table(schema=True)
1661
1662        return self.expression(
1663            exp.Insert,
1664            this=this,
1665            exists=self._parse_exists(),
1666            partition=self._parse_partition(),
1667            expression=self._parse_ddl_select(),
1668            conflict=self._parse_on_conflict(),
1669            returning=self._parse_returning(),
1670            overwrite=overwrite,
1671            alternative=alternative,
1672        )
1673
1674    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
1675        conflict = self._match_text_seq("ON", "CONFLICT")
1676        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1677
1678        if not conflict and not duplicate:
1679            return None
1680
1681        nothing = None
1682        expressions = None
1683        key = None
1684        constraint = None
1685
1686        if conflict:
1687            if self._match_text_seq("ON", "CONSTRAINT"):
1688                constraint = self._parse_id_var()
1689            else:
1690                key = self._parse_csv(self._parse_value)
1691
1692        self._match_text_seq("DO")
1693        if self._match_text_seq("NOTHING"):
1694            nothing = True
1695        else:
1696            self._match(TokenType.UPDATE)
1697            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1698
1699        return self.expression(
1700            exp.OnConflict,
1701            duplicate=duplicate,
1702            expressions=expressions,
1703            nothing=nothing,
1704            key=key,
1705            constraint=constraint,
1706        )
1707
1708    def _parse_returning(self) -> t.Optional[exp.Returning]:
1709        if not self._match(TokenType.RETURNING):
1710            return None
1711
1712        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1713
1714    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
1715        if not self._match(TokenType.FORMAT):
1716            return None
1717        return self._parse_row_format()
1718
1719    def _parse_row_format(
1720        self, match_row: bool = False
1721    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
1722        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1723            return None
1724
1725        if self._match_text_seq("SERDE"):
1726            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1727
1728        self._match_text_seq("DELIMITED")
1729
1730        kwargs = {}
1731
1732        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1733            kwargs["fields"] = self._parse_string()
1734            if self._match_text_seq("ESCAPED", "BY"):
1735                kwargs["escaped"] = self._parse_string()
1736        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1737            kwargs["collection_items"] = self._parse_string()
1738        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1739            kwargs["map_keys"] = self._parse_string()
1740        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1741            kwargs["lines"] = self._parse_string()
1742        if self._match_text_seq("NULL", "DEFINED", "AS"):
1743            kwargs["null"] = self._parse_string()
1744
1745        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1746
1747    def _parse_load(self) -> exp.LoadData | exp.Command:
1748        if self._match_text_seq("DATA"):
1749            local = self._match_text_seq("LOCAL")
1750            self._match_text_seq("INPATH")
1751            inpath = self._parse_string()
1752            overwrite = self._match(TokenType.OVERWRITE)
1753            self._match_pair(TokenType.INTO, TokenType.TABLE)
1754
1755            return self.expression(
1756                exp.LoadData,
1757                this=self._parse_table(schema=True),
1758                local=local,
1759                overwrite=overwrite,
1760                inpath=inpath,
1761                partition=self._parse_partition(),
1762                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1763                serde=self._match_text_seq("SERDE") and self._parse_string(),
1764            )
1765        return self._parse_as_command(self._prev)
1766
1767    def _parse_delete(self) -> exp.Delete:
1768        self._match(TokenType.FROM)
1769
1770        return self.expression(
1771            exp.Delete,
1772            this=self._parse_table(),
1773            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1774            where=self._parse_where(),
1775            returning=self._parse_returning(),
1776        )
1777
1778    def _parse_update(self) -> exp.Update:
1779        return self.expression(
1780            exp.Update,
1781            **{  # type: ignore
1782                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1783                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1784                "from": self._parse_from(modifiers=True),
1785                "where": self._parse_where(),
1786                "returning": self._parse_returning(),
1787            },
1788        )
1789
1790    def _parse_uncache(self) -> exp.Uncache:
1791        if not self._match(TokenType.TABLE):
1792            self.raise_error("Expecting TABLE after UNCACHE")
1793
1794        return self.expression(
1795            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
1796        )
1797
1798    def _parse_cache(self) -> exp.Cache:
1799        lazy = self._match_text_seq("LAZY")
1800        self._match(TokenType.TABLE)
1801        table = self._parse_table(schema=True)
1802
1803        options = []
1804        if self._match_text_seq("OPTIONS"):
1805            self._match_l_paren()
1806            k = self._parse_string()
1807            self._match(TokenType.EQ)
1808            v = self._parse_string()
1809            options = [k, v]
1810            self._match_r_paren()
1811
1812        self._match(TokenType.ALIAS)
1813        return self.expression(
1814            exp.Cache,
1815            this=table,
1816            lazy=lazy,
1817            options=options,
1818            expression=self._parse_select(nested=True),
1819        )
1820
1821    def _parse_partition(self) -> t.Optional[exp.Partition]:
1822        if not self._match(TokenType.PARTITION):
1823            return None
1824
1825        return self.expression(
1826            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1827        )
1828
1829    def _parse_value(self) -> exp.Tuple:
1830        if self._match(TokenType.L_PAREN):
1831            expressions = self._parse_csv(self._parse_conjunction)
1832            self._match_r_paren()
1833            return self.expression(exp.Tuple, expressions=expressions)
1834
1835        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1836        # Source: https://prestodb.io/docs/current/sql/values.html
1837        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1838
1839    def _parse_select(
1840        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
1841    ) -> t.Optional[exp.Expression]:
1842        cte = self._parse_with()
1843        if cte:
1844            this = self._parse_statement()
1845
1846            if not this:
1847                self.raise_error("Failed to parse any statement following CTE")
1848                return cte
1849
1850            if "with" in this.arg_types:
1851                this.set("with", cte)
1852            else:
1853                self.raise_error(f"{this.key} does not support CTE")
1854                this = cte
1855        elif self._match(TokenType.SELECT):
1856            comments = self._prev_comments
1857
1858            hint = self._parse_hint()
1859            all_ = self._match(TokenType.ALL)
1860            distinct = self._match(TokenType.DISTINCT)
1861
1862            kind = (
1863                self._match(TokenType.ALIAS)
1864                and self._match_texts(("STRUCT", "VALUE"))
1865                and self._prev.text
1866            )
1867
1868            if distinct:
1869                distinct = self.expression(
1870                    exp.Distinct,
1871                    on=self._parse_value() if self._match(TokenType.ON) else None,
1872                )
1873
1874            if all_ and distinct:
1875                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
1876
1877            limit = self._parse_limit(top=True)
1878            expressions = self._parse_csv(self._parse_expression)
1879
1880            this = self.expression(
1881                exp.Select,
1882                kind=kind,
1883                hint=hint,
1884                distinct=distinct,
1885                expressions=expressions,
1886                limit=limit,
1887            )
1888            this.comments = comments
1889
1890            into = self._parse_into()
1891            if into:
1892                this.set("into", into)
1893
1894            from_ = self._parse_from()
1895            if from_:
1896                this.set("from", from_)
1897
1898            this = self._parse_query_modifiers(this)
1899        elif (table or nested) and self._match(TokenType.L_PAREN):
1900            if self._match(TokenType.PIVOT):
1901                this = self._parse_simplified_pivot()
1902            elif self._match(TokenType.FROM):
1903                this = exp.select("*").from_(
1904                    t.cast(exp.From, self._parse_from(skip_from_token=True))
1905                )
1906            else:
1907                this = self._parse_table() if table else self._parse_select(nested=True)
1908                this = self._parse_set_operations(self._parse_query_modifiers(this))
1909
1910            self._match_r_paren()
1911
1912            # early return so that subquery unions aren't parsed again
1913            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
1914            # Union ALL should be a property of the top select node, not the subquery
1915            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
1916        elif self._match(TokenType.VALUES):
1917            this = self.expression(
1918                exp.Values,
1919                expressions=self._parse_csv(self._parse_value),
1920                alias=self._parse_table_alias(),
1921            )
1922        else:
1923            this = None
1924
1925        return self._parse_set_operations(this)
1926
1927    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
1928        if not skip_with_token and not self._match(TokenType.WITH):
1929            return None
1930
1931        comments = self._prev_comments
1932        recursive = self._match(TokenType.RECURSIVE)
1933
1934        expressions = []
1935        while True:
1936            expressions.append(self._parse_cte())
1937
1938            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1939                break
1940            else:
1941                self._match(TokenType.WITH)
1942
1943        return self.expression(
1944            exp.With, comments=comments, expressions=expressions, recursive=recursive
1945        )
1946
1947    def _parse_cte(self) -> exp.CTE:
1948        alias = self._parse_table_alias()
1949        if not alias or not alias.this:
1950            self.raise_error("Expected CTE to have alias")
1951
1952        self._match(TokenType.ALIAS)
1953        return self.expression(
1954            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
1955        )
1956
1957    def _parse_table_alias(
1958        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1959    ) -> t.Optional[exp.TableAlias]:
1960        any_token = self._match(TokenType.ALIAS)
1961        alias = (
1962            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1963            or self._parse_string_as_identifier()
1964        )
1965
1966        index = self._index
1967        if self._match(TokenType.L_PAREN):
1968            columns = self._parse_csv(self._parse_function_parameter)
1969            self._match_r_paren() if columns else self._retreat(index)
1970        else:
1971            columns = None
1972
1973        if not alias and not columns:
1974            return None
1975
1976        return self.expression(exp.TableAlias, this=alias, columns=columns)
1977
1978    def _parse_subquery(
1979        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1980    ) -> t.Optional[exp.Subquery]:
1981        if not this:
1982            return None
1983
1984        return self.expression(
1985            exp.Subquery,
1986            this=this,
1987            pivots=self._parse_pivots(),
1988            alias=self._parse_table_alias() if parse_alias else None,
1989        )
1990
1991    def _parse_query_modifiers(
1992        self, this: t.Optional[exp.Expression]
1993    ) -> t.Optional[exp.Expression]:
1994        if isinstance(this, self.MODIFIABLES):
1995            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1996                expression = parser(self)
1997
1998                if expression:
1999                    this.set(key, expression)
2000        return this
2001
2002    def _parse_hint(self) -> t.Optional[exp.Hint]:
2003        if self._match(TokenType.HINT):
2004            hints = self._parse_csv(self._parse_function)
2005
2006            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2007                self.raise_error("Expected */ after HINT")
2008
2009            return self.expression(exp.Hint, expressions=hints)
2010
2011        return None
2012
2013    def _parse_into(self) -> t.Optional[exp.Into]:
2014        if not self._match(TokenType.INTO):
2015            return None
2016
2017        temp = self._match(TokenType.TEMPORARY)
2018        unlogged = self._match_text_seq("UNLOGGED")
2019        self._match(TokenType.TABLE)
2020
2021        return self.expression(
2022            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2023        )
2024
2025    def _parse_from(
2026        self, modifiers: bool = False, skip_from_token: bool = False
2027    ) -> t.Optional[exp.From]:
2028        if not skip_from_token and not self._match(TokenType.FROM):
2029            return None
2030
2031        comments = self._prev_comments
2032        this = self._parse_table()
2033
2034        return self.expression(
2035            exp.From,
2036            comments=comments,
2037            this=self._parse_query_modifiers(this) if modifiers else this,
2038        )
2039
2040    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
2041        if not self._match(TokenType.MATCH_RECOGNIZE):
2042            return None
2043
2044        self._match_l_paren()
2045
2046        partition = self._parse_partition_by()
2047        order = self._parse_order()
2048        measures = (
2049            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2050        )
2051
2052        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2053            rows = exp.var("ONE ROW PER MATCH")
2054        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2055            text = "ALL ROWS PER MATCH"
2056            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2057                text += f" SHOW EMPTY MATCHES"
2058            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2059                text += f" OMIT EMPTY MATCHES"
2060            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2061                text += f" WITH UNMATCHED ROWS"
2062            rows = exp.var(text)
2063        else:
2064            rows = None
2065
2066        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2067            text = "AFTER MATCH SKIP"
2068            if self._match_text_seq("PAST", "LAST", "ROW"):
2069                text += f" PAST LAST ROW"
2070            elif self._match_text_seq("TO", "NEXT", "ROW"):
2071                text += f" TO NEXT ROW"
2072            elif self._match_text_seq("TO", "FIRST"):
2073                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2074            elif self._match_text_seq("TO", "LAST"):
2075                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2076            after = exp.var(text)
2077        else:
2078            after = None
2079
2080        if self._match_text_seq("PATTERN"):
2081            self._match_l_paren()
2082
2083            if not self._curr:
2084                self.raise_error("Expecting )", self._curr)
2085
2086            paren = 1
2087            start = self._curr
2088
2089            while self._curr and paren > 0:
2090                if self._curr.token_type == TokenType.L_PAREN:
2091                    paren += 1
2092                if self._curr.token_type == TokenType.R_PAREN:
2093                    paren -= 1
2094
2095                end = self._prev
2096                self._advance()
2097
2098            if paren > 0:
2099                self.raise_error("Expecting )", self._curr)
2100
2101            pattern = exp.var(self._find_sql(start, end))
2102        else:
2103            pattern = None
2104
2105        define = (
2106            self._parse_csv(
2107                lambda: self.expression(
2108                    exp.Alias,
2109                    alias=self._parse_id_var(any_token=True),
2110                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2111                )
2112            )
2113            if self._match_text_seq("DEFINE")
2114            else None
2115        )
2116
2117        self._match_r_paren()
2118
2119        return self.expression(
2120            exp.MatchRecognize,
2121            partition_by=partition,
2122            order=order,
2123            measures=measures,
2124            rows=rows,
2125            after=after,
2126            pattern=pattern,
2127            define=define,
2128            alias=self._parse_table_alias(),
2129        )
2130
2131    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
2132        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2133        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2134
2135        if outer_apply or cross_apply:
2136            this = self._parse_select(table=True)
2137            view = None
2138            outer = not cross_apply
2139        elif self._match(TokenType.LATERAL):
2140            this = self._parse_select(table=True)
2141            view = self._match(TokenType.VIEW)
2142            outer = self._match(TokenType.OUTER)
2143        else:
2144            return None
2145
2146        if not this:
2147            this = self._parse_function() or self._parse_id_var(any_token=False)
2148            while self._match(TokenType.DOT):
2149                this = exp.Dot(
2150                    this=this,
2151                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2152                )
2153
2154        if view:
2155            table = self._parse_id_var(any_token=False)
2156            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2157            table_alias: t.Optional[exp.TableAlias] = self.expression(
2158                exp.TableAlias, this=table, columns=columns
2159            )
2160        else:
2161            table_alias = self._parse_table_alias()
2162
2163        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
2164
2165    def _parse_join_parts(
2166        self,
2167    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2168        return (
2169            self._match_set(self.JOIN_METHODS) and self._prev,
2170            self._match_set(self.JOIN_SIDES) and self._prev,
2171            self._match_set(self.JOIN_KINDS) and self._prev,
2172        )
2173
2174    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
2175        if self._match(TokenType.COMMA):
2176            return self.expression(exp.Join, this=self._parse_table())
2177
2178        index = self._index
2179        method, side, kind = self._parse_join_parts()
2180        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
2181        join = self._match(TokenType.JOIN)
2182
2183        if not skip_join_token and not join:
2184            self._retreat(index)
2185            kind = None
2186            method = None
2187            side = None
2188
2189        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
2190        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
2191
2192        if not skip_join_token and not join and not outer_apply and not cross_apply:
2193            return None
2194
2195        if outer_apply:
2196            side = Token(TokenType.LEFT, "LEFT")
2197
2198        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}
2199
2200        if method:
2201            kwargs["method"] = method.text
2202        if side:
2203            kwargs["side"] = side.text
2204        if kind:
2205            kwargs["kind"] = kind.text
2206        if hint:
2207            kwargs["hint"] = hint
2208
2209        if self._match(TokenType.ON):
2210            kwargs["on"] = self._parse_conjunction()
2211        elif self._match(TokenType.USING):
2212            kwargs["using"] = self._parse_wrapped_id_vars()
2213
2214        return self.expression(exp.Join, **kwargs)
2215
2216    def _parse_index(
2217        self,
2218        index: t.Optional[exp.Expression] = None,
2219    ) -> t.Optional[exp.Index]:
2220        if index:
2221            unique = None
2222            primary = None
2223            amp = None
2224
2225            self._match(TokenType.ON)
2226            self._match(TokenType.TABLE)  # hive
2227            table = self._parse_table_parts(schema=True)
2228        else:
2229            unique = self._match(TokenType.UNIQUE)
2230            primary = self._match_text_seq("PRIMARY")
2231            amp = self._match_text_seq("AMP")
2232
2233            if not self._match(TokenType.INDEX):
2234                return None
2235
2236            index = self._parse_id_var()
2237            table = None
2238
2239        using = self._parse_field() if self._match(TokenType.USING) else None
2240
2241        if self._match(TokenType.L_PAREN, advance=False):
2242            columns = self._parse_wrapped_csv(self._parse_ordered)
2243        else:
2244            columns = None
2245
2246        return self.expression(
2247            exp.Index,
2248            this=index,
2249            table=table,
2250            using=using,
2251            columns=columns,
2252            unique=unique,
2253            primary=primary,
2254            amp=amp,
2255            partition_by=self._parse_partition_by(),
2256        )
2257
2258    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2259        return (
2260            (not schema and self._parse_function())
2261            or self._parse_id_var(any_token=False)
2262            or self._parse_string_as_identifier()
2263            or self._parse_placeholder()
2264        )
2265
2266    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2267        catalog = None
2268        db = None
2269        table = self._parse_table_part(schema=schema)
2270
2271        while self._match(TokenType.DOT):
2272            if catalog:
2273                # This allows nesting the table in arbitrarily many dot expressions if needed
2274                table = self.expression(
2275                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2276                )
2277            else:
2278                catalog = db
2279                db = table
2280                table = self._parse_table_part(schema=schema)
2281
2282        if not table:
2283            self.raise_error(f"Expected table name but got {self._curr}")
2284
2285        return self.expression(
2286            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2287        )
2288
2289    def _parse_table(
2290        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
2291    ) -> t.Optional[exp.Expression]:
2292        lateral = self._parse_lateral()
2293        if lateral:
2294            return lateral
2295
2296        unnest = self._parse_unnest()
2297        if unnest:
2298            return unnest
2299
2300        values = self._parse_derived_table_values()
2301        if values:
2302            return values
2303
2304        subquery = self._parse_select(table=True)
2305        if subquery:
2306            if not subquery.args.get("pivots"):
2307                subquery.set("pivots", self._parse_pivots())
2308            return subquery
2309
2310        this: exp.Expression = self._parse_table_parts(schema=schema)
2311
2312        if schema:
2313            return self._parse_schema(this=this)
2314
2315        if self.ALIAS_POST_TABLESAMPLE:
2316            table_sample = self._parse_table_sample()
2317
2318        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2319        if alias:
2320            this.set("alias", alias)
2321
2322        if not this.args.get("pivots"):
2323            this.set("pivots", self._parse_pivots())
2324
2325        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
2326            this.set(
2327                "hints",
2328                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
2329            )
2330            self._match_r_paren()
2331
2332        if not self.ALIAS_POST_TABLESAMPLE:
2333            table_sample = self._parse_table_sample()
2334
2335        if table_sample:
2336            table_sample.set("this", this)
2337            this = table_sample
2338
2339        return this
2340
2341    def _parse_unnest(self) -> t.Optional[exp.Unnest]:
2342        if not self._match(TokenType.UNNEST):
2343            return None
2344
2345        expressions = self._parse_wrapped_csv(self._parse_type)
2346        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2347        alias = self._parse_table_alias()
2348
2349        if alias and self.UNNEST_COLUMN_ONLY:
2350            if alias.args.get("columns"):
2351                self.raise_error("Unexpected extra column alias in unnest.")
2352
2353            alias.set("columns", [alias.this])
2354            alias.set("this", None)
2355
2356        offset = None
2357        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2358            self._match(TokenType.ALIAS)
2359            offset = self._parse_id_var() or exp.to_identifier("offset")
2360
2361        return self.expression(
2362            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
2363        )
2364
2365    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
2366        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2367        if not is_derived and not self._match(TokenType.VALUES):
2368            return None
2369
2370        expressions = self._parse_csv(self._parse_value)
2371
2372        if is_derived:
2373            self._match_r_paren()
2374
2375        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2376
2377    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
2378        if not self._match(TokenType.TABLE_SAMPLE) and not (
2379            as_modifier and self._match_text_seq("USING", "SAMPLE")
2380        ):
2381            return None
2382
2383        bucket_numerator = None
2384        bucket_denominator = None
2385        bucket_field = None
2386        percent = None
2387        rows = None
2388        size = None
2389        seed = None
2390
2391        kind = (
2392            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2393        )
2394        method = self._parse_var(tokens=(TokenType.ROW,))
2395
2396        self._match(TokenType.L_PAREN)
2397
2398        num = self._parse_number()
2399
2400        if self._match_text_seq("BUCKET"):
2401            bucket_numerator = self._parse_number()
2402            self._match_text_seq("OUT", "OF")
2403            bucket_denominator = bucket_denominator = self._parse_number()
2404            self._match(TokenType.ON)
2405            bucket_field = self._parse_field()
2406        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2407            percent = num
2408        elif self._match(TokenType.ROWS):
2409            rows = num
2410        else:
2411            size = num
2412
2413        self._match(TokenType.R_PAREN)
2414
2415        if self._match(TokenType.L_PAREN):
2416            method = self._parse_var()
2417            seed = self._match(TokenType.COMMA) and self._parse_number()
2418            self._match_r_paren()
2419        elif self._match_texts(("SEED", "REPEATABLE")):
2420            seed = self._parse_wrapped(self._parse_number)
2421
2422        return self.expression(
2423            exp.TableSample,
2424            method=method,
2425            bucket_numerator=bucket_numerator,
2426            bucket_denominator=bucket_denominator,
2427            bucket_field=bucket_field,
2428            percent=percent,
2429            rows=rows,
2430            size=size,
2431            seed=seed,
2432            kind=kind,
2433        )
2434
2435    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2436        return list(iter(self._parse_pivot, None))
2437
2438    # https://duckdb.org/docs/sql/statements/pivot
2439    def _parse_simplified_pivot(self) -> exp.Pivot:
2440        def _parse_on() -> t.Optional[exp.Expression]:
2441            this = self._parse_bitwise()
2442            return self._parse_in(this) if self._match(TokenType.IN) else this
2443
2444        this = self._parse_table()
2445        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2446        using = self._match(TokenType.USING) and self._parse_csv(
2447            lambda: self._parse_alias(self._parse_function())
2448        )
2449        group = self._parse_group()
2450        return self.expression(
2451            exp.Pivot, this=this, expressions=expressions, using=using, group=group
2452        )
2453
2454    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
2455        index = self._index
2456
2457        if self._match(TokenType.PIVOT):
2458            unpivot = False
2459        elif self._match(TokenType.UNPIVOT):
2460            unpivot = True
2461        else:
2462            return None
2463
2464        expressions = []
2465        field = None
2466
2467        if not self._match(TokenType.L_PAREN):
2468            self._retreat(index)
2469            return None
2470
2471        if unpivot:
2472            expressions = self._parse_csv(self._parse_column)
2473        else:
2474            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
2475
2476        if not expressions:
2477            self.raise_error("Failed to parse PIVOT's aggregation list")
2478
2479        if not self._match(TokenType.FOR):
2480            self.raise_error("Expecting FOR")
2481
2482        value = self._parse_column()
2483
2484        if not self._match(TokenType.IN):
2485            self.raise_error("Expecting IN")
2486
2487        field = self._parse_in(value, alias=True)
2488
2489        self._match_r_paren()
2490
2491        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)
2492
2493        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
2494            pivot.set("alias", self._parse_table_alias())
2495
2496        if not unpivot:
2497            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
2498
2499            columns: t.List[exp.Expression] = []
2500            for fld in pivot.args["field"].expressions:
2501                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
2502                for name in names:
2503                    if self.PREFIXED_PIVOT_COLUMNS:
2504                        name = f"{name}_{field_name}" if name else field_name
2505                    else:
2506                        name = f"{field_name}_{name}" if name else field_name
2507
2508                    columns.append(exp.to_identifier(name))
2509
2510            pivot.set("columns", columns)
2511
2512        return pivot
2513
2514    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2515        return [agg.alias for agg in aggregations]
2516
2517    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
2518        if not skip_where_token and not self._match(TokenType.WHERE):
2519            return None
2520
2521        return self.expression(
2522            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2523        )
2524
2525    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
2526        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
2527            return None
2528
2529        elements = defaultdict(list)
2530
2531        while True:
2532            expressions = self._parse_csv(self._parse_conjunction)
2533            if expressions:
2534                elements["expressions"].extend(expressions)
2535
2536            grouping_sets = self._parse_grouping_sets()
2537            if grouping_sets:
2538                elements["grouping_sets"].extend(grouping_sets)
2539
2540            rollup = None
2541            cube = None
2542            totals = None
2543
2544            with_ = self._match(TokenType.WITH)
2545            if self._match(TokenType.ROLLUP):
2546                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
2547                elements["rollup"].extend(ensure_list(rollup))
2548
2549            if self._match(TokenType.CUBE):
2550                cube = with_ or self._parse_wrapped_csv(self._parse_column)
2551                elements["cube"].extend(ensure_list(cube))
2552
2553            if self._match_text_seq("TOTALS"):
2554                totals = True
2555                elements["totals"] = True  # type: ignore
2556
2557            if not (grouping_sets or rollup or cube or totals):
2558                break
2559
2560        return self.expression(exp.Group, **elements)  # type: ignore
2561
2562    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2563        if not self._match(TokenType.GROUPING_SETS):
2564            return None
2565
2566        return self._parse_wrapped_csv(self._parse_grouping_set)
2567
2568    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2569        if self._match(TokenType.L_PAREN):
2570            grouping_set = self._parse_csv(self._parse_column)
2571            self._match_r_paren()
2572            return self.expression(exp.Tuple, expressions=grouping_set)
2573
2574        return self._parse_column()
2575
2576    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
2577        if not skip_having_token and not self._match(TokenType.HAVING):
2578            return None
2579        return self.expression(exp.Having, this=self._parse_conjunction())
2580
2581    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
2582        if not self._match(TokenType.QUALIFY):
2583            return None
2584        return self.expression(exp.Qualify, this=self._parse_conjunction())
2585
2586    def _parse_order(
2587        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2588    ) -> t.Optional[exp.Expression]:
2589        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2590            return this
2591
2592        return self.expression(
2593            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2594        )
2595
2596    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
2597        if not self._match_text_seq(*texts):
2598            return None
2599        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2600
2601    def _parse_ordered(self) -> exp.Ordered:
2602        this = self._parse_conjunction()
2603        self._match(TokenType.ASC)
2604
2605        is_desc = self._match(TokenType.DESC)
2606        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2607        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2608        desc = is_desc or False
2609        asc = not desc
2610        nulls_first = is_nulls_first or False
2611        explicitly_null_ordered = is_nulls_first or is_nulls_last
2612
2613        if (
2614            not explicitly_null_ordered
2615            and (
2616                (asc and self.NULL_ORDERING == "nulls_are_small")
2617                or (desc and self.NULL_ORDERING != "nulls_are_small")
2618            )
2619            and self.NULL_ORDERING != "nulls_are_last"
2620        ):
2621            nulls_first = True
2622
2623        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2624
2625    def _parse_limit(
2626        self, this: t.Optional[exp.Expression] = None, top: bool = False
2627    ) -> t.Optional[exp.Expression]:
2628        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2629            limit_paren = self._match(TokenType.L_PAREN)
2630            limit_exp = self.expression(
2631                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2632            )
2633
2634            if limit_paren:
2635                self._match_r_paren()
2636
2637            return limit_exp
2638
2639        if self._match(TokenType.FETCH):
2640            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2641            direction = self._prev.text if direction else "FIRST"
2642
2643            count = self._parse_number()
2644            percent = self._match(TokenType.PERCENT)
2645
2646            self._match_set((TokenType.ROW, TokenType.ROWS))
2647
2648            only = self._match_text_seq("ONLY")
2649            with_ties = self._match_text_seq("WITH", "TIES")
2650
2651            if only and with_ties:
2652                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2653
2654            return self.expression(
2655                exp.Fetch,
2656                direction=direction,
2657                count=count,
2658                percent=percent,
2659                with_ties=with_ties,
2660            )
2661
2662        return this
2663
2664    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2665        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2666            return this
2667
2668        count = self._parse_number()
2669        self._match_set((TokenType.ROW, TokenType.ROWS))
2670        return self.expression(exp.Offset, this=this, expression=count)
2671
2672    def _parse_locks(self) -> t.List[exp.Lock]:
2673        locks = []
2674        while True:
2675            if self._match_text_seq("FOR", "UPDATE"):
2676                update = True
2677            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2678                "LOCK", "IN", "SHARE", "MODE"
2679            ):
2680                update = False
2681            else:
2682                break
2683
2684            expressions = None
2685            if self._match_text_seq("OF"):
2686                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2687
2688            wait: t.Optional[bool | exp.Expression] = None
2689            if self._match_text_seq("NOWAIT"):
2690                wait = True
2691            elif self._match_text_seq("WAIT"):
2692                wait = self._parse_primary()
2693            elif self._match_text_seq("SKIP", "LOCKED"):
2694                wait = False
2695
2696            locks.append(
2697                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2698            )
2699
2700        return locks
2701
2702    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2703        if not self._match_set(self.SET_OPERATIONS):
2704            return this
2705
2706        token_type = self._prev.token_type
2707
2708        if token_type == TokenType.UNION:
2709            expression = exp.Union
2710        elif token_type == TokenType.EXCEPT:
2711            expression = exp.Except
2712        else:
2713            expression = exp.Intersect
2714
2715        return self.expression(
2716            expression,
2717            this=this,
2718            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2719            expression=self._parse_set_operations(self._parse_select(nested=True)),
2720        )
2721
2722    def _parse_expression(self) -> t.Optional[exp.Expression]:
2723        return self._parse_alias(self._parse_conjunction())
2724
2725    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2726        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2727
2728    def _parse_equality(self) -> t.Optional[exp.Expression]:
2729        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2730
2731    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2732        return self._parse_tokens(self._parse_range, self.COMPARISON)
2733
2734    def _parse_range(self) -> t.Optional[exp.Expression]:
2735        this = self._parse_bitwise()
2736        negate = self._match(TokenType.NOT)
2737
2738        if self._match_set(self.RANGE_PARSERS):
2739            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
2740            if not expression:
2741                return this
2742
2743            this = expression
2744        elif self._match(TokenType.ISNULL):
2745            this = self.expression(exp.Is, this=this, expression=exp.Null())
2746
2747        # Postgres supports ISNULL and NOTNULL for conditions.
2748        # https://blog.andreiavram.ro/postgresql-null-composite-type/
2749        if self._match(TokenType.NOTNULL):
2750            this = self.expression(exp.Is, this=this, expression=exp.Null())
2751            this = self.expression(exp.Not, this=this)
2752
2753        if negate:
2754            this = self.expression(exp.Not, this=this)
2755
2756        if self._match(TokenType.IS):
2757            this = self._parse_is(this)
2758
2759        return this
2760
2761    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2762        index = self._index - 1
2763        negate = self._match(TokenType.NOT)
2764
2765        if self._match_text_seq("DISTINCT", "FROM"):
2766            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2767            return self.expression(klass, this=this, expression=self._parse_expression())
2768
2769        expression = self._parse_null() or self._parse_boolean()
2770        if not expression:
2771            self._retreat(index)
2772            return None
2773
2774        this = self.expression(exp.Is, this=this, expression=expression)
2775        return self.expression(exp.Not, this=this) if negate else this
2776
2777    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
2778        unnest = self._parse_unnest()
2779        if unnest:
2780            this = self.expression(exp.In, this=this, unnest=unnest)
2781        elif self._match(TokenType.L_PAREN):
2782            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2783
2784            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2785                this = self.expression(exp.In, this=this, query=expressions[0])
2786            else:
2787                this = self.expression(exp.In, this=this, expressions=expressions)
2788
2789            self._match_r_paren(this)
2790        else:
2791            this = self.expression(exp.In, this=this, field=self._parse_field())
2792
2793        return this
2794
2795    def _parse_between(self, this: exp.Expression) -> exp.Between:
2796        low = self._parse_bitwise()
2797        self._match(TokenType.AND)
2798        high = self._parse_bitwise()
2799        return self.expression(exp.Between, this=this, low=low, high=high)
2800
2801    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2802        if not self._match(TokenType.ESCAPE):
2803            return this
2804        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2805
2806    def _parse_interval(self) -> t.Optional[exp.Interval]:
2807        if not self._match(TokenType.INTERVAL):
2808            return None
2809
2810        this = self._parse_primary() or self._parse_term()
2811        unit = self._parse_function() or self._parse_var()
2812
2813        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
2814        # each INTERVAL expression into this canonical form so it's easy to transpile
2815        if this and this.is_number:
2816            this = exp.Literal.string(this.name)
2817        elif this and this.is_string:
2818            parts = this.name.split()
2819
2820            if len(parts) == 2:
2821                if unit:
2822                    # this is not actually a unit, it's something else
2823                    unit = None
2824                    self._retreat(self._index - 1)
2825                else:
2826                    this = exp.Literal.string(parts[0])
2827                    unit = self.expression(exp.Var, this=parts[1])
2828
2829        return self.expression(exp.Interval, this=this, unit=unit)
2830
2831    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2832        this = self._parse_term()
2833
2834        while True:
2835            if self._match_set(self.BITWISE):
2836                this = self.expression(
2837                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
2838                )
2839            elif self._match_pair(TokenType.LT, TokenType.LT):
2840                this = self.expression(
2841                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2842                )
2843            elif self._match_pair(TokenType.GT, TokenType.GT):
2844                this = self.expression(
2845                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2846                )
2847            else:
2848                break
2849
2850        return this
2851
2852    def _parse_term(self) -> t.Optional[exp.Expression]:
2853        return self._parse_tokens(self._parse_factor, self.TERM)
2854
2855    def _parse_factor(self) -> t.Optional[exp.Expression]:
2856        return self._parse_tokens(self._parse_unary, self.FACTOR)
2857
2858    def _parse_unary(self) -> t.Optional[exp.Expression]:
2859        if self._match_set(self.UNARY_PARSERS):
2860            return self.UNARY_PARSERS[self._prev.token_type](self)
2861        return self._parse_at_time_zone(self._parse_type())
2862
2863    def _parse_type(self) -> t.Optional[exp.Expression]:
2864        interval = self._parse_interval()
2865        if interval:
2866            return interval
2867
2868        index = self._index
2869        data_type = self._parse_types(check_func=True)
2870        this = self._parse_column()
2871
2872        if data_type:
2873            if isinstance(this, exp.Literal):
2874                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
2875                if parser:
2876                    return parser(self, this, data_type)
2877                return self.expression(exp.Cast, this=this, to=data_type)
2878            if not data_type.expressions:
2879                self._retreat(index)
2880                return self._parse_column()
2881            return self._parse_column_ops(data_type)
2882
2883        return this
2884
2885    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
2886        this = self._parse_type()
2887        if not this:
2888            return None
2889
2890        return self.expression(
2891            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2892        )
2893
2894    def _parse_types(
2895        self, check_func: bool = False, schema: bool = False
2896    ) -> t.Optional[exp.Expression]:
2897        index = self._index
2898
2899        prefix = self._match_text_seq("SYSUDTLIB", ".")
2900
2901        if not self._match_set(self.TYPE_TOKENS):
2902            return None
2903
2904        type_token = self._prev.token_type
2905
2906        if type_token == TokenType.PSEUDO_TYPE:
2907            return self.expression(exp.PseudoType, this=self._prev.text)
2908
2909        nested = type_token in self.NESTED_TYPE_TOKENS
2910        is_struct = type_token == TokenType.STRUCT
2911        expressions = None
2912        maybe_func = False
2913
2914        if self._match(TokenType.L_PAREN):
2915            if is_struct:
2916                expressions = self._parse_csv(self._parse_struct_types)
2917            elif nested:
2918                expressions = self._parse_csv(
2919                    lambda: self._parse_types(check_func=check_func, schema=schema)
2920                )
2921            else:
2922                expressions = self._parse_csv(self._parse_type_size)
2923
2924            if not expressions or not self._match(TokenType.R_PAREN):
2925                self._retreat(index)
2926                return None
2927
2928            maybe_func = True
2929
2930        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
2931            this = exp.DataType(
2932                this=exp.DataType.Type.ARRAY,
2933                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
2934                nested=True,
2935            )
2936
2937            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
2938                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)
2939
2940            return this
2941
2942        if self._match(TokenType.L_BRACKET):
2943            self._retreat(index)
2944            return None
2945
2946        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
2947        if nested and self._match(TokenType.LT):
2948            if is_struct:
2949                expressions = self._parse_csv(self._parse_struct_types)
2950            else:
2951                expressions = self._parse_csv(
2952                    lambda: self._parse_types(check_func=check_func, schema=schema)
2953                )
2954
2955            if not self._match(TokenType.GT):
2956                self.raise_error("Expecting >")
2957
2958            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
2959                values = self._parse_csv(self._parse_conjunction)
2960                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))
2961
2962        value: t.Optional[exp.Expression] = None
2963        if type_token in self.TIMESTAMPS:
2964            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
2965                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
2966            elif (
2967                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
2968                or type_token == TokenType.TIMESTAMPLTZ
2969            ):
2970                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
2971            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
2972                if type_token == TokenType.TIME:
2973                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
2974                else:
2975                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
2976
2977            maybe_func = maybe_func and value is None
2978
2979            if value is None:
2980                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
2981        elif type_token == TokenType.INTERVAL:
2982            unit = self._parse_var()
2983
2984            if not unit:
2985                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
2986            else:
2987                value = self.expression(exp.Interval, unit=unit)
2988
2989        if maybe_func and check_func:
2990            index2 = self._index
2991            peek = self._parse_string()
2992
2993            if not peek:
2994                self._retreat(index)
2995                return None
2996
2997            self._retreat(index2)
2998
2999        if value:
3000            return value
3001
3002        return exp.DataType(
3003            this=exp.DataType.Type[type_token.value.upper()],
3004            expressions=expressions,
3005            nested=nested,
3006            values=values,
3007            prefix=prefix,
3008        )
3009
3010    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3011        this = self._parse_type() or self._parse_id_var()
3012        self._match(TokenType.COLON)
3013        return self._parse_column_def(this)
3014
3015    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3016        if not self._match_text_seq("AT", "TIME", "ZONE"):
3017            return this
3018        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3019
3020    def _parse_column(self) -> t.Optional[exp.Expression]:
3021        this = self._parse_field()
3022        if isinstance(this, exp.Identifier):
3023            this = self.expression(exp.Column, this=this)
3024        elif not this:
3025            return self._parse_bracket(this)
3026        return self._parse_column_ops(this)
3027
3028    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3029        this = self._parse_bracket(this)
3030
3031        while self._match_set(self.COLUMN_OPERATORS):
3032            op_token = self._prev.token_type
3033            op = self.COLUMN_OPERATORS.get(op_token)
3034
3035            if op_token == TokenType.DCOLON:
3036                field = self._parse_types()
3037                if not field:
3038                    self.raise_error("Expected type")
3039            elif op and self._curr:
3040                self._advance()
3041                value = self._prev.text
3042                field = (
3043                    exp.Literal.number(value)
3044                    if self._prev.token_type == TokenType.NUMBER
3045                    else exp.Literal.string(value)
3046                )
3047            else:
3048                field = self._parse_field(anonymous_func=True)
3049
3050            if isinstance(field, exp.Func):
3051                # bigquery allows function calls like x.y.count(...)
3052                # SAFE.SUBSTR(...)
3053                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
3054                this = self._replace_columns_with_dots(this)
3055
3056            if op:
3057                this = op(self, this, field)
3058            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
3059                this = self.expression(
3060                    exp.Column,
3061                    this=field,
3062                    table=this.this,
3063                    db=this.args.get("table"),
3064                    catalog=this.args.get("db"),
3065                )
3066            else:
3067                this = self.expression(exp.Dot, this=this, expression=field)
3068            this = self._parse_bracket(this)
3069        return this
3070
3071    def _parse_primary(self) -> t.Optional[exp.Expression]:
3072        if self._match_set(self.PRIMARY_PARSERS):
3073            token_type = self._prev.token_type
3074            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)
3075
3076            if token_type == TokenType.STRING:
3077                expressions = [primary]
3078                while self._match(TokenType.STRING):
3079                    expressions.append(exp.Literal.string(self._prev.text))
3080
3081                if len(expressions) > 1:
3082                    return self.expression(exp.Concat, expressions=expressions)
3083
3084            return primary
3085
3086        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
3087            return exp.Literal.number(f"0.{self._prev.text}")
3088
3089        if self._match(TokenType.L_PAREN):
3090            comments = self._prev_comments
3091            query = self._parse_select()
3092
3093            if query:
3094                expressions = [query]
3095            else:
3096                expressions = self._parse_csv(self._parse_expression)
3097
3098            this = self._parse_query_modifiers(seq_get(expressions, 0))
3099
3100            if isinstance(this, exp.Subqueryable):
3101                this = self._parse_set_operations(
3102                    self._parse_subquery(this=this, parse_alias=False)
3103                )
3104            elif len(expressions) > 1:
3105                this = self.expression(exp.Tuple, expressions=expressions)
3106            else:
3107                this = self.expression(exp.Paren, this=self._parse_set_operations(this))
3108
3109            if this:
3110                this.add_comments(comments)
3111
3112            self._match_r_paren(expression=this)
3113            return this
3114
3115        return None
3116
3117    def _parse_field(
3118        self,
3119        any_token: bool = False,
3120        tokens: t.Optional[t.Collection[TokenType]] = None,
3121        anonymous_func: bool = False,
3122    ) -> t.Optional[exp.Expression]:
3123        return (
3124            self._parse_primary()
3125            or self._parse_function(anonymous=anonymous_func)
3126            or self._parse_id_var(any_token=any_token, tokens=tokens)
3127        )
3128
3129    def _parse_function(
3130        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
3131    ) -> t.Optional[exp.Expression]:
3132        if not self._curr:
3133            return None
3134
3135        token_type = self._curr.token_type
3136
3137        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
3138            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)
3139
3140        if not self._next or self._next.token_type != TokenType.L_PAREN:
3141            if token_type in self.NO_PAREN_FUNCTIONS:
3142                self._advance()
3143                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])
3144
3145            return None
3146
3147        if token_type not in self.FUNC_TOKENS:
3148            return None
3149
3150        this = self._curr.text
3151        upper = this.upper()
3152        self._advance(2)
3153
3154        parser = self.FUNCTION_PARSERS.get(upper)
3155
3156        if parser and not anonymous:
3157            this = parser(self)
3158        else:
3159            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
3160
3161            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
3162                this = self.expression(subquery_predicate, this=self._parse_select())
3163                self._match_r_paren()
3164                return this
3165
3166            if functions is None:
3167                functions = self.FUNCTIONS
3168
3169            function = functions.get(upper)
3170
3171            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
3172            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))
3173
3174            if function and not anonymous:
3175                this = self.validate_expression(function(args), args)
3176            else:
3177                this = self.expression(exp.Anonymous, this=this, expressions=args)
3178
3179        self._match_r_paren(this)
3180        return self._parse_window(this)
3181
3182    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3183        return self._parse_column_def(self._parse_id_var())
3184
3185    def _parse_user_defined_function(
3186        self, kind: t.Optional[TokenType] = None
3187    ) -> t.Optional[exp.Expression]:
3188        this = self._parse_id_var()
3189
3190        while self._match(TokenType.DOT):
3191            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3192
3193        if not self._match(TokenType.L_PAREN):
3194            return this
3195
3196        expressions = self._parse_csv(self._parse_function_parameter)
3197        self._match_r_paren()
3198        return self.expression(
3199            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3200        )
3201
3202    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
3203        literal = self._parse_primary()
3204        if literal:
3205            return self.expression(exp.Introducer, this=token.text, expression=literal)
3206
3207        return self.expression(exp.Identifier, this=token.text)
3208
3209    def _parse_session_parameter(self) -> exp.SessionParameter:
3210        kind = None
3211        this = self._parse_id_var() or self._parse_primary()
3212
3213        if this and self._match(TokenType.DOT):
3214            kind = this.name
3215            this = self._parse_var() or self._parse_primary()
3216
3217        return self.expression(exp.SessionParameter, this=this, kind=kind)
3218
3219    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
3220        index = self._index
3221
3222        if self._match(TokenType.L_PAREN):
3223            expressions = self._parse_csv(self._parse_id_var)
3224
3225            if not self._match(TokenType.R_PAREN):
3226                self._retreat(index)
3227        else:
3228            expressions = [self._parse_id_var()]
3229
3230        if self._match_set(self.LAMBDAS):
3231            return self.LAMBDAS[self._prev.token_type](self, expressions)
3232
3233        self._retreat(index)
3234
3235        this: t.Optional[exp.Expression]
3236
3237        if self._match(TokenType.DISTINCT):
3238            this = self.expression(
3239                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
3240            )
3241        else:
3242            this = self._parse_select_or_expression(alias=alias)
3243
3244            if isinstance(this, exp.EQ):
3245                left = this.this
3246                if isinstance(left, exp.Column):
3247                    left.replace(exp.var(left.text("this")))
3248
3249        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3250
3251    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
3252        index = self._index
3253
3254        if not self.errors:
3255            try:
3256                if self._parse_select(nested=True):
3257                    return this
3258            except ParseError:
3259                pass
3260            finally:
3261                self.errors.clear()
3262                self._retreat(index)
3263
3264        if not self._match(TokenType.L_PAREN):
3265            return this
3266
3267        args = self._parse_csv(
3268            lambda: self._parse_constraint()
3269            or self._parse_column_def(self._parse_field(any_token=True))
3270        )
3271
3272        self._match_r_paren()
3273        return self.expression(exp.Schema, this=this, expressions=args)
3274
3275    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3276        # column defs are not really columns, they're identifiers
3277        if isinstance(this, exp.Column):
3278            this = this.this
3279
3280        kind = self._parse_types(schema=True)
3281
3282        if self._match_text_seq("FOR", "ORDINALITY"):
3283            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3284
3285        constraints = []
3286        while True:
3287            constraint = self._parse_column_constraint()
3288            if not constraint:
3289                break
3290            constraints.append(constraint)
3291
3292        if not kind and not constraints:
3293            return this
3294
3295        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3296
3297    def _parse_auto_increment(
3298        self,
3299    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
3300        start = None
3301        increment = None
3302
3303        if self._match(TokenType.L_PAREN, advance=False):
3304            args = self._parse_wrapped_csv(self._parse_bitwise)
3305            start = seq_get(args, 0)
3306            increment = seq_get(args, 1)
3307        elif self._match_text_seq("START"):
3308            start = self._parse_bitwise()
3309            self._match_text_seq("INCREMENT")
3310            increment = self._parse_bitwise()
3311
3312        if start and increment:
3313            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3314
3315        return exp.AutoIncrementColumnConstraint()
3316
3317    def _parse_compress(self) -> exp.CompressColumnConstraint:
3318        if self._match(TokenType.L_PAREN, advance=False):
3319            return self.expression(
3320                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3321            )
3322
3323        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3324
3325    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
3326        if self._match_text_seq("BY", "DEFAULT"):
3327            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
3328            this = self.expression(
3329                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
3330            )
3331        else:
3332            self._match_text_seq("ALWAYS")
3333            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
3334
3335        self._match(TokenType.ALIAS)
3336        identity = self._match_text_seq("IDENTITY")
3337
3338        if self._match(TokenType.L_PAREN):
3339            if self._match_text_seq("START", "WITH"):
3340                this.set("start", self._parse_bitwise())
3341            if self._match_text_seq("INCREMENT", "BY"):
3342                this.set("increment", self._parse_bitwise())
3343            if self._match_text_seq("MINVALUE"):
3344                this.set("minvalue", self._parse_bitwise())
3345            if self._match_text_seq("MAXVALUE"):
3346                this.set("maxvalue", self._parse_bitwise())
3347
3348            if self._match_text_seq("CYCLE"):
3349                this.set("cycle", True)
3350            elif self._match_text_seq("NO", "CYCLE"):
3351                this.set("cycle", False)
3352
3353            if not identity:
3354                this.set("expression", self._parse_bitwise())
3355
3356            self._match_r_paren()
3357
3358        return this
3359
3360    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
3361        self._match_text_seq("LENGTH")
3362        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3363
3364    def _parse_not_constraint(
3365        self,
3366    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
3367        if self._match_text_seq("NULL"):
3368            return self.expression(exp.NotNullColumnConstraint)
3369        if self._match_text_seq("CASESPECIFIC"):
3370            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3371        return None
3372
3373    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3374        if self._match(TokenType.CONSTRAINT):
3375            this = self._parse_id_var()
3376        else:
3377            this = None
3378
3379        if self._match_texts(self.CONSTRAINT_PARSERS):
3380            return self.expression(
3381                exp.ColumnConstraint,
3382                this=this,
3383                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3384            )
3385
3386        return this
3387
3388    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3389        if not self._match(TokenType.CONSTRAINT):
3390            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3391
3392        this = self._parse_id_var()
3393        expressions = []
3394
3395        while True:
3396            constraint = self._parse_unnamed_constraint() or self._parse_function()
3397            if not constraint:
3398                break
3399            expressions.append(constraint)
3400
3401        return self.expression(exp.Constraint, this=this, expressions=expressions)
3402
3403    def _parse_unnamed_constraint(
3404        self, constraints: t.Optional[t.Collection[str]] = None
3405    ) -> t.Optional[exp.Expression]:
3406        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3407            return None
3408
3409        constraint = self._prev.text.upper()
3410        if constraint not in self.CONSTRAINT_PARSERS:
3411            self.raise_error(f"No parser found for schema constraint {constraint}.")
3412
3413        return self.CONSTRAINT_PARSERS[constraint](self)
3414
3415    def _parse_unique(self) -> exp.UniqueColumnConstraint:
3416        self._match_text_seq("KEY")
3417        return self.expression(
3418            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3419        )
3420
3421    def _parse_key_constraint_options(self) -> t.List[str]:
3422        options = []
3423        while True:
3424            if not self._curr:
3425                break
3426
3427            if self._match(TokenType.ON):
3428                action = None
3429                on = self._advance_any() and self._prev.text
3430
3431                if self._match_text_seq("NO", "ACTION"):
3432                    action = "NO ACTION"
3433                elif self._match_text_seq("CASCADE"):
3434                    action = "CASCADE"
3435                elif self._match_pair(TokenType.SET, TokenType.NULL):
3436                    action = "SET NULL"
3437                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3438                    action = "SET DEFAULT"
3439                else:
3440                    self.raise_error("Invalid key constraint")
3441
3442                options.append(f"ON {on} {action}")
3443            elif self._match_text_seq("NOT", "ENFORCED"):
3444                options.append("NOT ENFORCED")
3445            elif self._match_text_seq("DEFERRABLE"):
3446                options.append("DEFERRABLE")
3447            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3448                options.append("INITIALLY DEFERRED")
3449            elif self._match_text_seq("NORELY"):
3450                options.append("NORELY")
3451            elif self._match_text_seq("MATCH", "FULL"):
3452                options.append("MATCH FULL")
3453            else:
3454                break
3455
3456        return options
3457
3458    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
3459        if match and not self._match(TokenType.REFERENCES):
3460            return None
3461
3462        expressions = None
3463        this = self._parse_id_var()
3464
3465        if self._match(TokenType.L_PAREN, advance=False):
3466            expressions = self._parse_wrapped_id_vars()
3467
3468        options = self._parse_key_constraint_options()
3469        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3470
3471    def _parse_foreign_key(self) -> exp.ForeignKey:
3472        expressions = self._parse_wrapped_id_vars()
3473        reference = self._parse_references()
3474        options = {}
3475
3476        while self._match(TokenType.ON):
3477            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3478                self.raise_error("Expected DELETE or UPDATE")
3479
3480            kind = self._prev.text.lower()
3481
3482            if self._match_text_seq("NO", "ACTION"):
3483                action = "NO ACTION"
3484            elif self._match(TokenType.SET):
3485                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3486                action = "SET " + self._prev.text.upper()
3487            else:
3488                self._advance()
3489                action = self._prev.text.upper()
3490
3491            options[kind] = action
3492
3493        return self.expression(
3494            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3495        )
3496
3497    def _parse_primary_key(
3498        self, wrapped_optional: bool = False, in_props: bool = False
3499    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
3500        desc = (
3501            self._match_set((TokenType.ASC, TokenType.DESC))
3502            and self._prev.token_type == TokenType.DESC
3503        )
3504
3505        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
3506            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3507
3508        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
3509        options = self._parse_key_constraint_options()
3510        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3511
3512    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3513        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
3514            return this
3515
3516        bracket_kind = self._prev.token_type
3517
3518        if self._match(TokenType.COLON):
3519            expressions: t.List[t.Optional[exp.Expression]] = [
3520                self.expression(exp.Slice, expression=self._parse_conjunction())
3521            ]
3522        else:
3523            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
3524
3525        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
3526        if bracket_kind == TokenType.L_BRACE:
3527            this = self.expression(exp.Struct, expressions=expressions)
3528        elif not this or this.name.upper() == "ARRAY":
3529            this = self.expression(exp.Array, expressions=expressions)
3530        else:
3531            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
3532            this = self.expression(exp.Bracket, this=this, expressions=expressions)
3533
3534        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
3535            self.raise_error("Expected ]")
3536        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
3537            self.raise_error("Expected }")
3538
3539        self._add_comments(this)
3540        return self._parse_bracket(this)
3541
3542    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3543        if self._match(TokenType.COLON):
3544            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3545        return this
3546
3547    def _parse_case(self) -> t.Optional[exp.Expression]:
3548        ifs = []
3549        default = None
3550
3551        expression = self._parse_conjunction()
3552
3553        while self._match(TokenType.WHEN):
3554            this = self._parse_conjunction()
3555            self._match(TokenType.THEN)
3556            then = self._parse_conjunction()
3557            ifs.append(self.expression(exp.If, this=this, true=then))
3558
3559        if self._match(TokenType.ELSE):
3560            default = self._parse_conjunction()
3561
3562        if not self._match(TokenType.END):
3563            self.raise_error("Expected END after CASE", self._prev)
3564
3565        return self._parse_window(
3566            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3567        )
3568
3569    def _parse_if(self) -> t.Optional[exp.Expression]:
3570        if self._match(TokenType.L_PAREN):
3571            args = self._parse_csv(self._parse_conjunction)
3572            this = self.validate_expression(exp.If.from_arg_list(args), args)
3573            self._match_r_paren()
3574        else:
3575            index = self._index - 1
3576            condition = self._parse_conjunction()
3577
3578            if not condition:
3579                self._retreat(index)
3580                return None
3581
3582            self._match(TokenType.THEN)
3583            true = self._parse_conjunction()
3584            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3585            self._match(TokenType.END)
3586            this = self.expression(exp.If, this=condition, true=true, false=false)
3587
3588        return self._parse_window(this)
3589
3590    def _parse_extract(self) -> exp.Extract:
3591        this = self._parse_function() or self._parse_var() or self._parse_type()
3592
3593        if self._match(TokenType.FROM):
3594            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3595
3596        if not self._match(TokenType.COMMA):
3597            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3598
3599        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3600
3601    def _parse_cast(self, strict: bool) -> exp.Expression:
3602        this = self._parse_conjunction()
3603
3604        if not self._match(TokenType.ALIAS):
3605            if self._match(TokenType.COMMA):
3606                return self.expression(
3607                    exp.CastToStrType, this=this, expression=self._parse_string()
3608                )
3609            else:
3610                self.raise_error("Expected AS after CAST")
3611
3612        to = self._parse_types()
3613
3614        if not to:
3615            self.raise_error("Expected TYPE after CAST")
3616        elif to.this == exp.DataType.Type.CHAR:
3617            if self._match(TokenType.CHARACTER_SET):
3618                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3619        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
3620            fmt = self._parse_string()
3621
3622            return self.expression(
3623                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
3624                this=this,
3625                format=exp.Literal.string(
3626                    format_time(
3627                        fmt.this if fmt else "",
3628                        self.FORMAT_MAPPING or self.TIME_MAPPING,
3629                        self.FORMAT_TRIE or self.TIME_TRIE,
3630                    )
3631                ),
3632            )
3633
3634        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3635
3636    def _parse_concat(self) -> t.Optional[exp.Expression]:
3637        args = self._parse_csv(self._parse_conjunction)
3638        if self.CONCAT_NULL_OUTPUTS_STRING:
3639            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]
3640
3641        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
3642        # we find such a call we replace it with its argument.
3643        if len(args) == 1:
3644            return args[0]
3645
3646        return self.expression(
3647            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
3648        )
3649
3650    def _parse_string_agg(self) -> exp.Expression:
3651        expression: t.Optional[exp.Expression]
3652
3653        if self._match(TokenType.DISTINCT):
3654            args = self._parse_csv(self._parse_conjunction)
3655            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
3656        else:
3657            args = self._parse_csv(self._parse_conjunction)
3658            expression = seq_get(args, 0)
3659
3660        index = self._index
3661        if not self._match(TokenType.R_PAREN):
3662            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
3663            order = self._parse_order(this=expression)
3664            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3665
3666        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
3667        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
3668        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
3669        if not self._match_text_seq("WITHIN", "GROUP"):
3670            self._retreat(index)
3671            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
3672
3673        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
3674        order = self._parse_order(this=expression)
3675        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3676
3677    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3678        to: t.Optional[exp.Expression]
3679        this = self._parse_bitwise()
3680
3681        if self._match(TokenType.USING):
3682            to = self.expression(exp.CharacterSet, this=self._parse_var())
3683        elif self._match(TokenType.COMMA):
3684            to = self._parse_bitwise()
3685        else:
3686            to = None
3687
3688        # Swap the argument order if needed to produce the correct AST
3689        if self.CONVERT_TYPE_FIRST:
3690            this, to = to, this
3691
3692        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3693
3694    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
3695        """
3696        There are generally two variants of the DECODE function:
3697
3698        - DECODE(bin, charset)
3699        - DECODE(expression, search, result [, search, result] ... [, default])
3700
3701        The second variant will always be parsed into a CASE expression. Note that NULL
3702        needs special treatment, since we need to explicitly check for it with `IS NULL`,
3703        instead of relying on pattern matching.
3704        """
3705        args = self._parse_csv(self._parse_conjunction)
3706
3707        if len(args) < 3:
3708            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
3709
3710        expression, *expressions = args
3711        if not expression:
3712            return None
3713
3714        ifs = []
3715        for search, result in zip(expressions[::2], expressions[1::2]):
3716            if not search or not result:
3717                return None
3718
3719            if isinstance(search, exp.Literal):
3720                ifs.append(
3721                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
3722                )
3723            elif isinstance(search, exp.Null):
3724                ifs.append(
3725                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
3726                )
3727            else:
3728                cond = exp.or_(
3729                    exp.EQ(this=expression.copy(), expression=search),
3730                    exp.and_(
3731                        exp.Is(this=expression.copy(), expression=exp.Null()),
3732                        exp.Is(this=search.copy(), expression=exp.Null()),
3733                        copy=False,
3734                    ),
3735                    copy=False,
3736                )
3737                ifs.append(exp.If(this=cond, true=result))
3738
3739        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3740
3741    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
3742        self._match_text_seq("KEY")
3743        key = self._parse_field()
3744        self._match(TokenType.COLON)
3745        self._match_text_seq("VALUE")
3746        value = self._parse_field()
3747
3748        if not key and not value:
3749            return None
3750        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3751
3752    def _parse_json_object(self) -> exp.JSONObject:
3753        expressions = self._parse_csv(self._parse_json_key_value)
3754
3755        null_handling = None
3756        if self._match_text_seq("NULL", "ON", "NULL"):
3757            null_handling = "NULL ON NULL"
3758        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3759            null_handling = "ABSENT ON NULL"
3760
3761        unique_keys = None
3762        if self._match_text_seq("WITH", "UNIQUE"):
3763            unique_keys = True
3764        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3765            unique_keys = False
3766
3767        self._match_text_seq("KEYS")
3768
3769        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3770        format_json = self._match_text_seq("FORMAT", "JSON")
3771        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3772
3773        return self.expression(
3774            exp.JSONObject,
3775            expressions=expressions,
3776            null_handling=null_handling,
3777            unique_keys=unique_keys,
3778            return_type=return_type,
3779            format_json=format_json,
3780            encoding=encoding,
3781        )
3782
3783    def _parse_logarithm(self) -> exp.Func:
3784        # Default argument order is base, expression
3785        args = self._parse_csv(self._parse_range)
3786
3787        if len(args) > 1:
3788            if not self.LOG_BASE_FIRST:
3789                args.reverse()
3790            return exp.Log.from_arg_list(args)
3791
3792        return self.expression(
3793            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3794        )
3795
3796    def _parse_match_against(self) -> exp.MatchAgainst:
3797        expressions = self._parse_csv(self._parse_column)
3798
3799        self._match_text_seq(")", "AGAINST", "(")
3800
3801        this = self._parse_string()
3802
3803        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3804            modifier = "IN NATURAL LANGUAGE MODE"
3805            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3806                modifier = f"{modifier} WITH QUERY EXPANSION"
3807        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3808            modifier = "IN BOOLEAN MODE"
3809        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3810            modifier = "WITH QUERY EXPANSION"
3811        else:
3812            modifier = None
3813
3814        return self.expression(
3815            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3816        )
3817
3818    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3819    def _parse_open_json(self) -> exp.OpenJSON:
3820        this = self._parse_bitwise()
3821        path = self._match(TokenType.COMMA) and self._parse_string()
3822
3823        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
3824            this = self._parse_field(any_token=True)
3825            kind = self._parse_types()
3826            path = self._parse_string()
3827            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3828
3829            return self.expression(
3830                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3831            )
3832
3833        expressions = None
3834        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3835            self._match_l_paren()
3836            expressions = self._parse_csv(_parse_open_json_column_def)
3837
3838        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3839
3840    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
3841        args = self._parse_csv(self._parse_bitwise)
3842
3843        if self._match(TokenType.IN):
3844            return self.expression(
3845                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3846            )
3847
3848        if haystack_first:
3849            haystack = seq_get(args, 0)
3850            needle = seq_get(args, 1)
3851        else:
3852            needle = seq_get(args, 0)
3853            haystack = seq_get(args, 1)
3854
3855        return self.expression(
3856            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
3857        )
3858
3859    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
3860        args = self._parse_csv(self._parse_table)
3861        return exp.JoinHint(this=func_name.upper(), expressions=args)
3862
3863    def _parse_substring(self) -> exp.Substring:
3864        # Postgres supports the form: substring(string [from int] [for int])
3865        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3866
3867        args = self._parse_csv(self._parse_bitwise)
3868
3869        if self._match(TokenType.FROM):
3870            args.append(self._parse_bitwise())
3871            if self._match(TokenType.FOR):
3872                args.append(self._parse_bitwise())
3873
3874        return self.validate_expression(exp.Substring.from_arg_list(args), args)
3875
3876    def _parse_trim(self) -> exp.Trim:
3877        # https://www.w3resource.com/sql/character-functions/trim.php
3878        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3879
3880        position = None
3881        collation = None
3882
3883        if self._match_texts(self.TRIM_TYPES):
3884            position = self._prev.text.upper()
3885
3886        expression = self._parse_bitwise()
3887        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3888            this = self._parse_bitwise()
3889        else:
3890            this = expression
3891            expression = None
3892
3893        if self._match(TokenType.COLLATE):
3894            collation = self._parse_bitwise()
3895
3896        return self.expression(
3897            exp.Trim, this=this, position=position, expression=expression, collation=collation
3898        )
3899
3900    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3901        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3902
3903    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3904        return self._parse_window(self._parse_id_var(), alias=True)
3905
3906    def _parse_respect_or_ignore_nulls(
3907        self, this: t.Optional[exp.Expression]
3908    ) -> t.Optional[exp.Expression]:
3909        if self._match_text_seq("IGNORE", "NULLS"):
3910            return self.expression(exp.IgnoreNulls, this=this)
3911        if self._match_text_seq("RESPECT", "NULLS"):
3912            return self.expression(exp.RespectNulls, this=this)
3913        return this
3914
3915    def _parse_window(
3916        self, this: t.Optional[exp.Expression], alias: bool = False
3917    ) -> t.Optional[exp.Expression]:
3918        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
3919            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
3920            self._match_r_paren()
3921
3922        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
3923        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
3924        if self._match_text_seq("WITHIN", "GROUP"):
3925            order = self._parse_wrapped(self._parse_order)
3926            this = self.expression(exp.WithinGroup, this=this, expression=order)
3927
3928        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
3929        # Some dialects choose to implement and some do not.
3930        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
3931
3932        # There is some code above in _parse_lambda that handles
3933        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
3934
3935        # The below changes handle
3936        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
3937
3938        # Oracle allows both formats
3939        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
3940        #   and Snowflake chose to do the same for familiarity
3941        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
3942        this = self._parse_respect_or_ignore_nulls(this)
3943
3944        # bigquery select from window x AS (partition by ...)
3945        if alias:
3946            over = None
3947            self._match(TokenType.ALIAS)
3948        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
3949            return this
3950        else:
3951            over = self._prev.text.upper()
3952
3953        if not self._match(TokenType.L_PAREN):
3954            return self.expression(
3955                exp.Window, this=this, alias=self._parse_id_var(False), over=over
3956            )
3957
3958        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
3959
3960        first = self._match(TokenType.FIRST)
3961        if self._match_text_seq("LAST"):
3962            first = False
3963
3964        partition = self._parse_partition_by()
3965        order = self._parse_order()
3966        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
3967
3968        if kind:
3969            self._match(TokenType.BETWEEN)
3970            start = self._parse_window_spec()
3971            self._match(TokenType.AND)
3972            end = self._parse_window_spec()
3973
3974            spec = self.expression(
3975                exp.WindowSpec,
3976                kind=kind,
3977                start=start["value"],
3978                start_side=start["side"],
3979                end=end["value"],
3980                end_side=end["side"],
3981            )
3982        else:
3983            spec = None
3984
3985        self._match_r_paren()
3986
3987        return self.expression(
3988            exp.Window,
3989            this=this,
3990            partition_by=partition,
3991            order=order,
3992            spec=spec,
3993            alias=window_alias,
3994            over=over,
3995            first=first,
3996        )
3997
3998    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3999        self._match(TokenType.BETWEEN)
4000
4001        return {
4002            "value": (
4003                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
4004                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
4005                or self._parse_bitwise()
4006            ),
4007            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
4008        }
4009
4010    def _parse_alias(
4011        self, this: t.Optional[exp.Expression], explicit: bool = False
4012    ) -> t.Optional[exp.Expression]:
4013        any_token = self._match(TokenType.ALIAS)
4014
4015        if explicit and not any_token:
4016            return this
4017
4018        if self._match(TokenType.L_PAREN):
4019            aliases = self.expression(
4020                exp.Aliases,
4021                this=this,
4022                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
4023            )
4024            self._match_r_paren(aliases)
4025            return aliases
4026
4027        alias = self._parse_id_var(any_token)
4028
4029        if alias:
4030            return self.expression(exp.Alias, this=this, alias=alias)
4031
4032        return this
4033
4034    def _parse_id_var(
4035        self,
4036        any_token: bool = True,
4037        tokens: t.Optional[t.Collection[TokenType]] = None,
4038    ) -> t.Optional[exp.Expression]:
4039        identifier = self._parse_identifier()
4040
4041        if identifier:
4042            return identifier
4043
4044        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4045            quoted = self._prev.token_type == TokenType.STRING
4046            return exp.Identifier(this=self._prev.text, quoted=quoted)
4047
4048        return None
4049
4050    def _parse_string(self) -> t.Optional[exp.Expression]:
4051        if self._match(TokenType.STRING):
4052            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4053        return self._parse_placeholder()
4054
4055    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
4056        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4057
4058    def _parse_number(self) -> t.Optional[exp.Expression]:
4059        if self._match(TokenType.NUMBER):
4060            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4061        return self._parse_placeholder()
4062
4063    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4064        if self._match(TokenType.IDENTIFIER):
4065            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4066        return self._parse_placeholder()
4067
4068    def _parse_var(
4069        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4070    ) -> t.Optional[exp.Expression]:
4071        if (
4072            (any_token and self._advance_any())
4073            or self._match(TokenType.VAR)
4074            or (self._match_set(tokens) if tokens else False)
4075        ):
4076            return self.expression(exp.Var, this=self._prev.text)
4077        return self._parse_placeholder()
4078
4079    def _advance_any(self) -> t.Optional[Token]:
4080        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4081            self._advance()
4082            return self._prev
4083        return None
4084
4085    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4086        return self._parse_var() or self._parse_string()
4087
4088    def _parse_null(self) -> t.Optional[exp.Expression]:
4089        if self._match(TokenType.NULL):
4090            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4091        return None
4092
4093    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4094        if self._match(TokenType.TRUE):
4095            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4096        if self._match(TokenType.FALSE):
4097            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4098        return None
4099
4100    def _parse_star(self) -> t.Optional[exp.Expression]:
4101        if self._match(TokenType.STAR):
4102            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4103        return None
4104
4105    def _parse_parameter(self) -> exp.Parameter:
4106        wrapped = self._match(TokenType.L_BRACE)
4107        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4108        self._match(TokenType.R_BRACE)
4109        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4110
4111    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4112        if self._match_set(self.PLACEHOLDER_PARSERS):
4113            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4114            if placeholder:
4115                return placeholder
4116            self._advance(-1)
4117        return None
4118
4119    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4120        if not self._match(TokenType.EXCEPT):
4121            return None
4122        if self._match(TokenType.L_PAREN, advance=False):
4123            return self._parse_wrapped_csv(self._parse_column)
4124        return self._parse_csv(self._parse_column)
4125
4126    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4127        if not self._match(TokenType.REPLACE):
4128            return None
4129        if self._match(TokenType.L_PAREN, advance=False):
4130            return self._parse_wrapped_csv(self._parse_expression)
4131        return self._parse_csv(self._parse_expression)
4132
4133    def _parse_csv(
4134        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4135    ) -> t.List[t.Optional[exp.Expression]]:
4136        parse_result = parse_method()
4137        items = [parse_result] if parse_result is not None else []
4138
4139        while self._match(sep):
4140            self._add_comments(parse_result)
4141            parse_result = parse_method()
4142            if parse_result is not None:
4143                items.append(parse_result)
4144
4145        return items
4146
4147    def _parse_tokens(
4148        self, parse_method: t.Callable, expressions: t.Dict
4149    ) -> t.Optional[exp.Expression]:
4150        this = parse_method()
4151
4152        while self._match_set(expressions):
4153            this = self.expression(
4154                expressions[self._prev.token_type],
4155                this=this,
4156                comments=self._prev_comments,
4157                expression=parse_method(),
4158            )
4159
4160        return this
4161
4162    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4163        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4164
4165    def _parse_wrapped_csv(
4166        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4167    ) -> t.List[t.Optional[exp.Expression]]:
4168        return self._parse_wrapped(
4169            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4170        )
4171
4172    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4173        wrapped = self._match(TokenType.L_PAREN)
4174        if not wrapped and not optional:
4175            self.raise_error("Expecting (")
4176        parse_result = parse_method()
4177        if wrapped:
4178            self._match_r_paren()
4179        return parse_result
4180
4181    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4182        return self._parse_select() or self._parse_set_operations(
4183            self._parse_expression() if alias else self._parse_conjunction()
4184        )
4185
4186    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4187        return self._parse_query_modifiers(
4188            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
4189        )
4190
4191    def _parse_transaction(self) -> exp.Transaction:
4192        this = None
4193        if self._match_texts(self.TRANSACTION_KIND):
4194            this = self._prev.text
4195
4196        self._match_texts({"TRANSACTION", "WORK"})
4197
4198        modes = []
4199        while True:
4200            mode = []
4201            while self._match(TokenType.VAR):
4202                mode.append(self._prev.text)
4203
4204            if mode:
4205                modes.append(" ".join(mode))
4206            if not self._match(TokenType.COMMA):
4207                break
4208
4209        return self.expression(exp.Transaction, this=this, modes=modes)
4210
4211    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
4212        chain = None
4213        savepoint = None
4214        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4215
4216        self._match_texts({"TRANSACTION", "WORK"})
4217
4218        if self._match_text_seq("TO"):
4219            self._match_text_seq("SAVEPOINT")
4220            savepoint = self._parse_id_var()
4221
4222        if self._match(TokenType.AND):
4223            chain = not self._match_text_seq("NO")
4224            self._match_text_seq("CHAIN")
4225
4226        if is_rollback:
4227            return self.expression(exp.Rollback, savepoint=savepoint)
4228
4229        return self.expression(exp.Commit, chain=chain)
4230
4231    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4232        if not self._match_text_seq("ADD"):
4233            return None
4234
4235        self._match(TokenType.COLUMN)
4236        exists_column = self._parse_exists(not_=True)
4237        expression = self._parse_column_def(self._parse_field(any_token=True))
4238
4239        if expression:
4240            expression.set("exists", exists_column)
4241
4242            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4243            if self._match_texts(("FIRST", "AFTER")):
4244                position = self._prev.text
4245                column_position = self.expression(
4246                    exp.ColumnPosition, this=self._parse_column(), position=position
4247                )
4248                expression.set("position", column_position)
4249
4250        return expression
4251
4252    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
4253        drop = self._match(TokenType.DROP) and self._parse_drop()
4254        if drop and not isinstance(drop, exp.Command):
4255            drop.set("kind", drop.args.get("kind", "COLUMN"))
4256        return drop
4257
4258    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4259    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
4260        return self.expression(
4261            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4262        )
4263
4264    def _parse_add_constraint(self) -> exp.AddConstraint:
4265        this = None
4266        kind = self._prev.token_type
4267
4268        if kind == TokenType.CONSTRAINT:
4269            this = self._parse_id_var()
4270
4271            if self._match_text_seq("CHECK"):
4272                expression = self._parse_wrapped(self._parse_conjunction)
4273                enforced = self._match_text_seq("ENFORCED")
4274
4275                return self.expression(
4276                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4277                )
4278
4279        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4280            expression = self._parse_foreign_key()
4281        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4282            expression = self._parse_primary_key()
4283        else:
4284            expression = None
4285
4286        return self.expression(exp.AddConstraint, this=this, expression=expression)
4287
4288    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4289        index = self._index - 1
4290
4291        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4292            return self._parse_csv(self._parse_add_constraint)
4293
4294        self._retreat(index)
4295        return self._parse_csv(self._parse_add_column)
4296
4297    def _parse_alter_table_alter(self) -> exp.AlterColumn:
4298        self._match(TokenType.COLUMN)
4299        column = self._parse_field(any_token=True)
4300
4301        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4302            return self.expression(exp.AlterColumn, this=column, drop=True)
4303        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4304            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4305
4306        self._match_text_seq("SET", "DATA")
4307        return self.expression(
4308            exp.AlterColumn,
4309            this=column,
4310            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4311            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4312            using=self._match(TokenType.USING) and self._parse_conjunction(),
4313        )
4314
4315    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4316        index = self._index - 1
4317
4318        partition_exists = self._parse_exists()
4319        if self._match(TokenType.PARTITION, advance=False):
4320            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4321
4322        self._retreat(index)
4323        return self._parse_csv(self._parse_drop_column)
4324
4325    def _parse_alter_table_rename(self) -> exp.RenameTable:
4326        self._match_text_seq("TO")
4327        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4328
4329    def _parse_alter(self) -> exp.AlterTable | exp.Command:
4330        start = self._prev
4331
4332        if not self._match(TokenType.TABLE):
4333            return self._parse_as_command(start)
4334
4335        exists = self._parse_exists()
4336        this = self._parse_table(schema=True)
4337
4338        if self._next:
4339            self._advance()
4340        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4341
4342        if parser:
4343            actions = ensure_list(parser(self))
4344
4345            if not self._curr:
4346                return self.expression(
4347                    exp.AlterTable,
4348                    this=this,
4349                    exists=exists,
4350                    actions=actions,
4351                )
4352        return self._parse_as_command(start)
4353
4354    def _parse_merge(self) -> exp.Merge:
4355        self._match(TokenType.INTO)
4356        target = self._parse_table()
4357
4358        self._match(TokenType.USING)
4359        using = self._parse_table()
4360
4361        self._match(TokenType.ON)
4362        on = self._parse_conjunction()
4363
4364        whens = []
4365        while self._match(TokenType.WHEN):
4366            matched = not self._match(TokenType.NOT)
4367            self._match_text_seq("MATCHED")
4368            source = (
4369                False
4370                if self._match_text_seq("BY", "TARGET")
4371                else self._match_text_seq("BY", "SOURCE")
4372            )
4373            condition = self._parse_conjunction() if self._match(TokenType.AND) else None
4374
4375            self._match(TokenType.THEN)
4376
4377            if self._match(TokenType.INSERT):
4378                _this = self._parse_star()
4379                if _this:
4380                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
4381                else:
4382                    then = self.expression(
4383                        exp.Insert,
4384                        this=self._parse_value(),
4385                        expression=self._match(TokenType.VALUES) and self._parse_value(),
4386                    )
4387            elif self._match(TokenType.UPDATE):
4388                expressions = self._parse_star()
4389                if expressions:
4390                    then = self.expression(exp.Update, expressions=expressions)
4391                else:
4392                    then = self.expression(
4393                        exp.Update,
4394                        expressions=self._match(TokenType.SET)
4395                        and self._parse_csv(self._parse_equality),
4396                    )
4397            elif self._match(TokenType.DELETE):
4398                then = self.expression(exp.Var, this=self._prev.text)
4399            else:
4400                then = None
4401
4402            whens.append(
4403                self.expression(
4404                    exp.When,
4405                    matched=matched,
4406                    source=source,
4407                    condition=condition,
4408                    then=then,
4409                )
4410            )
4411
4412        return self.expression(
4413            exp.Merge,
4414            this=target,
4415            using=using,
4416            on=on,
4417            expressions=whens,
4418        )
4419
4420    def _parse_show(self) -> t.Optional[exp.Expression]:
4421        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
4422        if parser:
4423            return parser(self)
4424        self._advance()
4425        return self.expression(exp.Show, this=self._prev.text.upper())
4426
4427    def _parse_set_item_assignment(
4428        self, kind: t.Optional[str] = None
4429    ) -> t.Optional[exp.Expression]:
4430        index = self._index
4431
4432        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4433            return self._parse_set_transaction(global_=kind == "GLOBAL")
4434
4435        left = self._parse_primary() or self._parse_id_var()
4436
4437        if not self._match_texts(("=", "TO")):
4438            self._retreat(index)
4439            return None
4440
4441        right = self._parse_statement() or self._parse_id_var()
4442        this = self.expression(exp.EQ, this=left, expression=right)
4443
4444        return self.expression(exp.SetItem, this=this, kind=kind)
4445
4446    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4447        self._match_text_seq("TRANSACTION")
4448        characteristics = self._parse_csv(
4449            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4450        )
4451        return self.expression(
4452            exp.SetItem,
4453            expressions=characteristics,
4454            kind="TRANSACTION",
4455            **{"global": global_},  # type: ignore
4456        )
4457
4458    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4459        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
4460        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4461
4462    def _parse_set(self) -> exp.Set | exp.Command:
4463        index = self._index
4464        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4465
4466        if self._curr:
4467            self._retreat(index)
4468            return self._parse_as_command(self._prev)
4469
4470        return set_
4471
4472    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
4473        for option in options:
4474            if self._match_text_seq(*option.split(" ")):
4475                return exp.var(option)
4476        return None
4477
4478    def _parse_as_command(self, start: Token) -> exp.Command:
4479        while self._curr:
4480            self._advance()
4481        text = self._find_sql(start, self._prev)
4482        size = len(start.text)
4483        return exp.Command(this=text[:size], expression=text[size:])
4484
4485    def _parse_dict_property(self, this: str) -> exp.DictProperty:
4486        settings = []
4487
4488        self._match_l_paren()
4489        kind = self._parse_id_var()
4490
4491        if self._match(TokenType.L_PAREN):
4492            while True:
4493                key = self._parse_id_var()
4494                value = self._parse_primary()
4495
4496                if not key and value is None:
4497                    break
4498                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
4499            self._match(TokenType.R_PAREN)
4500
4501        self._match_r_paren()
4502
4503        return self.expression(
4504            exp.DictProperty,
4505            this=this,
4506            kind=kind.this if kind else None,
4507            settings=settings,
4508        )
4509
4510    def _parse_dict_range(self, this: str) -> exp.DictRange:
4511        self._match_l_paren()
4512        has_min = self._match_text_seq("MIN")
4513        if has_min:
4514            min = self._parse_var() or self._parse_primary()
4515            self._match_text_seq("MAX")
4516            max = self._parse_var() or self._parse_primary()
4517        else:
4518            max = self._parse_var() or self._parse_primary()
4519            min = exp.Literal.number(0)
4520        self._match_r_paren()
4521        return self.expression(exp.DictRange, this=this, min=min, max=max)
4522
4523    def _find_parser(
4524        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4525    ) -> t.Optional[t.Callable]:
4526        if not self._curr:
4527            return None
4528
4529        index = self._index
4530        this = []
4531        while True:
4532            # The current token might be multiple words
4533            curr = self._curr.text.upper()
4534            key = curr.split(" ")
4535            this.append(curr)
4536            self._advance()
4537            result, trie = in_trie(trie, key)
4538            if result == 0:
4539                break
4540            if result == 2:
4541                subparser = parsers[" ".join(this)]
4542                return subparser
4543        self._retreat(index)
4544        return None
4545
4546    def _match(self, token_type, advance=True, expression=None):
4547        if not self._curr:
4548            return None
4549
4550        if self._curr.token_type == token_type:
4551            if advance:
4552                self._advance()
4553            self._add_comments(expression)
4554            return True
4555
4556        return None
4557
4558    def _match_set(self, types, advance=True):
4559        if not self._curr:
4560            return None
4561
4562        if self._curr.token_type in types:
4563            if advance:
4564                self._advance()
4565            return True
4566
4567        return None
4568
4569    def _match_pair(self, token_type_a, token_type_b, advance=True):
4570        if not self._curr or not self._next:
4571            return None
4572
4573        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4574            if advance:
4575                self._advance(2)
4576            return True
4577
4578        return None
4579
4580    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4581        if not self._match(TokenType.L_PAREN, expression=expression):
4582            self.raise_error("Expecting (")
4583
4584    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4585        if not self._match(TokenType.R_PAREN, expression=expression):
4586            self.raise_error("Expecting )")
4587
4588    def _match_texts(self, texts, advance=True):
4589        if self._curr and self._curr.text.upper() in texts:
4590            if advance:
4591                self._advance()
4592            return True
4593        return False
4594
4595    def _match_text_seq(self, *texts, advance=True):
4596        index = self._index
4597        for text in texts:
4598            if self._curr and self._curr.text.upper() == text:
4599                self._advance()
4600            else:
4601                self._retreat(index)
4602                return False
4603
4604        if not advance:
4605            self._retreat(index)
4606
4607        return True
4608
4609    @t.overload
4610    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
4611        ...
4612
4613    @t.overload
4614    def _replace_columns_with_dots(
4615        self, this: t.Optional[exp.Expression]
4616    ) -> t.Optional[exp.Expression]:
4617        ...
4618
4619    def _replace_columns_with_dots(self, this):
4620        if isinstance(this, exp.Dot):
4621            exp.replace_children(this, self._replace_columns_with_dots)
4622        elif isinstance(this, exp.Column):
4623            exp.replace_children(this, self._replace_columns_with_dots)
4624            table = this.args.get("table")
4625            this = (
4626                self.expression(exp.Dot, this=table, expression=this.this)
4627                if table
4628                else self.expression(exp.Var, this=this.name)
4629            )
4630        elif isinstance(this, exp.Identifier):
4631            this = self.expression(exp.Var, this=this.name)
4632
4633        return this
4634
4635    def _replace_lambda(
4636        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
4637    ) -> t.Optional[exp.Expression]:
4638        if not node:
4639            return node
4640
4641        for column in node.find_all(exp.Column):
4642            if column.parts[0].name in lambda_variables:
4643                dot_or_id = column.to_dot() if column.table else column.this
4644                parent = column.parent
4645
4646                while isinstance(parent, exp.Dot):
4647                    if not isinstance(parent.parent, exp.Dot):
4648                        parent.replace(dot_or_id)
4649                        break
4650                    parent = parent.parent
4651                else:
4652                    if column is node:
4653                        node = dot_or_id
4654                    else:
4655                        column.replace(dot_or_id)
4656        return node

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, max_errors: int = 3)
822    def __init__(
823        self,
824        error_level: t.Optional[ErrorLevel] = None,
825        error_message_context: int = 100,
826        max_errors: int = 3,
827    ):
828        self.error_level = error_level or ErrorLevel.IMMEDIATE
829        self.error_message_context = error_message_context
830        self.max_errors = max_errors
831        self.reset()
def reset(self):
833    def reset(self):
834        self.sql = ""
835        self.errors = []
836        self._tokens = []
837        self._index = 0
838        self._curr = None
839        self._next = None
840        self._prev = None
841        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
843    def parse(
844        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
845    ) -> t.List[t.Optional[exp.Expression]]:
846        """
847        Parses a list of tokens and returns a list of syntax trees, one tree
848        per parsed SQL statement.
849
850        Args:
851            raw_tokens: The list of tokens.
852            sql: The original SQL string, used to produce helpful debug messages.
853
854        Returns:
855            The list of the produced syntax trees.
856        """
857        return self._parse(
858            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
859        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The list of the produced syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
861    def parse_into(
862        self,
863        expression_types: exp.IntoType,
864        raw_tokens: t.List[Token],
865        sql: t.Optional[str] = None,
866    ) -> t.List[t.Optional[exp.Expression]]:
867        """
868        Parses a list of tokens into a given Expression type. If a collection of Expression
869        types is given instead, this method will try to parse the token list into each one
870        of them, stopping at the first for which the parsing succeeds.
871
872        Args:
873            expression_types: The expression type(s) to try and parse the token list into.
874            raw_tokens: The list of tokens.
875            sql: The original SQL string, used to produce helpful debug messages.
876
877        Returns:
878            The target Expression.
879        """
880        errors = []
881        for expression_type in ensure_list(expression_types):
882            parser = self.EXPRESSION_PARSERS.get(expression_type)
883            if not parser:
884                raise TypeError(f"No parser registered for {expression_type}")
885
886            try:
887                return self._parse(parser, raw_tokens, sql)
888            except ParseError as e:
889                e.errors[0]["into_expression"] = expression_type
890                errors.append(e)
891
892        raise ParseError(
893            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
894            errors=merge_errors(errors),
895        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: The expression type(s) to try and parse the token list into.
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
932    def check_errors(self) -> None:
933        """Logs or raises any found errors, depending on the chosen error level setting."""
934        if self.error_level == ErrorLevel.WARN:
935            for error in self.errors:
936                logger.error(str(error))
937        elif self.error_level == ErrorLevel.RAISE and self.errors:
938            raise ParseError(
939                concat_messages(self.errors, self.max_errors),
940                errors=merge_errors(self.errors),
941            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
943    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
944        """
945        Appends an error in the list of recorded errors or raises it, depending on the chosen
946        error level setting.
947        """
948        token = token or self._curr or self._prev or Token.string("")
949        start = token.start
950        end = token.end + 1
951        start_context = self.sql[max(start - self.error_message_context, 0) : start]
952        highlight = self.sql[start:end]
953        end_context = self.sql[end : end + self.error_message_context]
954
955        error = ParseError.new(
956            f"{message}. Line {token.line}, Col: {token.col}.\n"
957            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
958            description=message,
959            line=token.line,
960            col=token.col,
961            start_context=start_context,
962            highlight=highlight,
963            end_context=end_context,
964        )
965
966        if self.error_level == ErrorLevel.IMMEDIATE:
967            raise error
968
969        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
971    def expression(
972        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
973    ) -> E:
974        """
975        Creates a new, validated Expression.
976
977        Args:
978            exp_class: The expression class to instantiate.
979            comments: An optional list of comments to attach to the expression.
980            kwargs: The arguments to set for the expression along with their respective values.
981
982        Returns:
983            The target expression.
984        """
985        instance = exp_class(**kwargs)
986        instance.add_comments(comments) if comments else self._add_comments(instance)
987        return self.validate_expression(instance)

Creates a new, validated Expression.

Arguments:
  • exp_class: The expression class to instantiate.
  • comments: An optional list of comments to attach to the expression.
  • kwargs: The arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression(self, expression: ~E, args: Optional[List] = None) -> ~E:
 994    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
 995        """
 996        Validates an Expression, making sure that all its mandatory arguments are set.
 997
 998        Args:
 999            expression: The expression to validate.
1000            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1001
1002        Returns:
1003            The validated expression.
1004        """
1005        if self.error_level != ErrorLevel.IGNORE:
1006            for error_message in expression.error_messages(args):
1007                self.raise_error(error_message)
1008
1009        return expression

Validates an Expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: The expression to validate.
  • args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:

The validated expression.